"""Dump every row of the ``content`` table as one JSON document per line.

Usage: <script> CASSANDRA_HOST CASSANDRA_PORT

Each row of ``content`` in the ``websucker`` keyspace is read as JSON and
extended with an ``html_data`` key that holds one matching row of the ``html``
table (looked up by ``day`` and ``domain_name``), or an empty dict when no
such row exists. The result is printed to standard output.
"""
import cassandra
import cassandra.cluster
import cassandra.query
import json
import datetime
import sys

if len(sys.argv) < 3:
    sys.exit("usage: {} CASSANDRA_HOST CASSANDRA_PORT".format(sys.argv[0]))

cassandra_host = sys.argv[1]
# The driver documents ``port`` as an integer; argv only ever yields strings.
cassandra_port = int(sys.argv[2])
keyspace = "websucker"

# Rows come back as dicts so the JSON payload can be read by column name;
# the row factory is configured here rather than on the session.
ep = cassandra.cluster.ExecutionProfile(
    request_timeout=240.0,
    row_factory=cassandra.query.dict_factory,
)
profiles = {cassandra.cluster.EXEC_PROFILE_DEFAULT: ep}
cluster = cassandra.cluster.Cluster(
    [cassandra_host],
    port=cassandra_port,
    execution_profiles=profiles,
)

select_documents = "select json * from content"

try:
    with cluster.connect(keyspace) as session:
        # Prepared once, executed for every content row.
        select_html = session.prepare(
            "select json * from html where day=? and domain_name=? LIMIT 1"
        )
        rows = session.execute(select_documents)
        for row in rows:
            # "select json" returns the whole row in a single "[json]" column.
            doc = json.loads(row["[json]"])
            dt = doc["update_time"]
            # First whitespace-separated token of update_time is used as the
            # ``day`` value (presumably the date part - confirm against schema).
            d = dt.split()[0]
            hrows = session.execute(select_html, (d, doc["domain_name"]))
            # Take the first matching html row, if any.
            first_html = next(iter(hrows), None)
            html = {} if first_html is None else json.loads(first_html["[json]"])
            doc["html_data"] = html
            print(json.dumps(doc))
finally:
    # Leaving the ``with`` block only ends the session; release the cluster's
    # connections and threads explicitly as well.
    cluster.shutdown()