diff --git a/websucker/cli.py b/websucker/cli.py index db149c4..0be29ec 100644 --- a/websucker/cli.py +++ b/websucker/cli.py @@ -12,7 +12,7 @@ import os def create_database_from_context(ctx): - return Data(ctx.obj["cassandra_keyspace"],ctx.obj["cassandra_host"],ctx.obj["cassandra_port"]) + return Data(ctx.obj["cassandra_keyspace"],ctx.obj["cassandra_host"],ctx.obj["cassandra_port"],ctx.obj["cassandra_username"],ctx.obj["cassandra_password"]) def create_queue_from_context(ctx): return greenstalk.Client((ctx.obj["beanstalkd_host"],ctx.obj["beanstalkd_port"]),use=ctx.obj["beanstalkd_tube"],watch=ctx.obj["beanstalkd_tube"],encoding="utf8") @@ -23,13 +23,15 @@ def create_queue_from_context(ctx): @click.option("--cassandra-keyspace",metavar="CASSANDRA_KEYSPACE",help="cassandra keyspace (if defined, value read from CASSANDRA_KEYSPACE env variable)",envvar="CASSANDRA_KEYSPACE",default="websucker",show_default=True) @click.option("--cassandra-host",metavar="CASSANDRA_HOST",help="cassandra host (if defined, value read from CASSANDRA_HOST env variable)",envvar="CASSANDRA_HOST",default="127.0.0.1",show_default=True) @click.option("--cassandra-port",metavar="CASSANDRA_PORT",help="cassandra port (if defined, value read from CASSANDRA_PORT env variable)",envvar="CASSANDRA_PORT",default=9042,show_default=True) +@click.option("--cassandra-username",metavar="CASSANDRA_USERNAME",help="cassandra username (if defined, value read from CASSANDRA_USERNAME env variable)",envvar="CASSANDRA_USERNAME") +@click.option("--cassandra-password",metavar="CASSANDRA_PASSWORD",help="cassandra password (if defined, value read from CASSANDRA_PASSWORD env variable)",envvar="CASSANDRA_PASSWORD") @click.option("--beanstalkd-tube",metavar="BEANSTALKD_TUBE",help="beanstalkd keyspace (if defined, value read from BEANSTALKD_TUBE env variable)",envvar="BEANSTALKD_TUBE",default="websucker",show_default=True) @click.option("--beanstalkd-host",metavar="BEANSTALKD_HOST",help="beanstalkd host (if defined, value read from beanstalkd_HOST env variable)",envvar="BEANSTALKD_HOST",default="127.0.0.1",show_default=True) @click.option("--beanstalkd-port",metavar="BEANSTALKD_PORT",help="beanstalkd port (if defined, value read from BEANSTALKD_PORT env variable)",envvar="BEANSTALKD_PORT",default=11300,show_default=True) @click.option("--parser",metavar="file_name",help="zzz") @click.option("--visit",is_flag=True) @click.option("--queue",is_flag=True) -def cli(ctx,cassandra_keyspace,cassandra_host,cassandra_port,beanstalkd_tube,beanstalkd_host,beanstalkd_port,parser,visit,queue): +def cli(ctx,cassandra_keyspace,cassandra_host,cassandra_port,cassandra_username,cassandra_password,beanstalkd_tube,beanstalkd_host,beanstalkd_port,parser,visit,queue): ctx.ensure_object(dict) p = BaseParser() if parser is not None: @@ -44,6 +46,8 @@ def cli(ctx,cassandra_keyspace,cassandra_host,cassandra_port,beanstalkd_tube,bea ctx.obj["parser"] = p ctx.obj["cassandra_host"] = cassandra_host ctx.obj["cassandra_port"] = cassandra_port + ctx.obj["cassandra_username"] = cassandra_username + ctx.obj["cassandra_password"] = cassandra_password ctx.obj["cassandra_keyspace"] = cassandra_keyspace ctx.obj["beanstalkd_host"] = beanstalkd_host ctx.obj["beanstalkd_port"] = beanstalkd_port diff --git a/websucker/db.py b/websucker/db.py index 03f51c2..7e8d84b 100644 --- a/websucker/db.py +++ b/websucker/db.py @@ -1,5 +1,6 @@ import cassandra import cassandra.cluster +from cassandra.auth import PlainTextAuthProvider import random import os import pkg_resources @@ -20,12 +21,15 @@ class Data: """ Database of text documents """ - def __init__(self,keyspace="websucker",cassandra_host="127.0.0.1",cassandra_port=9042): + def __init__(self,keyspace="websucker",cassandra_host="127.0.0.1",cassandra_port=9042,username=None,password=None): print("Database {}@{}:{}".format(keyspace,cassandra_host, cassandra_port)) + auth_provider = None + if username is not None and password is not None: + auth_provider = PlainTextAuthProvider(username=username, password=password) # execution profile ep = cassandra.cluster.ExecutionProfile(request_timeout=240.0) profiles = {cassandra.cluster.EXEC_PROFILE_DEFAULT:ep} - self.cluster = cassandra.cluster.Cluster([cassandra_host],port=cassandra_port,execution_profiles=profiles) + self.cluster = cassandra.cluster.Cluster([cassandra_host],port=cassandra_port,execution_profiles=profiles,auth_provider=auth_provider) self.session = self.cluster.connect(keyspace) self.check_document_select_query = self.session.prepare("SELECT count(url_hash) FROM paragraph_checksums WHERE checksum=?" )