zz
This commit is contained in:
parent
370cd1536f
commit
0278bed4ff
@ -371,25 +371,6 @@ class ParsedDocument:
|
||||
return ">>> ".join(r)
|
||||
|
||||
|
||||
def get_domains(arg):
    """
    Get domains from argument or stdin

    If arg is "-", read one domain per line from stdin; trailing
    whitespace is stripped from each line and blank lines are skipped.
    Otherwise split arg on commas; empty entries (from an empty string,
    doubled commas or a trailing comma) are dropped so both input modes
    never yield an empty domain.

    @param arg dash or domains separated by comma
    @return list of domain strings
    """
    if arg == "-":
        # rstrip() also removes the trailing newline of every stdin line
        stripped = (line.rstrip() for line in sys.stdin)
        return [domain for domain in stripped if domain]
    # Drop empty items for consistency with the stdin branch
    return [domain for domain in arg.split(",") if domain]
|
||||
|
||||
def parse_and_index(work_link,parser,responses,db):
|
||||
"""
|
||||
Take all responses from work link, parse and store in db
|
||||
|
@ -52,7 +52,7 @@ def cli(ctx,cassandra_keyspace,cassandra_host,cassandra_port,beanstalkd_tube,bea
|
||||
ctx.obj["queue"] = queue
|
||||
|
||||
|
||||
@cli.command(help="All domains")
|
||||
@cli.command(help="Get visited domains from db")
|
||||
@click.pass_context
|
||||
@click.argument("count",type=int,default=20)
|
||||
def all(ctx,count):
|
||||
@ -63,28 +63,7 @@ def all(ctx,count):
|
||||
q = create_queue_from_context(ctx)
|
||||
process_domains(res,ctx.obj["visit"],ctx.obj["parser"],db,q)
|
||||
|
||||
@cli.command(help="Work queue")
@click.pass_context
def work(ctx):
    # Build the database and queue handles from the click context, then
    # pass them together with the configured parser to work_domains.
    database = create_database_from_context(ctx)
    queue = create_queue_from_context(ctx)
    work_domains(ctx.obj["parser"], database, queue)
|
||||
|
||||
|
||||
@cli.command(help="find best domains")
@click.pass_context
@click.argument("count", type=int, default=20)
#@click.option("visit",is_flag=True)
def best(ctx, count):
    # Ask the database for up to `count` best domains (using the parser
    # stored in the context) and pass them to process_domains.
    database = create_database_from_context(ctx)
    parser = ctx.obj["parser"]
    best_domains = db_result = database.get_best_domains(count, parser)
    # A queue is only created when the "queue" context entry is truthy;
    # otherwise process_domains receives None.
    queue = create_queue_from_context(ctx) if ctx.obj["queue"] else None
    process_domains(best_domains, ctx.obj["visit"], parser, database, queue)
|
||||
|
||||
@cli.command(help="select random domains")
|
||||
@cli.command(help="Get random domains")
|
||||
@click.pass_context
|
||||
@click.argument("count",type=int,default=20)
|
||||
#@click.option("visit",is_flag=True)
|
||||
@ -97,7 +76,29 @@ def blind(ctx, count):
|
||||
q = create_queue_from_context(ctx)
|
||||
process_domains(domains,ctx.obj["visit"],p ,db,q)
|
||||
|
||||
@cli.command(help="Find unvisited domains, Visit a site, get links and crawl")
|
||||
@cli.command(help="Visit domains from queue")
@click.pass_context
def work(ctx):
    # Build the database and queue handles from the click context, then
    # pass them together with the configured parser to work_domains.
    database = create_database_from_context(ctx)
    queue = create_queue_from_context(ctx)
    work_domains(ctx.obj["parser"], database, queue)
|
||||
|
||||
|
||||
@cli.command(help="Get best domains from db")
@click.pass_context
@click.argument("count", type=int, default=20)
#@click.option("visit",is_flag=True)
def best(ctx, count):
    # Ask the database for up to `count` best domains (using the parser
    # stored in the context) and pass them to process_domains.
    database = create_database_from_context(ctx)
    parser = ctx.obj["parser"]
    best_domains = database.get_best_domains(count, parser)
    # A queue is only created when the "queue" context entry is truthy;
    # otherwise process_domains receives None.
    queue = create_queue_from_context(ctx) if ctx.obj["queue"] else None
    process_domains(best_domains, ctx.obj["visit"], parser, database, queue)
|
||||
|
||||
|
||||
@cli.command(help="Get unvisited domains")
|
||||
@click.pass_context
|
||||
@click.argument("count",type=int,default=20)
|
||||
def unvisited(ctx, count):
|
||||
@ -124,7 +125,7 @@ def file(ctx, name):
|
||||
q = create_queue_from_context(ctx)
|
||||
process_domains(domains,ctx.obj["visit"],p,db,q)
|
||||
|
||||
@cli.command(help="Visit url and get links. Start here")
|
||||
@cli.command(help="Visit one url and get links. Start here")
|
||||
@click.pass_context
|
||||
@click.argument("link")
|
||||
def start(ctx, link):
|
||||
|
Loading…
Reference in New Issue
Block a user