zz
This commit is contained in:
parent
370cd1536f
commit
0278bed4ff
@ -371,25 +371,6 @@ class ParsedDocument:
|
|||||||
return ">>> ".join(r)
|
return ">>> ".join(r)
|
||||||
|
|
||||||
|
|
||||||
def get_domains(arg):
    """
    Get domains from argument or stdin

    If arg is "-", one domain is read per line of stdin (trailing
    whitespace, including the newline, is removed). Otherwise arg is
    split on commas and surrounding whitespace is removed from each
    piece. Empty entries are dropped in both cases, so blank input
    lines, doubled or trailing commas and an empty argument never
    produce an empty domain.

    @param arg dash or domains separated by comma
    @return list of non-empty domain strings, in input order
    """
    if arg == "-":
        # Iterating sys.stdin yields lines that still carry their newline
        candidates = (line.rstrip() for line in sys.stdin)
    else:
        candidates = (part.strip() for part in arg.split(","))
    # A single filter keeps both input modes consistent about empty entries
    return [domain for domain in candidates if domain]
|
|
||||||
|
|
||||||
def parse_and_index(work_link,parser,responses,db):
|
def parse_and_index(work_link,parser,responses,db):
|
||||||
"""
|
"""
|
||||||
Take all responses from work link, parse and store in db
|
Take all responses from work link, parse and store in db
|
||||||
|
@ -52,7 +52,7 @@ def cli(ctx,cassandra_keyspace,cassandra_host,cassandra_port,beanstalkd_tube,bea
|
|||||||
ctx.obj["queue"] = queue
|
ctx.obj["queue"] = queue
|
||||||
|
|
||||||
|
|
||||||
@cli.command(help="All domains")
|
@cli.command(help="Get visited domains from db")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
@click.argument("count",type=int,default=20)
|
@click.argument("count",type=int,default=20)
|
||||||
def all(ctx,count):
|
def all(ctx,count):
|
||||||
@ -63,28 +63,7 @@ def all(ctx,count):
|
|||||||
q = create_queue_from_context(ctx)
|
q = create_queue_from_context(ctx)
|
||||||
process_domains(res,ctx.obj["visit"],ctx.obj["parser"],db,q)
|
process_domains(res,ctx.obj["visit"],ctx.obj["parser"],db,q)
|
||||||
|
|
||||||
@cli.command(help="Work queue")
|
@cli.command(help="Get random domains")
|
||||||
@click.pass_context
|
|
||||||
def work(ctx):
|
|
||||||
db = create_database_from_context(ctx)
|
|
||||||
q = create_queue_from_context(ctx)
|
|
||||||
work_domains(ctx.obj["parser"],db,q)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command(help="find best domains")
@click.pass_context
@click.argument("count",type=int,default=20)
#@click.option("visit",is_flag=True)
def best(ctx, count):
    """
    Ask the database for its best domains and pass them to process_domains

    @param ctx click context; ctx.obj supplies the "parser", "queue" and "visit" entries
    @param count number of domains requested from the database (defaults to 20)
    """
    database = create_database_from_context(ctx)
    parser = ctx.obj["parser"]
    best_domains = database.get_best_domains(count, parser)
    # A queue is only built when the "queue" entry is truthy; otherwise None is passed on
    queue = create_queue_from_context(ctx) if ctx.obj["queue"] else None
    process_domains(best_domains, ctx.obj["visit"], parser, database, queue)
|
|
||||||
|
|
||||||
@cli.command(help="select random domains")
|
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
@click.argument("count",type=int,default=20)
|
@click.argument("count",type=int,default=20)
|
||||||
#@click.option("visit",is_flag=True)
|
#@click.option("visit",is_flag=True)
|
||||||
@ -97,7 +76,29 @@ def blind(ctx, count):
|
|||||||
q = create_queue_from_context(ctx)
|
q = create_queue_from_context(ctx)
|
||||||
process_domains(domains,ctx.obj["visit"],p ,db,q)
|
process_domains(domains,ctx.obj["visit"],p ,db,q)
|
||||||
|
|
||||||
@cli.command(help="Find unvisited domains, Visit a site, get links and crawl")
|
@cli.command(help="Visit domains from queue")
@click.pass_context
def work(ctx):
    """
    Run work_domains with a database and queue built from the click context

    @param ctx click context; ctx.obj["parser"] is handed to work_domains
    """
    database = create_database_from_context(ctx)
    queue = create_queue_from_context(ctx)
    parser = ctx.obj["parser"]
    work_domains(parser, database, queue)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command(help="Get best domains from db")
@click.pass_context
@click.argument("count",type=int,default=20)
#@click.option("visit",is_flag=True)
def best(ctx, count):
    """
    Ask the database for its best domains and pass them to process_domains

    @param ctx click context; ctx.obj supplies the "parser", "queue" and "visit" entries
    @param count number of domains requested from the database (defaults to 20)
    """
    database = create_database_from_context(ctx)
    parser = ctx.obj["parser"]
    best_domains = database.get_best_domains(count, parser)
    # A queue is only built when the "queue" entry is truthy; otherwise None is passed on
    queue = create_queue_from_context(ctx) if ctx.obj["queue"] else None
    process_domains(best_domains, ctx.obj["visit"], parser, database, queue)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command(help="Get unvisited domains")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
@click.argument("count",type=int,default=20)
|
@click.argument("count",type=int,default=20)
|
||||||
def unvisited(ctx, count):
|
def unvisited(ctx, count):
|
||||||
@ -124,7 +125,7 @@ def file(ctx, name):
|
|||||||
q = create_queue_from_context(ctx)
|
q = create_queue_from_context(ctx)
|
||||||
process_domains(domains,ctx.obj["visit"],p,db,q)
|
process_domains(domains,ctx.obj["visit"],p,db,q)
|
||||||
|
|
||||||
@cli.command(help="Visit url and get links. Start here")
|
@cli.command(help="Visit one url and get links. Start here")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
@click.argument("link")
|
@click.argument("link")
|
||||||
def start(ctx, link):
|
def start(ctx, link):
|
||||||
|
Loading…
Reference in New Issue
Block a user