zz
This commit is contained in:
parent
2ac0b911f5
commit
2941bf8a4c
@ -84,6 +84,18 @@ def best(ctx, count):
|
||||
q = create_queue_from_context(ctx)
|
||||
process_domains(domains,ctx.obj["visit"],p ,db,q)
|
||||
|
||||
@cli.command(help="select random domains")
@click.pass_context
@click.argument("count", type=int, default=20)
# NOTE: a per-command "visit" option was sketched here but is disabled;
# the flag is read from ctx.obj["visit"] instead.
def blind(ctx, count):
    """Process up to `count` randomly selected domains.

    Reads the parser, the "visit" flag and the "queue" flag from ``ctx.obj``.
    A queue is built only when ``ctx.obj["queue"]`` is truthy; otherwise
    ``None`` is passed on to ``process_domains``.
    """
    database = create_database_from_context(ctx)
    parser = ctx.obj["parser"]
    selected = database.get_random_domains(count, parser)
    # The queue is optional: create it only when enabled in the context.
    queue = create_queue_from_context(ctx) if ctx.obj["queue"] else None
    process_domains(selected, ctx.obj["visit"], parser, database, queue)
|
||||
|
||||
@cli.command(help="Find unvisited domains, Visit a site, get links and crawl")
|
||||
@click.pass_context
|
||||
|
@ -494,6 +494,18 @@ INSERT INTO content(
|
||||
# returns sorted list of tuples domain,gain_ratio
|
||||
return res
|
||||
|
||||
def get_random_domains(self, count, parser):
    """Return up to `count` randomly chosen domain rows accepted by `parser`.

    Every row produced by executing ``self.domains_select`` is read; a row
    is kept when ``parser.is_domain_good`` approves its first column.

    Args:
        count: Maximum number of rows to return. Zero or a negative value
            yields an empty list.
        parser: Object exposing ``is_domain_good(domain)``.

    Returns:
        A list of rows (each converted to a ``list``), sampled without
        replacement. It is shorter than `count` when fewer acceptable
        domains exist.
    """
    rows = self.session.execute(self.domains_select)
    candidates = [list(row) for row in rows if parser.is_domain_good(row[0])]
    # random.sample raises ValueError for a negative or oversized sample
    # size, so clamp the request into [0, len(candidates)].
    sample_size = max(0, min(len(candidates), count))
    return random.sample(candidates, sample_size)
|
||||
|
||||
def get_unvisited_domains(self,count,parser):
|
||||
# get all domains
|
||||
rows = self.session.execute(self.domains_select)
|
||||
|
Loading…
Reference in New Issue
Block a user