zz
This commit is contained in:
parent
2ac0b911f5
commit
2941bf8a4c
@ -84,6 +84,18 @@ def best(ctx, count):
|
|||||||
q = create_queue_from_context(ctx)
|
q = create_queue_from_context(ctx)
|
||||||
process_domains(domains,ctx.obj["visit"],p ,db,q)
|
process_domains(domains,ctx.obj["visit"],p ,db,q)
|
||||||
|
|
||||||
|
@cli.command(help="select random domains")
@click.pass_context
@click.argument("count", type=int, default=20)
#@click.option("visit",is_flag=True)
def blind(ctx, count):
    """Pick random domains from the database and hand them to process_domains.

    ctx   -- click context; ctx.obj supplies the "parser", "queue" and
             "visit" entries read below.
    count -- how many random domains to request from the database.
    """
    parser = ctx.obj["parser"]
    database = create_database_from_context(ctx)
    selected = database.get_random_domains(count, parser)
    # A queue is only created when the context asks for one; otherwise
    # process_domains receives None in its place.
    queue = create_queue_from_context(ctx) if ctx.obj["queue"] else None
    process_domains(selected, ctx.obj["visit"], parser, database, queue)
|
||||||
|
|
||||||
@cli.command(help="Find unvisited domains, Visit a site, get links and crawl")
|
@cli.command(help="Find unvisited domains, Visit a site, get links and crawl")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
|
@ -494,6 +494,18 @@ INSERT INTO content(
|
|||||||
# returns sorted list of tuples domain,gain_ratio
|
# returns sorted list of tuples domain,gain_ratio
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def get_random_domains(self, count, parser):
    """Return up to ``count`` randomly chosen domain rows accepted by ``parser``.

    Executes ``self.domains_select`` on ``self.session`` and keeps every row
    whose first column passes ``parser.is_domain_good``. Each kept row is
    converted to a list.

    count  -- maximum number of rows to return; zero or a negative value
              yields an empty list.
    parser -- object providing ``is_domain_good(domain)``.

    Returns a list of at most ``count`` rows (each a list), sampled without
    replacement in random order.
    """
    # get all domains
    rows = self.session.execute(self.domains_select)
    domains = [list(row) for row in rows if parser.is_domain_good(row[0])]
    # random.sample raises ValueError for a negative or oversized sample
    # size, so clamp the request into [0, len(domains)].
    sample_size = max(0, min(len(domains), count))
    return random.sample(domains, sample_size)
|
||||||
|
|
||||||
def get_unvisited_domains(self,count,parser):
|
def get_unvisited_domains(self,count,parser):
|
||||||
# get all domains
|
# get all domains
|
||||||
rows = self.session.execute(self.domains_select)
|
rows = self.session.execute(self.domains_select)
|
||||||
|
Loading…
Reference in New Issue
Block a user