zz
This commit is contained in:
		
							parent
							
								
									2ac0b911f5
								
							
						
					
					
						commit
						2941bf8a4c
					
				| @ -84,6 +84,18 @@ def best(ctx, count): | |||||||
|         q = create_queue_from_context(ctx) |         q = create_queue_from_context(ctx) | ||||||
|     process_domains(domains,ctx.obj["visit"],p ,db,q) |     process_domains(domains,ctx.obj["visit"],p ,db,q) | ||||||
| 
 | 
 | ||||||
|  | @cli.command(help="select random domains") | ||||||
|  | @click.pass_context | ||||||
|  | @click.argument("count",type=int,default=20) | ||||||
|  | #@click.option("visit",is_flag=True) | ||||||
|  | def blind(ctx, count): | ||||||
|  |     db = create_database_from_context(ctx) | ||||||
|  |     p = ctx.obj["parser"] | ||||||
|  |     domains = db.get_random_domains(count,p) | ||||||
|  |     q = None | ||||||
|  |     if ctx.obj["queue"]: | ||||||
|  |         q = create_queue_from_context(ctx) | ||||||
|  |     process_domains(domains,ctx.obj["visit"],p ,db,q) | ||||||
| 
 | 
 | ||||||
| @cli.command(help="Find unvisited domains, Visit a site, get links and crawl") | @cli.command(help="Find unvisited domains, Visit a site, get links and crawl") | ||||||
| @click.pass_context | @click.pass_context | ||||||
|  | |||||||
| @ -494,6 +494,18 @@ INSERT INTO content( | |||||||
|         # returns sorted list of tuples domain,gain_ratio |         # returns sorted list of tuples domain,gain_ratio | ||||||
|         return res |         return res | ||||||
| 
 | 
 | ||||||
|  |     def get_random_domains(self,count,parser): | ||||||
|  |         # get all domains | ||||||
|  |         rows = self.session.execute(self.domains_select) | ||||||
|  |         domains = [] | ||||||
|  |         for row in rows: | ||||||
|  |             domain = row[0] | ||||||
|  |             if parser.is_domain_good(domain): | ||||||
|  |                 domains.append(list(row)) | ||||||
|  |         l = len(domains) | ||||||
|  |         ss = min(l,count) | ||||||
|  |         return random.sample(domains,ss) | ||||||
|  | 
 | ||||||
|     def get_unvisited_domains(self,count,parser): |     def get_unvisited_domains(self,count,parser): | ||||||
|         # get all domains |         # get all domains | ||||||
|         rows = self.session.execute(self.domains_select) |         rows = self.session.execute(self.domains_select) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user