zz
This commit is contained in:
parent
b6d9260882
commit
b838a9bbd6
13
mongo/cli.py
13
mongo/cli.py
@ -5,6 +5,7 @@ import redis
|
||||
import sys
|
||||
import os
|
||||
import pymongo
|
||||
import courlan
|
||||
from config import *
|
||||
|
||||
@click.group()
|
||||
@ -62,6 +63,18 @@ def sample(domain):
|
||||
links = mongocrawler.sample_links(db,domain,"frontlink",BATCH_SIZE)
|
||||
print(links)
|
||||
|
||||
@cli.command()
@click.argument("start_link")
def fetchlinks(start_link):
    """Fetch the front links of START_LINK and index them in the database.

    The link is first validated with ``courlan.check_url``. The robots rules
    are then obtained with ``mongocrawler.fetch_robot``, the front links are
    fetched with ``mongocrawler.fetch_front_links``, printed, and passed to
    ``mongocrawler.index_links`` together with the ``DBNAME`` database.

    Raises:
        click.BadParameter: if ``courlan.check_url`` rejects START_LINK.
    """
    # courlan.check_url returns None for URLs it rejects, so the result must
    # be checked before it is unpacked; otherwise a bad argument surfaces as
    # an opaque "cannot unpack non-iterable NoneType" TypeError.
    checked = courlan.check_url(start_link)
    if checked is None:
        raise click.BadParameter(
            f"not a valid or crawlable URL: {start_link}",
            param_hint="start_link",
        )
    start_link, hostname = checked

    # The context manager closes the client even if fetching or indexing fails.
    with pymongo.MongoClient(CONNECTION) as myclient:
        db = myclient[DBNAME]
        rules = mongocrawler.fetch_robot(hostname)
        front_links = mongocrawler.fetch_front_links(start_link, rules)
        print(front_links)
        mongocrawler.index_links(db, front_links)
|
||||
|
||||
|
||||
|
||||
@cli.command(help="Enqueue a list of links into redis queue for crawling")
|
||||
def enqueue():
|
||||
|
Loading…
Reference in New Issue
Block a user