zz
This commit is contained in:
parent
b6d9260882
commit
b838a9bbd6
13
mongo/cli.py
13
mongo/cli.py
@ -5,6 +5,7 @@ import redis
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import pymongo
|
import pymongo
|
||||||
|
import courlan
|
||||||
from config import *
|
from config import *
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
@ -62,6 +63,18 @@ def sample(domain):
|
|||||||
links = mongocrawler.sample_links(db,domain,"frontlink",BATCH_SIZE)
|
links = mongocrawler.sample_links(db,domain,"frontlink",BATCH_SIZE)
|
||||||
print(links)
|
print(links)
|
||||||
|
|
||||||
|
@cli.command(help="Fetch front links from a start link and index them into the database")
@click.argument("start_link")
def fetchlinks(start_link):
    """Fetch the front links reachable from ``start_link`` and index them.

    The start link is first checked with ``courlan.check_url``. The cleaned
    URL is passed to ``mongocrawler.fetch_front_links`` together with the
    rules returned by ``mongocrawler.fetch_robot``; the resulting links are
    printed and handed to ``mongocrawler.index_links``.

    Args:
        start_link: URL given on the command line.

    Raises:
        click.BadParameter: if ``courlan.check_url`` rejects ``start_link``.
    """
    # courlan.check_url returns None (not a tuple) for a rejected URL, so the
    # result must be tested before unpacking; otherwise the command dies with
    # an opaque TypeError instead of a usage error.
    checked = courlan.check_url(start_link)
    if checked is None:
        raise click.BadParameter(
            f"not a valid URL: {start_link!r}", param_hint="start_link"
        )
    start_link, hostname = checked

    # Validate first, then create the database handle.
    myclient = pymongo.MongoClient(CONNECTION)
    db = myclient[DBNAME]

    # NOTE(review): presumably the robots.txt rules for the host - confirm
    # against mongocrawler.fetch_robot.
    rules = mongocrawler.fetch_robot(hostname)
    front_links = mongocrawler.fetch_front_links(start_link, rules)
    print(front_links)
    mongocrawler.index_links(db, front_links)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command(help="Enqueue a list of links into redis queue for crawling")
|
@cli.command(help="Enqueue a list of links into redis queue for crawling")
|
||||||
def enqueue():
|
def enqueue():
|
||||||
|
Loading…
Reference in New Issue
Block a user