zz
This commit is contained in:
parent
01645b8862
commit
93717bed14
@ -730,6 +730,13 @@ def crawl_summary():
|
||||
for item in res:
|
||||
values = [str(item[x]) for x in headers]
|
||||
print("\t".join(values))
|
||||
contentcol = db["content"]
|
||||
res = contentcol.aggregate([
|
||||
{"$group":{"_id":None,total_text_size:{"$sum":"$text_size"}}}
|
||||
])
|
||||
print(">>>>> Total text size in content")
|
||||
for item in res:
|
||||
print(res)
|
||||
|
||||
|
||||
def import_html():
|
||||
|
4
mongo/submitdomains.sh
Normal file
4
mongo/submitdomains.sh
Normal file
@ -0,0 +1,4 @@
|
||||
# Print one "rq enqueue mongocrawler.visit" command per domain, in random order.
#
# Input:  domains.txt in the current directory; only the first ';'-separated
#         field of each line is used.
# Output: the commands are written to stdout with echo; this script does not
#         run them itself.

# Take the first field of every line and shuffle the resulting list.
DOMAINS=$(cut -f 1 -d ";" domains.txt | shuf)

# $DOMAINS is intentionally left unquoted here so the shell splits it into
# one word per domain.
for DOM in $DOMAINS ; do
    # Quote the argument so the domain is printed verbatim (no globbing).
    echo rq enqueue mongocrawler.visit "hostname=$DOM"
done
|
Loading…
Reference in New Issue
Block a user