Compare commits
2 Commits
f5a3b03874
...
a7d048c952
Author | SHA1 | Date | |
---|---|---|---|
a7d048c952 | |||
93717bed14 |
@ -739,6 +739,13 @@ def crawl_summary():
|
|||||||
for item in res:
|
for item in res:
|
||||||
values = [str(item[x]) for x in headers]
|
values = [str(item[x]) for x in headers]
|
||||||
print("\t".join(values))
|
print("\t".join(values))
|
||||||
|
# Report the summed `text_size` over every document in the "content"
# collection.  NOTE(review): `db` is presumably the pymongo database handle
# opened earlier in this function -- confirm against the full file.
contentcol = db["content"]
res = contentcol.aggregate([
    # `_id: None` puts all documents in a single group.  The output field
    # name must be a string key; the bare name `total_text_size` raised
    # NameError before the pipeline was ever sent.
    {"$group": {"_id": None, "total_text_size": {"$sum": "$text_size"}}},
])
print(">>>>> Total text size in content")
for item in res:
    # Print the result row itself, not the cursor object.
    print(item)
|
||||||
|
|
||||||
|
|
||||||
def import_html():
|
def import_html():
|
||||||
|
4
mongo/submitdomains.sh
Normal file
4
mongo/submitdomains.sh
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Take the first ';'-separated field of every line in domains.txt and
# shuffle the resulting list.
DOMAINS=$(cut -f 1 -d ";" domains.txt | shuf)

# Print (not execute) one `rq enqueue` command line per domain.
for DOM in $DOMAINS
do
    echo rq enqueue mongocrawler.visit hostname=$DOM
done
|
Loading…
Reference in New Issue
Block a user