Compare commits

..

No commits in common. "a7d048c952da28f8d497eba1ab586ffe83527135" and "f5a3b03874c472494230cfd97b69af47dc57dac9" have entirely different histories.

2 changed files with 0 additions and 11 deletions

View File

@@ -739,13 +739,6 @@ def crawl_summary():
for item in res:
values = [str(item[x]) for x in headers]
print("\t".join(values))
contentcol = db["content"]
res = contentcol.aggregate([
{"$group":{"_id":None,total_text_size:{"$sum":"$text_size"}}}
])
print(">>>>> Total text size in content")
for item in res:
print(res)
def import_html():

View File

@@ -1,4 +0,0 @@
# Print one "rq enqueue mongocrawler.visit" command per domain, in random order.
#
# Input:  domains.txt in the current directory; each line is split on ';' and
#         only the first field is used as the hostname.
# Output: one command line per hostname on stdout. The commands are only
#         echoed, never executed here (presumably the output is meant to be
#         reviewed or piped to a shell -- confirm with the caller).

# $(...) replaces the legacy backtick form; shuf randomizes the hostname order.
DOMAINS=$(cut -f 1 -d ";" domains.txt | shuf)

# Read line by line instead of relying on unquoted word splitting, so a
# hostname containing glob characters (*, ?, [) is never expanded against
# files in the current directory.
printf '%s\n' "$DOMAINS" | while IFS= read -r DOM ; do
    # Skip blank lines (including the single empty line printf emits when
    # DOMAINS is empty) so no command with an empty hostname is printed.
    [ -n "$DOM" ] || continue
    echo "rq enqueue mongocrawler.visit hostname=$DOM"
done