Compare commits
No commits in common. "a7d048c952da28f8d497eba1ab586ffe83527135" and "f5a3b03874c472494230cfd97b69af47dc57dac9" have entirely different histories.
a7d048c952
...
f5a3b03874
@ -739,13 +739,6 @@ def crawl_summary():
|
|||||||
for item in res:
|
for item in res:
|
||||||
values = [str(item[x]) for x in headers]
|
values = [str(item[x]) for x in headers]
|
||||||
print("\t".join(values))
|
print("\t".join(values))
|
||||||
contentcol = db["content"]
|
|
||||||
res = contentcol.aggregate([
|
|
||||||
{"$group":{"_id":None,total_text_size:{"$sum":"$text_size"}}}
|
|
||||||
])
|
|
||||||
print(">>>>> Total text size in content")
|
|
||||||
for item in res:
|
|
||||||
print(res)
|
|
||||||
|
|
||||||
|
|
||||||
def import_html():
|
def import_html():
|
||||||
|
@ -1,4 +0,0 @@
|
|||||||
DOMAINS=`cut -f 1 -d ";" domains.txt | shuf`
|
|
||||||
for DOM in $DOMAINS ; do
|
|
||||||
echo rq enqueue mongocrawler.visit hostname=$DOM
|
|
||||||
done
|
|
Loading…
Reference in New Issue
Block a user