Compare commits

...

2 Commits

Author SHA1 Message Date
a7d048c952 zz 2023-05-12 08:11:33 +02:00
93717bed14 zz 2023-05-12 08:11:15 +02:00
2 changed files with 11 additions and 0 deletions

View File

@ -739,6 +739,13 @@ def crawl_summary():
for item in res: for item in res:
values = [str(item[x]) for x in headers] values = [str(item[x]) for x in headers]
print("\t".join(values)) print("\t".join(values))
contentcol = db["content"]
res = contentcol.aggregate([
{"$group":{"_id":None,total_text_size:{"$sum":"$text_size"}}}
])
print(">>>>> Total text size in content")
for item in res:
print(res)
def import_html(): def import_html():

4
mongo/submitdomains.sh Normal file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Print one `rq enqueue mongocrawler.visit hostname=<domain>` line per domain
# found in a domains file, in random order.
#
# Usage: sh submitdomains.sh [DOMAINS_FILE]
#   DOMAINS_FILE  text file with ';'-separated fields; the first field of each
#                 line is taken as the domain (default: ./domains.txt).
#
# NOTE: the commands are only printed, never executed by this script.
# Presumably the output is meant to be reviewed or piped into a shell --
# confirm with the author before changing that.

#######################################
# Emit the enqueue command line for every domain in a file.
# Arguments: $1 - path to the domains file
# Outputs:   one command line per non-empty domain on stdout, shuffled
# Returns:   1 if the file cannot be read, otherwise the pipeline status
#######################################
submit_domains() {
  if [ ! -r "$1" ]; then
    printf 'submitdomains: cannot read %s\n' "$1" >&2
    return 1
  fi
  # Read line by line instead of word-splitting an unquoted expansion, so
  # glob characters such as `*` in the data are never expanded against the
  # current directory. `read` with the default IFS trims surrounding blanks.
  cut -d ';' -f 1 -- "$1" | shuf | while read -r domain; do
    # Blank lines / empty first fields carry no hostname: skip them.
    [ -n "$domain" ] || continue
    printf 'rq enqueue mongocrawler.visit hostname=%s\n' "$domain"
  done
}

# Keep this the last command so its status becomes the script's exit status.
submit_domains "${1:-domains.txt}"