zz
This commit is contained in:
parent
ad52af705b
commit
f5dc1f42cf
@ -711,10 +711,21 @@ def crawl_summary():
|
||||
batchcol = db["batches"]
|
||||
yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
|
||||
print(yesterday)
|
||||
res = batchcol.find({"created_at":{"$gt": yesterday.utcnow()}},limit=20).sort("average_fetch_characters")
|
||||
res = batchcol.find({"created_at":{"$lt": yesterday.utcnow()}},limit=20).sort("average_fetch_characters")
|
||||
res = batchcol.aggregate([
|
||||
{"$match":{"created_at":{"$lt": yesterday.utcnow()}}},
|
||||
{"$group":{"_id":"$host",
|
||||
"document_count":{"$sum":{"document_count":1}},
|
||||
"good_document_count":{"$sum":{"good_document_count":1}},
|
||||
"batch_size":{"$sum":{"batch_size":1}},
|
||||
"count":{"$sum":1},
|
||||
}
|
||||
},
|
||||
])
|
||||
print(">>>> Batches")
|
||||
for item in res:
|
||||
print(item["url"],item["average_fetch_characters"])
|
||||
print(item)
|
||||
#print(item["host"],item["document_count"],item["good_document_count"],item["created_at"])
|
||||
domaincol = db["domains"]
|
||||
print(">>>> Best domains")
|
||||
res = domaincol.find({},limit=100).sort("average_fetch_characters")
|
||||
|
Loading…
Reference in New Issue
Block a user