zz
This commit is contained in:
parent
ad52af705b
commit
f5dc1f42cf
@ -711,10 +711,21 @@ def crawl_summary():
|
|||||||
batchcol = db["batches"]
|
batchcol = db["batches"]
|
||||||
yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
|
yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
|
||||||
print(yesterday)
|
print(yesterday)
|
||||||
res = batchcol.find({"created_at":{"$gt": yesterday.utcnow()}},limit=20).sort("average_fetch_characters")
|
res = batchcol.find({"created_at":{"$lt": yesterday.utcnow()}},limit=20).sort("average_fetch_characters")
|
||||||
|
res = batchcol.aggregate([
|
||||||
|
{"$match":{"created_at":{"$lt": yesterday.utcnow()}}},
|
||||||
|
{"$group":{"_id":"$host",
|
||||||
|
"document_count":{"$sum":{"document_count":1}},
|
||||||
|
"good_document_count":{"$sum":{"good_document_count":1}},
|
||||||
|
"batch_size":{"$sum":{"batch_size":1}},
|
||||||
|
"count":{"$sum":1},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
])
|
||||||
print(">>>> Batches")
|
print(">>>> Batches")
|
||||||
for item in res:
|
for item in res:
|
||||||
print(item["url"],item["average_fetch_characters"])
|
print(item)
|
||||||
|
#print(item["host"],item["document_count"],item["good_document_count"],item["created_at"])
|
||||||
domaincol = db["domains"]
|
domaincol = db["domains"]
|
||||||
print(">>>> Best domains")
|
print(">>>> Best domains")
|
||||||
res = domaincol.find({},limit=100).sort("average_fetch_characters")
|
res = domaincol.find({},limit=100).sort("average_fetch_characters")
|
||||||
|
Loading…
Reference in New Issue
Block a user