This commit is contained in:
Daniel Hládek 2024-03-06 18:42:17 +01:00
parent c8a18fd8c7
commit 87f84b8eb8

View File

@ -681,6 +681,7 @@ def classify(start_link):
cl.train(trainset) cl.train(trainset)
cl.test(testset) cl.test(testset)
def visit(hostname,filter_content=True): def visit(hostname,filter_content=True):
myclient = pymongo.MongoClient(CONNECTION) myclient = pymongo.MongoClient(CONNECTION)
db=myclient[DBNAME] db=myclient[DBNAME]
@ -711,6 +712,7 @@ def visit(hostname,filter_content=True):
extracted_pages.append((original_link,final_link,html,doc)) extracted_pages.append((original_link,final_link,html,doc))
extracted_links = extract_links(links,responses,hostname,rules,"frontlink") extracted_links = extract_links(links,responses,hostname,rules,"frontlink")
index_links(db,extracted_links) index_links(db,extracted_links)
final_states = [] final_states = []
docs = [] docs = []