From 9b578a7660def9bffeb750b6c913160a14d15a7b Mon Sep 17 00:00:00 2001 From: Daniel Hladek Date: Tue, 14 Mar 2023 08:59:23 +0100 Subject: [PATCH] zz --- mongo/mongocwarler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mongo/mongocwarler.py b/mongo/mongocwarler.py index ef52087..71f3f3e 100644 --- a/mongo/mongocwarler.py +++ b/mongo/mongocwarler.py @@ -203,7 +203,7 @@ def extract_links(link_batch,responses,domain,rules,default_status="frontlink"): external_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE) for link in external_links: links[link] = "frontlink" - internal_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE) + internal_links = courlan.extract_links(html,final_link,external_bool=False,language=LANGUAGE) #print(extracted_links) for link in internal_links: status = str(default_status) @@ -305,16 +305,16 @@ def visit(start_link): navigation_links = get_links(db,domain,"navigation",batch_size) if start_link is not None: navigation_links.append(start_link) - print("Navigtaion links") - print(navigation_links) + print(f"Navigation links {len(navigation_links)}") process_links(db,domain,"frontlink",navigation_links,rules) links = get_links(db,domain,"frontlink",batch_size) bl = len(links) - batch_size + print(f"Got {len(links)} frontlinks") if bl > 0: print("Getting backlinks") front_links = get_links(db,domain,"backlink",bl) links += front_links - print("Pricessing backlinks") + print("Processing backlinks") process_links(db,domain,"backlink",links,rules=rules) link_summary(db,domain)