This commit is contained in:
Daniel Hládek 2023-03-14 08:59:23 +01:00
parent 2fce373d0f
commit 9b578a7660

View File

@@ -203,7 +203,7 @@ def extract_links(link_batch,responses,domain,rules,default_status="frontlink"):
external_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE)
for link in external_links:
links[link] = "frontlink"
internal_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE)
internal_links = courlan.extract_links(html,final_link,external_bool=False,language=LANGUAGE)
#print(extracted_links)
for link in internal_links:
status = str(default_status)
@@ -305,16 +305,16 @@ def visit(start_link):
navigation_links = get_links(db,domain,"navigation",batch_size)
if start_link is not None:
navigation_links.append(start_link)
print("Navigtaion links")
print(navigation_links)
print(f"Navigation links {len(navigation_links)}")
process_links(db,domain,"frontlink",navigation_links,rules)
links = get_links(db,domain,"frontlink",batch_size)
bl = len(links) - batch_size
print(f"Got {len(links)} frontlinks")
if bl > 0:
print("Getting backlinks")
front_links = get_links(db,domain,"backlink",bl)
links += front_links
print("Pricessing backlinks")
print("Processing backlinks")
process_links(db,domain,"backlink",links,rules=rules)
link_summary(db,domain)