This commit is contained in:
Daniel Hládek 2023-03-14 08:59:23 +01:00
parent 2fce373d0f
commit 9b578a7660

View File

@@ -203,7 +203,7 @@ def extract_links(link_batch,responses,domain,rules,default_status="frontlink"):
external_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE) external_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE)
for link in external_links: for link in external_links:
links[link] = "frontlink" links[link] = "frontlink"
internal_links = courlan.extract_links(html,final_link,external_bool=True,language=LANGUAGE) internal_links = courlan.extract_links(html,final_link,external_bool=False,language=LANGUAGE)
#print(extracted_links) #print(extracted_links)
for link in internal_links: for link in internal_links:
status = str(default_status) status = str(default_status)
@@ -305,16 +305,16 @@ def visit(start_link):
navigation_links = get_links(db,domain,"navigation",batch_size) navigation_links = get_links(db,domain,"navigation",batch_size)
if start_link is not None: if start_link is not None:
navigation_links.append(start_link) navigation_links.append(start_link)
print("Navigtaion links") print(f"Navigation links {len(navigation_links)}")
print(navigation_links)
process_links(db,domain,"frontlink",navigation_links,rules) process_links(db,domain,"frontlink",navigation_links,rules)
links = get_links(db,domain,"frontlink",batch_size) links = get_links(db,domain,"frontlink",batch_size)
bl = len(links) - batch_size bl = len(links) - batch_size
print(f"Got {len(links)} frontlinks")
if bl > 0: if bl > 0:
print("Getting backlinks") print("Getting backlinks")
front_links = get_links(db,domain,"backlink",bl) front_links = get_links(db,domain,"backlink",bl)
links += front_links links += front_links
print("Pricessing backlinks") print("Processing backlinks")
process_links(db,domain,"backlink",links,rules=rules) process_links(db,domain,"backlink",links,rules=rules)
link_summary(db,domain) link_summary(db,domain)