zz
This commit is contained in:
parent
c8a18fd8c7
commit
3413b1a190
@ -167,7 +167,8 @@ def fetch_page(link:str)->(str,str):
|
||||
print("fetching:::::")
|
||||
print(link)
|
||||
final_link = link
|
||||
response = trafilatura.fetch_url(link,decode=False)
|
||||
response = trafilatura.fetch_response(link,decode=False)
|
||||
print(response)
|
||||
time.sleep(2)
|
||||
html = None
|
||||
if response is not None :
|
||||
@ -183,7 +184,7 @@ def fetch_page(link:str)->(str,str):
|
||||
good = False
|
||||
LOGGER.error('too large: length %s for URL %s', len(response.data), link)
|
||||
if good:
|
||||
html = trafilatura.utils.decode_response(response)
|
||||
html = trafilatura.utils.decode_file(response.data)
|
||||
if html is not None:
|
||||
html, final_link = trafilatura.spider.refresh_detection(html, final_link)
|
||||
# is there a meta-refresh on the page?
|
||||
|
Loading…
Reference in New Issue
Block a user