This commit is contained in:
Daniel Hládek 2021-01-20 13:54:47 +01:00
parent 96bde590ad
commit 370cd1536f
4 changed files with 4 additions and 6 deletions

2
.dockerignore Normal file
View File

@@ -0,0 +1,2 @@
venv
websucker.egg-info

View File

@@ -1,10 +1,6 @@
FROM python:3.8-slim FROM python:3.8-slim
RUN apt-get update && apt-get install -y git curl libcurl4-openssl-dev build-essential vim libssl-dev RUN apt-get update && apt-get install -y git curl libcurl4-openssl-dev build-essential vim libssl-dev
# build-essentials vim lxml-dev libxml2-dev libxslt-dev
#RUN addgroup -S appgroup -g 1000 && \
# adduser -u 1000 -S appuser -G appgroup
RUN addgroup appgroup && \ RUN addgroup appgroup && \

View File

@@ -430,7 +430,7 @@ def visit_sitemap(domain,connection,parser,db):
return True return True
def visit_links(links,connection,parser,db,is_online): def visit_links(links,connection,parser,db,is_online=True):
""" """
if the site is not online, then just check links if the site is not online, then just check links
""" """

View File

@@ -132,7 +132,7 @@ def start(ctx, link):
p = ctx.obj["parser"] p = ctx.obj["parser"]
c = Connection() c = Connection()
visit_links([link],c,p,db) visit_links([link],c,p,db)
db.check_domain(domain) #db.check_domain(domain)
@cli.command(help="Continue crawling of seen links from a domain") @cli.command(help="Continue crawling of seen links from a domain")
@click.pass_context @click.pass_context