From 370cd1536f951e469d067d2672a08a8d11e43671 Mon Sep 17 00:00:00 2001 From: Daniel Hladek Date: Wed, 20 Jan 2021 13:54:47 +0100 Subject: [PATCH] fixes --- .dockerignore | 2 ++ Dockerfile | 4 ---- websucker/agent.py | 2 +- websucker/cli.py | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ee54244 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +venv +websucker.egg-info diff --git a/Dockerfile b/Dockerfile index 58b93a8..b9e6f84 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,6 @@ FROM python:3.8-slim RUN apt-get update && apt-get install -y git curl libcurl4-openssl-dev build-essential vim libssl-dev -# build-essentials vim lxml-dev libxml2-dev libxslt-dev - -#RUN addgroup -S appgroup -g 1000 && \ -# adduser -u 1000 -S appuser -G appgroup RUN addgroup appgroup && \ diff --git a/websucker/agent.py b/websucker/agent.py index ad69f35..071eede 100755 --- a/websucker/agent.py +++ b/websucker/agent.py @@ -430,7 +430,7 @@ def visit_sitemap(domain,connection,parser,db): return True -def visit_links(links,connection,parser,db,is_online): +def visit_links(links,connection,parser,db,is_online=True): """ if the site is not online, then just check links """ diff --git a/websucker/cli.py b/websucker/cli.py index ff540ff..81b5784 100644 --- a/websucker/cli.py +++ b/websucker/cli.py @@ -132,7 +132,7 @@ def start(ctx, link): p = ctx.obj["parser"] c = Connection() visit_links([link],c,p,db) - db.check_domain(domain) + #db.check_domain(domain) @cli.command(help="Continue crawling of seen links from a domain") @click.pass_context