Compare commits

..

No commits in common. "370cd1536f951e469d067d2672a08a8d11e43671" and "adfe77be24fff8c069ffe655ea7c299ff58fe31d" have entirely different histories.

4 changed files with 10 additions and 14 deletions

View File

@@ -1,2 +0,0 @@
venv
websucker.egg-info

View File

@@ -1,16 +1,14 @@
FROM python:3.8-slim FROM python:3.8.0-alpine
RUN apt-get update && apt-get install -y git curl libcurl4-openssl-dev build-essential vim libssl-dev RUN apk add --update --no-cache git curl curl-dev vim py3-lxml gcc make libxml2-dev libxslt-dev libc-dev
RUN addgroup -S appgroup -g 1000 && \
adduser -u 1000 -S appuser -G appgroup
RUN addgroup appgroup && \ RUN mkdir /app
adduser appuser && adduser appuser appgroup
RUN mkdir /app /src ADD requirements.txt /
RUN CASS_DRIVER_BUILD_CONCURRENCY=4 pip install -r /requirements.txt
ADD requirements.txt /src RUN pip install https://git.kemt.fei.tuke.sk/dano/websucker-pip/archive/master.zip
RUN CASS_DRIVER_BUILD_CONCURRENCY=4 pip install -r /src/requirements.txt
ADD . /src
RUN pip install /src
WORKDIR /app WORKDIR /app
ENTRYPOINT ["websuck"] ENTRYPOINT ["websuck"]

View File

@@ -430,7 +430,7 @@ def visit_sitemap(domain,connection,parser,db):
return True return True
def visit_links(links,connection,parser,db,is_online=True): def visit_links(links,connection,parser,db,is_online):
""" """
if the site is not online, then just check links if the site is not online, then just check links
""" """

View File

@@ -132,7 +132,7 @@ def start(ctx, link):
p = ctx.obj["parser"] p = ctx.obj["parser"]
c = Connection() c = Connection()
visit_links([link],c,p,db) visit_links([link],c,p,db)
#db.check_domain(domain) db.check_domain(domain)
@cli.command(help="Continue crawling of seen links from a domain") @cli.command(help="Continue crawling of seen links from a domain")
@click.pass_context @click.pass_context