Compare commits
3 Commits
adfe77be24
...
370cd1536f
Author | SHA1 | Date | |
---|---|---|---|
370cd1536f | |||
96bde590ad | |||
92b9f8d489 |
2
.dockerignore
Normal file
2
.dockerignore
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
venv
|
||||||
|
websucker.egg-info
|
18
Dockerfile
18
Dockerfile
@@ -1,14 +1,16 @@
|
|||||||
FROM python:3.8.0-alpine
|
FROM python:3.8-slim
|
||||||
|
|
||||||
RUN apk add --update --no-cache git curl curl-dev vim py3-lxml gcc make libxml2-dev libxslt-dev libc-dev
|
RUN apt-get update && apt-get install -y git curl libcurl4-openssl-dev build-essential vim libssl-dev
|
||||||
|
|
||||||
RUN addgroup -S appgroup -g 1000 && \
|
|
||||||
adduser -u 1000 -S appuser -G appgroup
|
|
||||||
|
|
||||||
RUN mkdir /app
|
RUN addgroup appgroup && \
|
||||||
|
adduser appuser && adduser appuser appgroup
|
||||||
|
|
||||||
ADD requirements.txt /
|
RUN mkdir /app /src
|
||||||
RUN CASS_DRIVER_BUILD_CONCURRENCY=4 pip install -r /requirements.txt
|
|
||||||
RUN pip install https://git.kemt.fei.tuke.sk/dano/websucker-pip/archive/master.zip
|
ADD requirements.txt /src
|
||||||
|
RUN CASS_DRIVER_BUILD_CONCURRENCY=4 pip install -r /src/requirements.txt
|
||||||
|
ADD . /src
|
||||||
|
RUN pip install /src
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
ENTRYPOINT ["websuck"]
|
ENTRYPOINT ["websuck"]
|
||||||
|
@@ -430,7 +430,7 @@ def visit_sitemap(domain,connection,parser,db):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def visit_links(links,connection,parser,db,is_online):
|
def visit_links(links,connection,parser,db,is_online=True):
|
||||||
"""
|
"""
|
||||||
if the site is not online, then just check links
|
if the site is not online, then just check links
|
||||||
"""
|
"""
|
||||||
|
@@ -132,7 +132,7 @@ def start(ctx, link):
|
|||||||
p = ctx.obj["parser"]
|
p = ctx.obj["parser"]
|
||||||
c = Connection()
|
c = Connection()
|
||||||
visit_links([link],c,p,db)
|
visit_links([link],c,p,db)
|
||||||
db.check_domain(domain)
|
#db.check_domain(domain)
|
||||||
|
|
||||||
@cli.command(help="Continue crawling of seen links from a domain")
|
@cli.command(help="Continue crawling of seen links from a domain")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
|
Loading…
Reference in New Issue
Block a user