This commit is contained in:
Daniel Hládek 2023-04-07 09:18:18 +02:00
parent e06ef64c8f
commit 7d09f112df
3 changed files with 5 additions and 3 deletions

View File

@@ -4,4 +4,4 @@ COPY requirements.txt /app
RUN pip install -r /app/requirements.txt RUN pip install -r /app/requirements.txt
COPY *.py /app COPY *.py /app
WORKDIR /app WORKDIR /app
ENTRYPOINT ["python", "./mongocrawler.py"] ENTRYPOINT ["rq", "worker"]

View File

@@ -1,5 +1,9 @@
version: "3.0" version: "3.0"
services: services:
redis:
image: redis
ports:
- 6379:6379
mongo: mongo:
image: mongo image: mongo
environment: environment:

View File

@@ -591,8 +591,6 @@ def visit(start_link):
batch_size = BATCHSIZE batch_size = BATCHSIZE
rules = fetch_robot(hostname) rules = fetch_robot(hostname)
# renew front links # renew front links
sitemap_links = fetch_sitemap_links(start_link)
index_links(db,sitemap_links)
front_links = fetch_front_links(start_link,rules) front_links = fetch_front_links(start_link,rules)
index_links(db,front_links) index_links(db,front_links)
# start crawling # start crawling