diff --git a/mongo/Dockerfile b/mongo/Dockerfile index 3dfd9ce..1a2ab22 100644 --- a/mongo/Dockerfile +++ b/mongo/Dockerfile @@ -4,4 +4,4 @@ COPY requirements.txt /app RUN pip install -r /app/requirements.txt COPY *.py /app WORKDIR /app -ENTRYPOINT ["python", "./mongocrawler.py"] +ENTRYPOINT ["rq", "worker"] diff --git a/mongo/docker-compose.yaml b/mongo/docker-compose.yaml index b527f0a..b5a5195 100644 --- a/mongo/docker-compose.yaml +++ b/mongo/docker-compose.yaml @@ -1,5 +1,9 @@ version: "3.0" services: + redis: + image: redis + ports: + - 6379:6379 mongo: image: mongo environment: diff --git a/mongo/mongocrawler.py b/mongo/mongocrawler.py index 9eb738d..0daf82e 100644 --- a/mongo/mongocrawler.py +++ b/mongo/mongocrawler.py @@ -591,8 +591,6 @@ def visit(start_link): batch_size = BATCHSIZE rules = fetch_robot(hostname) # renew front links - sitemap_links = fetch_sitemap_links(start_link) - index_links(db,sitemap_links) front_links = fetch_front_links(start_link,rules) index_links(db,front_links) # start crawling