zz
This commit is contained in:
parent
e06ef64c8f
commit
7d09f112df
@ -4,4 +4,4 @@ COPY requirements.txt /app
|
|||||||
RUN pip install -r /app/requirements.txt
|
RUN pip install -r /app/requirements.txt
|
||||||
COPY *.py /app
|
COPY *.py /app
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
ENTRYPOINT ["python", "./mongocrawler.py"]
|
ENTRYPOINT ["rq", "worker"]
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
version: "3.0"
|
version: "3.0"
|
||||||
services:
|
services:
|
||||||
|
redis:
|
||||||
|
image: redis
|
||||||
|
ports:
|
||||||
|
- 6379:6379
|
||||||
mongo:
|
mongo:
|
||||||
image: mongo
|
image: mongo
|
||||||
environment:
|
environment:
|
||||||
|
@ -591,8 +591,6 @@ def visit(start_link):
|
|||||||
batch_size = BATCHSIZE
|
batch_size = BATCHSIZE
|
||||||
rules = fetch_robot(hostname)
|
rules = fetch_robot(hostname)
|
||||||
# renew front links
|
# renew front links
|
||||||
sitemap_links = fetch_sitemap_links(start_link)
|
|
||||||
index_links(db,sitemap_links)
|
|
||||||
front_links = fetch_front_links(start_link,rules)
|
front_links = fetch_front_links(start_link,rules)
|
||||||
index_links(db,front_links)
|
index_links(db,front_links)
|
||||||
# start crawling
|
# start crawling
|
||||||
|
Loading…
Reference in New Issue
Block a user