zz
This commit is contained in:
		
							parent
							
								
									e06ef64c8f
								
							
						
					
					
						commit
						7d09f112df
					
				@ -4,4 +4,4 @@ COPY requirements.txt /app
 | 
				
			|||||||
RUN pip install -r /app/requirements.txt
 | 
					RUN pip install -r /app/requirements.txt
 | 
				
			||||||
COPY *.py /app
 | 
					COPY *.py /app
 | 
				
			||||||
WORKDIR /app
 | 
					WORKDIR /app
 | 
				
			||||||
ENTRYPOINT ["python", "./mongocrawler.py"]
 | 
					ENTRYPOINT ["rq", "worker"]
 | 
				
			||||||
 | 
				
			|||||||
@ -1,5 +1,9 @@
 | 
				
			|||||||
version: "3.0"
 | 
					version: "3.0"
 | 
				
			||||||
services:
 | 
					services:
 | 
				
			||||||
 | 
					  redis:
 | 
				
			||||||
 | 
					    image: redis
 | 
				
			||||||
 | 
					    ports:
 | 
				
			||||||
 | 
					     - 6379:6379
 | 
				
			||||||
  mongo:
 | 
					  mongo:
 | 
				
			||||||
    image: mongo
 | 
					    image: mongo
 | 
				
			||||||
    environment:
 | 
					    environment:
 | 
				
			||||||
 | 
				
			|||||||
@ -591,8 +591,6 @@ def visit(start_link):
 | 
				
			|||||||
    batch_size = BATCHSIZE
 | 
					    batch_size = BATCHSIZE
 | 
				
			||||||
    rules = fetch_robot(hostname)
 | 
					    rules = fetch_robot(hostname)
 | 
				
			||||||
    # renew front links
 | 
					    # renew front links
 | 
				
			||||||
    sitemap_links = fetch_sitemap_links(start_link)
 | 
					 | 
				
			||||||
    index_links(db,sitemap_links)
 | 
					 | 
				
			||||||
    front_links = fetch_front_links(start_link,rules)
 | 
					    front_links = fetch_front_links(start_link,rules)
 | 
				
			||||||
    index_links(db,front_links)
 | 
					    index_links(db,front_links)
 | 
				
			||||||
    # start crawling
 | 
					    # start crawling
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user