diff --git a/mongo/cli.py b/mongo/cli.py
index 52089ad..d3ebcaa 100644
--- a/mongo/cli.py
+++ b/mongo/cli.py
@@ -32,9 +32,10 @@ def classify(start_link):
     mongocrawler.classify(start_link)
 
 @cli.command()
 @click.argument("hostname")
-def visit(hostname):
-    mongocrawler.visit(hostname)
+@click.option("--filter_content",default=True,help="Filter content")
+def visit(hostname,filter_content=True):
+    mongocrawler.visit(hostname,filter_content=filter_content)
 
 @cli.command()
 def summary():
diff --git a/mongo/mongocrawler.py b/mongo/mongocrawler.py
index 6677429..7a6d33c 100644
--- a/mongo/mongocrawler.py
+++ b/mongo/mongocrawler.py
@@ -24,6 +24,8 @@ import hashlib
 from bs4 import BeautifulSoup
 import urllib.parse
 import os.path
+import binascii
+import json
 
 # database options
 CONNECTION=os.getenv("SUCKER_CONNECTION","mongodb://root:example@localhost:27017/")
@@ -166,6 +168,7 @@ def fetch_page(link:str)->(str,str):
     html = None
     if response is not None :
         good = True
+        print(response)
         if response.status != 200:
             good = False
             LOGGER.error('not a 200 response: %s for URL %s', response.status, url)
@@ -227,7 +230,7 @@ def set_content_checksums(doc):
             sentences += 1
     doc["sentences_count"] = sentences
 
-def index_page(db,original_link,final_link,html,doc):
+def index_page(db,original_link,final_link,html,doc,filter_content=True):
     linkcol = db["links"]
     htmlcol = db["html"]
     contentcol = db["content"]
@@ -246,7 +249,7 @@ def index_page(db,original_link,final_link,html,doc):
             set_content_checksums(doc)
             tsz = doc["text_size"]
             psz = doc["paragraph_sizes_sum"]
-            if tsz < MIN_TEXT_SIZE or psz/tsz < TEXT_TRASH_RATIO:
+            if filter_content and (tsz < MIN_TEXT_SIZE or psz/tsz < TEXT_TRASH_RATIO):
                 state = "small"
             # check copy
             if state == "good":
@@ -258,7 +261,7 @@ def index_page(db,original_link,final_link,html,doc):
                     origsz += paragraph_size
                 doc["original_text_size"] = origsz
 
-                if (1 - (origsz / tsz)) > TEXT_TRASH_RATIO:
+                if filter_content and (1 - (origsz / tsz)) > TEXT_TRASH_RATIO:
                     state = "copy"
             if state == "good":
                 htdoc = get_link_doc(link,state)
@@ -673,7 +676,7 @@ def classify(start_link):
     cl.train(trainset)
     cl.test(testset)
 
-def visit(hostname):
+def visit(hostname,filter_content=True):
     myclient = pymongo.MongoClient(CONNECTION)
     db=myclient[DBNAME]
     batch_size = BATCH_SIZE
@@ -707,7 +710,7 @@ def visit(hostname):
     final_states = []
     docs = []
     for original_link,final_link,html,doc in extracted_pages:
-        status = index_page(db,original_link,final_link,html,doc)
+        status = index_page(db,original_link,final_link,html,doc,filter_content)
         final_states.append(status)
         docs.append(doc)
     save_batch_info(db,hostname,final_states,docs)
@@ -737,8 +740,6 @@ def crawl_summary():
        values = [str(item[x]) for x in headers]
        print("\t".join(values))
 
-import binascii
-import json
 
 def import_html():
    myclient= pymongo.MongoClient(CONNECTION)
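
A minimal usage sketch of the new flag from Python, bypassing the CLI. The import path `mongo.mongocrawler` and the example hostname are assumptions, not taken from the diff; the connection string is read from SUCKER_CONNECTION as in the patched module.

    # assuming the repository root is on PYTHONPATH and MongoDB is reachable;
    # filter_content=False skips the text-size and copy-ratio checks in
    # index_page, so short or duplicated pages are stored as well
    from mongo import mongocrawler

    mongocrawler.visit("example.com", filter_content=False)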