Add missing backend files (app.py, database.py, clear_cache.py)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2533f75f2c
commit
c80694f543
514
backend/app.py
Executable file
514
backend/app.py
Executable file
@ -0,0 +1,514 @@
|
||||
import os
|
||||
import torch
|
||||
from dotenv import load_dotenv
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
import requests
|
||||
from deep_translator import GoogleTranslator
|
||||
from database import get_cached_result, save_to_cache, get_history, get_stats, add_verified_fact, init_db
|
||||
|
||||
# 1. Load the SerpAPI key from .env
load_dotenv()
SERP_API_KEY = os.getenv("SERPAPI_API_KEY")

app = Flask(__name__)
# Allow the (different-origin) frontend to call the JSON API.
CORS(app, supports_credentials=True, allow_headers=['Content-Type'], methods=['GET', 'POST', 'OPTIONS'])

# Initialize Database
init_db()

# 2. Model Configuration
# needs_translation: True  -> inputs must be translated to English before
#                             scoring (English-only NLI model);
#                     False -> multilingual model, no translation needed.
MODELS = {
    "roberta": {
        "name": "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli",
        "type": "sequence-classification",
        "needs_translation": True
    },
    "mdeberta": {
        "name": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
        "type": "sequence-classification",
        "needs_translation": False
    }
}

# Global variables for the currently loaded model (mutated by load_model)
current_model_key = "roberta"
tokenizer = None
model = None
||||
def load_model(model_key="roberta"):
    """Load (or switch to) the NLI model identified by *model_key*.

    Updates the module-level ``tokenizer``, ``model`` and
    ``current_model_key``. Unknown keys fall back to ``"roberta"``;
    requesting the already-active model is a no-op.
    """
    global tokenizer, model, current_model_key

    if model_key not in MODELS:
        model_key = "roberta"

    if model is not None and current_model_key == model_key:
        return  # Already loaded

    print(f" Prepínam model na: {model_key} ({MODELS[model_key]['name']})...")

    # Drop references so the previous model can be garbage-collected.
    # (Fixed: the original guarded each assignment with a redundant
    # `is not None` check and freed CUDA memory via a conditional
    # expression used as a statement.)
    model = None
    tokenizer = None
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    config = MODELS[model_key]
    tokenizer = AutoTokenizer.from_pretrained(config["name"])
    model = AutoModelForSequenceClassification.from_pretrained(config["name"])
    model.eval()  # inference mode — disables dropout etc.
    current_model_key = model_key
    print(f" Model {model_key} načítaný.")


# Initial load
load_model("roberta")
||||
def nli_label(premise: str, hypothesis: str, force_translation=False):
    """Evaluate the NLI relation between *premise* and *hypothesis*.

    Uses the currently loaded model. Returns a tuple
    ``(label, confidence, probs)`` where ``probs`` is
    ``[entailment, neutral, contradiction]``.
    """
    global tokenizer, model, current_model_key

    config = MODELS[current_model_key]

    # Translate the hypothesis to English only if the model requires it (RoBERTa).
    if config["needs_translation"] or force_translation:
        # Very crude Slovak detection: diacritics or common function words.
        is_slovak = any(char in hypothesis for char in 'áäčďéěíĺľňóôŕšťúůýž') or 'je' in hypothesis.split() or 'nie' in hypothesis.split()
        if is_slovak or force_translation:
            try:
                original = hypothesis

                # Quick fix of common missing-diacritics phrases before translating.
                hypothesis_for_translation = hypothesis.replace('hlavne mesto', 'hlavné mesto').replace('slovenska', 'Slovenska')

                hypothesis = GoogleTranslator(source='sk', target='en').translate(hypothesis_for_translation)
                print(f" [Preklad pre {current_model_key}]: '{original}' -> '{hypothesis}'")
            except Exception as e:
                # Best effort: score the untranslated text if translation fails.
                print(f" Preklad zlyhal: {e}")

    # Sequence classification (RoBERTa, mDeBERTa)
    inputs = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits[0]

    probs = torch.softmax(logits, dim=-1).tolist()

    # Both models use the standard mapping: 0: entailment, 1: neutral, 2: contradiction
    labels = ["entailment", "neutral", "contradiction"]
    # Fixed: argmax directly on the Python list instead of rebuilding a tensor.
    idx = max(range(len(probs)), key=probs.__getitem__)

    return labels[idx], probs[idx], probs
||||
@app.route("/api/check", methods=["POST"])
|
||||
def check_fact():
|
||||
data = request.get_json()
|
||||
claim = data.get("claim", "").strip()
|
||||
language = data.get("language", "all")
|
||||
date_from = data.get("dateFrom", "")
|
||||
date_to = data.get("dateTo", "")
|
||||
selected_source = data.get("selectedSource", "all")
|
||||
selected_model = data.get("model", "roberta") # Default to RoBERTa
|
||||
|
||||
# Load the requested model (or switch if needed)
|
||||
try:
|
||||
load_model(selected_model)
|
||||
except Exception as e:
|
||||
print(f"Failed to load model {selected_model}: {e}")
|
||||
return jsonify({"error": f"Failed to load AI model: {selected_model}"}), 500
|
||||
|
||||
if not claim:
|
||||
return jsonify({"error": "Žiadny výrok nebol zadaný"}), 400
|
||||
|
||||
# FILTER POLITICKÝCH A CITLIVÝCH SLOV
|
||||
FORBIDDEN_KEYWORDS = [
|
||||
# Politici/strany SK
|
||||
'fico', 'pellegrini', 'šimečka', 'simecka', 'kotleba', 'matovič', 'matovic',
|
||||
'smer', 'hlas', 'ps', 'progresívne slovensko', 'republika', 'sas', 'oľano', 'olano',
|
||||
# Politici medzinárodní
|
||||
'trump', 'biden', 'putin', 'zelenskyy', 'zelensky', 'orbán', 'orban',
|
||||
# Citlivé témy
|
||||
'vakcína', 'vakcinacia', 'covid', 'koronavirus', 'chemtrails',
|
||||
'voľby', 'volby', 'referendum', 'korupcia', 'korupčný', 'korupcny',
|
||||
# Nadávky a vulgárne výrazy
|
||||
'kokot', 'piča', 'pica', 'kurva', 'jebať', 'jebat', 'piči', 'pici',
|
||||
'kokoti', 'debil', 'idiot', 'kretén', 'kreten', 'zmrd', 'piči', 'pici',
|
||||
'hovädo', 'hovado', 'hajzel', 'sviňa', 'svina', 'smrad', 'kreténka', 'kretenka',
|
||||
'fuck', 'shit', 'bitch', 'asshole', 'bastard'
|
||||
]
|
||||
|
||||
claim_lower = claim.lower()
|
||||
forbidden_found = [word for word in FORBIDDEN_KEYWORDS if word in claim_lower]
|
||||
|
||||
if forbidden_found:
|
||||
print(f" Zakázané slová detekované: {forbidden_found}")
|
||||
return jsonify({
|
||||
"error": "Zakázané slovo",
|
||||
"message": " Systém neakceptuje vulgárne výrazy, politické tvrdenia ani konšpiračné teórie. Zadajte korektné faktické tvrdenie.",
|
||||
"forbidden_words": forbidden_found
|
||||
}), 400
|
||||
|
||||
# CACHE CHECK - najprv skontroluj databázu
|
||||
cached = get_cached_result(claim)
|
||||
if cached:
|
||||
print(f" Cache hit: {claim[:50]}...")
|
||||
# Ignorujeme cache ak používateľ mení filtre (dátum/jazyk/zdroj), pretože cache nemá tieto metadáta
|
||||
# Pre jednoduchosť zatiaľ vrátime cache len ak sú filtre predvolené ("all", bez dátumu)
|
||||
if language == "all" and not date_from and not date_to and selected_source == "all":
|
||||
return jsonify(cached)
|
||||
|
||||
if not SERP_API_KEY:
|
||||
return jsonify({"error": "Chýba SERPAPI_API_KEY v .env"}), 500
|
||||
|
||||
# Detekuj či je claim v slovenčine (diakritika alebo slovenské slová)
|
||||
has_diacritics = any(char in claim for char in 'áäčďéěíĺľňóôŕšťúůýžÁÄČĎÉĚÍĹĽŇÓÔŔŠŤÚŮÝŽ')
|
||||
slovak_words = ['je', 'nieje', 'nie', 'je to', 'a', 'že', 'ktorý', 'ktorá', 'ktoré',
|
||||
'hlavné', 'mesto', 'slovensko', 'slovenská', 'slovenské']
|
||||
has_slovak_words = any(word.lower() in claim.lower() for word in slovak_words)
|
||||
|
||||
# Ak používateľ explicitne vybral 'sk', berieme to ako "potrebujeme translation z SK pre model",
|
||||
# ale PRE SEARCH možno budeme chcieť radšej slovenské výsledky.
|
||||
needs_translation = has_diacritics or has_slovak_words or (language == 'sk')
|
||||
|
||||
# --- NOVÁ LOGIKA PRE JAZYKOVÉ FILTRE A ZDROJE ---
|
||||
|
||||
# OPRAVA TYPO CHÝB V POUŽÍVATEĽSKOM VSTUPE PRE LEPŠÍ PREKLAD A VYHĽADÁVANIE
|
||||
claim_for_translation = claim.replace('hlavne mesto', 'hlavné mesto').replace('slovenska', 'Slovenska')
|
||||
|
||||
SK_TRUSTED_SOURCES = [
|
||||
# Hlavné spravodajstvo
|
||||
'dennikn.sk', 'sme.sk', 'aktuality.sk', 'tasr.sk', 'rtvs.sk', 'teraz.sk',
|
||||
'hnonline.sk', 'pravda.sk', 'trend.sk', 'tyzden.sk', 'postoj.sk', 'euractiv.sk',
|
||||
'noviny.sk', 'tvnoviny.sk', 'ta3.com', 'webnoviny.sk',
|
||||
# Inštitúcie
|
||||
'vladne.sk', 'nrsr.sk', 'prezident.sk', 'gov.sk', 'sav.sk', 'demagog.sk',
|
||||
'minv.sk', 'health.gov.sk', 'policia.sk', 'statistics.sk'
|
||||
]
|
||||
|
||||
search_query = claim
|
||||
|
||||
if language == 'sk':
|
||||
print(" Jazyk nastavený na 'SK' -> Vyhľadávam v slovenčine")
|
||||
search_query = claim_for_translation # Použijeme opravený text s diakritikou pre lepšie výsledky
|
||||
elif needs_translation:
|
||||
try:
|
||||
claim_en = GoogleTranslator(source='sk', target='en').translate(claim_for_translation)
|
||||
print(f" Preklad pre search (EN nastavenie): '{claim_for_translation}' -> '{claim_en}'")
|
||||
search_query = claim_en
|
||||
except Exception as e:
|
||||
print(f"Preklad zlyhal, použijem originál: {e}")
|
||||
search_query = claim
|
||||
|
||||
sites_to_check = []
|
||||
|
||||
# Aplikovanie špecifického zdroja
|
||||
if selected_source != "all":
|
||||
if selected_source == "demagog": sites_to_check = ["demagog.sk"]
|
||||
elif selected_source == "afp": sites_to_check = ["factcheck.afp.com"]
|
||||
elif selected_source == "reuters": sites_to_check = ["reuters.com"]
|
||||
# Ak sa používa dátumový filter a nie je vybraný zdroj, obmedzíme výber pre lepšie výsledky
|
||||
elif date_from or date_to:
|
||||
if language == 'sk':
|
||||
sites_to_check = SK_TRUSTED_SOURCES
|
||||
else:
|
||||
sites_to_check = ['reuters.com', 'apnews.com', 'bbc.com', 'cnn.com', 'nytimes.com', 'theguardian.com']
|
||||
|
||||
if sites_to_check:
|
||||
site_query = " OR ".join([f"site:{site}" for site in sites_to_check[:15]])
|
||||
search_query = f"{search_query} ({site_query})"
|
||||
print(f" Pridaný filter domén k hľadaniu: {site_query}")
|
||||
|
||||
# Zostavenie parametrov pre SerpAPI
|
||||
params = {
|
||||
"engine": "google",
|
||||
"q": search_query,
|
||||
"api_key": SERP_API_KEY,
|
||||
"num": 20,
|
||||
}
|
||||
|
||||
# Aplikovanie filtrov na parametre
|
||||
if language == 'sk':
|
||||
params["gl"] = "sk" # Geolocation: Slovakia
|
||||
params["hl"] = "sk" # UI Language: Slovak
|
||||
elif language == 'cs':
|
||||
params["gl"] = "cz"
|
||||
params["hl"] = "cs"
|
||||
else:
|
||||
params["hl"] = "en"
|
||||
|
||||
# Časové ohraničenie (tbs parameter v Google Search)
|
||||
# Formát: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY
|
||||
if date_from or date_to:
|
||||
tbs_parts = []
|
||||
if date_from:
|
||||
try:
|
||||
# date_from prichádza ako YYYY-MM-DD
|
||||
y, m, d = date_from.split("-")
|
||||
tbs_parts.append(f"cd_min:{int(m)}/{int(d)}/{y}")
|
||||
except:
|
||||
pass
|
||||
if date_to:
|
||||
try:
|
||||
y, m, d = date_to.split("-")
|
||||
tbs_parts.append(f"cd_max:{int(m)}/{int(d)}/{y}")
|
||||
except:
|
||||
pass
|
||||
|
||||
if tbs_parts:
|
||||
# cdr:1 zapína custom date range
|
||||
params["tbs"] = "cdr:1," + ",".join(tbs_parts)
|
||||
print(f" Dátumový filter aplikovaný: {params['tbs']}")
|
||||
|
||||
# Volanie SerpAPI
|
||||
res = requests.get("https://serpapi.com/search", params=params)
|
||||
|
||||
if res.status_code == 429:
|
||||
return jsonify({"error": "Limit SerpAPI prekročený"}), 429
|
||||
|
||||
data = res.json()
|
||||
organic = data.get("organic_results", [])
|
||||
|
||||
# Blacklist nedôveryhodných domén
|
||||
DOMAIN_BLACKLIST = [
|
||||
'spotify.com', 'youtube.com', 'facebook.com', 'instagram.com',
|
||||
'twitter.com', 'tiktok.com', 'pinterest.com', 'reddit.com',
|
||||
'karaoke', 'lyrics', 'texty.cz', 'karaoketexty',
|
||||
# Slovenské nevhodné (študentské, fóra, bulvár, recepty, blogy)
|
||||
'referaty', 'tahaky', 'zones.sk', 'student', 'zadania', 'maturita',
|
||||
'forum', 'diskusia', 'chat', 'komentare', 'blog.', 'nazory.',
|
||||
'mimibazar', 'modrykon', 'diva.sk', 'najmama', 'dobruchut', 'varecha',
|
||||
# Konšpiračné weby
|
||||
'badatel', 'zemavek', 'infovojna', 'slobodnyvysielac', 'hlavnespravy', 'hrot.info',
|
||||
# Hobby/Školské projekty (z obrázku) a iné nerelevantné
|
||||
'hockicko', 'astroportal', 'vesmir.sk', 'szm.com', 'blogspot'
|
||||
]
|
||||
|
||||
# Whitelist dôveryhodných domén (vyššia priorita)
|
||||
DOMAIN_WHITELIST = [
|
||||
'wikipedia.org', 'britannica.com', 'nature.com', 'sciencedirect.com',
|
||||
'ncbi.nlm.nih.gov', 'scholar.google', '.gov', '.edu', 'reuters.com',
|
||||
'apnews.com', 'bbc.com', 'nature.com', 'science.org',
|
||||
# Slovenské spravodajstvo, televízie a agentúry
|
||||
'dennikn.sk', 'sme.sk', 'aktuality.sk', 'tasr.sk', 'rtvs.sk', 'teraz.sk',
|
||||
'hnonline.sk', 'pravda.sk', 'trend.sk', 'tyzden.sk', 'postoj.sk', 'euractiv.sk',
|
||||
'noviny.sk', 'tvnoviny.sk', 'ta3.com', 'webnoviny.sk', 'sita.sk',
|
||||
# Slovenské inštitúcie
|
||||
'vladne.sk', 'nrsr.sk', 'prezident.sk', 'gov.sk', 'sav.sk', 'demagog.sk',
|
||||
'minv.sk', 'mosr.sk', 'mzv.sk', 'health.gov.sk', 'policia.sk', 'statistics.sk',
|
||||
'shmu.sk', 'slov-lex.sk', 'uvzsr.sk',
|
||||
# Slovenské Univerzity
|
||||
'uniba.sk', 'stuba.sk', 'tuke.sk', 'upjs.sk', 'ukf.sk', 'umb.sk', 'sav.sk'
|
||||
]
|
||||
|
||||
# Pridáme preferované zdroje z selectedSource
|
||||
source_priority_list = []
|
||||
if selected_source == "demagog":
|
||||
source_priority_list.append("demagog.sk")
|
||||
if selected_source == "afp":
|
||||
source_priority_list.append("factcheck.afp.com")
|
||||
if selected_source == "reuters":
|
||||
source_priority_list.append("reuters.com")
|
||||
|
||||
# Filtruj výsledky
|
||||
filtered_results = []
|
||||
for r in organic:
|
||||
link = r.get('link', '')
|
||||
snippet = r.get('snippet', '')
|
||||
|
||||
# Preskočiť ak obsahuje blacklist doménu
|
||||
if any(bad in link.lower() for bad in DOMAIN_BLACKLIST):
|
||||
continue
|
||||
|
||||
if snippet:
|
||||
# Výpočet priority
|
||||
priority = 0
|
||||
|
||||
# Najvyššia priorita pre vybraný zdroj
|
||||
if any(s in link.lower() for s in source_priority_list):
|
||||
priority = 2
|
||||
# Stredná priorita pre whitelist (vrátane SK webov)
|
||||
elif any(good in link.lower() for good in DOMAIN_WHITELIST):
|
||||
priority = 1
|
||||
|
||||
# Ak je zvolený SK jazyk, preložíme snippet do EN pre NLI model?
|
||||
# Model vie hlavne EN. Ak je snippet SK, treba preklad!
|
||||
if language == 'sk' or needs_translation:
|
||||
try:
|
||||
snippet_en = GoogleTranslator(source='auto', target='en').translate(snippet)
|
||||
r['snippet_en'] = snippet_en # Uložíme si preklad
|
||||
except:
|
||||
r['snippet_en'] = snippet
|
||||
else:
|
||||
r['snippet_en'] = snippet
|
||||
|
||||
filtered_results.append((priority, r))
|
||||
|
||||
# Zoradi podľa priority (whitelist najprv)
|
||||
filtered_results.sort(key=lambda x: x[0], reverse=True)
|
||||
|
||||
# DEDUPUKÁCIA DOMÉN - aby sme nemali viac výsledkov z tej istej domény
|
||||
seen_domains = set()
|
||||
final_results = []
|
||||
|
||||
for priority, r in filtered_results:
|
||||
link = r.get('link', '')
|
||||
try:
|
||||
# Získaj doménu (napr. bbc.com z https://www.bbc.com/news/...)
|
||||
domain = link.split('//')[-1].split('/')[0].replace('www.', '')
|
||||
except:
|
||||
domain = link
|
||||
|
||||
if domain not in seen_domains:
|
||||
seen_domains.add(domain)
|
||||
final_results.append(r)
|
||||
|
||||
if len(final_results) >= 5:
|
||||
break
|
||||
|
||||
filtered_results = final_results
|
||||
|
||||
snippets = [r.get("snippet_en", r.get("snippet")) for r in filtered_results] # Použijeme preložené
|
||||
links = [r.get("link") for r in filtered_results]
|
||||
|
||||
print(f" Nájdených zdrojov: {len(snippets)}")
|
||||
print(f" Linky: {links[:3]}")
|
||||
|
||||
if not snippets:
|
||||
print(" Žiadne snippety nenájdené!")
|
||||
return jsonify({
|
||||
"claim": claim,
|
||||
"verdict": " Nedostatok zdrojov – neoveriteľné",
|
||||
"sources": links
|
||||
})
|
||||
|
||||
# 4. NLI voting (s váženými skóre podľa confidence)
|
||||
# Už sme detekovali jazyk vyššie pre search, použijeme to isté
|
||||
|
||||
entail_score = 0
|
||||
contra_score = 0
|
||||
evidences_for = []
|
||||
evidences_against = []
|
||||
detailed_sources = []
|
||||
|
||||
for i, snip in enumerate(snippets):
|
||||
nli_translate = needs_translation or (language == 'sk')
|
||||
label, conf, probs = nli_label(snip, claim, force_translation=nli_translate)
|
||||
print(f"\n [{i+1}] NLI Výsledok: {label} (confidence: {conf:.3f})")
|
||||
print(f" Snippet (EN): {snip[:150]}...")
|
||||
|
||||
# SOFT SCORING - započítame všetky pravdepodobnosti, nie len max
|
||||
# probs = [entailment, neutral, contradiction]
|
||||
entail_score += probs[0] # Entailment je index 0
|
||||
contra_score += probs[2] # Contradiction je index 2
|
||||
|
||||
# Pre zobrazenie dôkazov vrátime pôvodný snippet z JSONu (ak bol preložený, chceme vidieť SK pre usera?)
|
||||
# User vidí "evidence". Ak je stránka SK, snippet je SK. Ale model hodnotil EN.
|
||||
# V filtered_results máme pôvodný objekt `r`. Dajme naspäť pôvodný snippet pre UI.
|
||||
original_snippet = filtered_results[i].get('snippet', snip)
|
||||
source_link = filtered_results[i].get('link')
|
||||
|
||||
if label == "entailment":
|
||||
evidences_for.append({"snippet": original_snippet, "confidence": round(conf, 3)})
|
||||
elif label == "contradiction":
|
||||
evidences_against.append({"snippet": original_snippet, "confidence": round(conf, 3)})
|
||||
|
||||
# Uložíme detailné info o zdroji
|
||||
detailed_sources.append({
|
||||
"url": source_link,
|
||||
"label": label,
|
||||
"confidence": round(conf, 3),
|
||||
"entailment_prob": round(probs[0], 3),
|
||||
"contradiction_prob": round(probs[2], 3),
|
||||
"neutral_prob": round(probs[1], 3)
|
||||
})
|
||||
|
||||
# 5. Verdict - používame soft skóre
|
||||
total_score = entail_score + contra_score
|
||||
|
||||
print(f"\n Výsledky NLI (soft scoring):")
|
||||
print(f" Entailment score: {entail_score:.3f}")
|
||||
print(f" Contradiction score: {contra_score:.3f}")
|
||||
print(f" Total score: {total_score:.3f}")
|
||||
|
||||
# Ak je absolútne skóre pre potvrdenie/vyvrátenie príliš nízke oproti počtu nájdených zdrojov
|
||||
if total_score < 1.0 and len(snippets) > 0:
|
||||
verdict = "⚠️ Nejednoznačné"
|
||||
print(" [Nedostatok silných dôkazov - prevládajú 'neutral' výsledky]")
|
||||
elif total_score > 0:
|
||||
entail_ratio = entail_score / total_score
|
||||
contra_ratio = contra_score / total_score
|
||||
print(f" Entail ratio: {entail_ratio:.1%}, Contra ratio: {contra_ratio:.1%}")
|
||||
|
||||
# Zmierňujeme prahy z 60% na 50%
|
||||
if entail_ratio > 0.5:
|
||||
verdict = "✅ Pravda"
|
||||
elif contra_ratio > 0.5:
|
||||
verdict = "❌ Nepravda"
|
||||
else:
|
||||
verdict = "⚠️ Nejednoznačné"
|
||||
else:
|
||||
verdict = "⚠️ Neoveriteľné"
|
||||
|
||||
print(f" Verdikt: {verdict}\n")
|
||||
|
||||
result = {
|
||||
"claim": claim,
|
||||
"nli_votes": {
|
||||
"entailment_score": round(entail_score, 3),
|
||||
"contradiction_score": round(contra_score, 3)
|
||||
},
|
||||
"verdict": verdict,
|
||||
"evidence_for": evidences_for[:3],
|
||||
"evidence_against": evidences_against[:3],
|
||||
"sources": detailed_sources if detailed_sources else links,
|
||||
"model_name": selected_model,
|
||||
"cached": False
|
||||
}
|
||||
|
||||
# Ulož do cache
|
||||
save_to_cache(claim, result, model_name=selected_model)
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
@app.route("/api/history", methods=["GET"])
|
||||
def get_check_history():
|
||||
"""Vráti históriu fact-checkov"""
|
||||
limit = request.args.get("limit", 50, type=int)
|
||||
history = get_history(limit)
|
||||
return jsonify({"history": history, "count": len(history)})
|
||||
|
||||
|
||||
@app.route("/api/stats", methods=["GET"])
|
||||
def get_statistics():
|
||||
"""Vráti štatistiky databázy"""
|
||||
stats = get_stats()
|
||||
return jsonify(stats)
|
||||
|
||||
|
||||
@app.route("/api/admin/add-fact", methods=["POST"])
|
||||
def admin_add_fact():
|
||||
"""Admin endpoint na pridanie overeného faktu"""
|
||||
data = request.get_json()
|
||||
claim = data.get("claim", "").strip()
|
||||
verdict = data.get("verdict", "").strip()
|
||||
explanation = data.get("explanation", "")
|
||||
source_url = data.get("source_url", "")
|
||||
|
||||
if not claim or not verdict:
|
||||
return jsonify({"error": "Claim a verdict sú povinné"}), 400
|
||||
|
||||
success = add_verified_fact(claim, verdict, explanation, source_url)
|
||||
|
||||
if success:
|
||||
return jsonify({"message": "Overený fakt pridaný", "claim": claim})
|
||||
else:
|
||||
return jsonify({"error": "Fakt už existuje"}), 409
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(port=5000, debug=True)
|
||||
29
backend/clear_cache.py
Executable file
29
backend/clear_cache.py
Executable file
@ -0,0 +1,29 @@
|
||||
import sqlite3
|
||||
from database import DB_NAME
|
||||
|
||||
def clear_cache():
    """Delete every cached fact-check and verified fact from the database.

    Also resets the AUTOINCREMENT counters so new rows start at id 1,
    then prints a summary of how many rows were removed.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        cursor = conn.cursor()

        # Delete all cached fact-checks
        cursor.execute("DELETE FROM fact_checks")
        deleted_checks = cursor.rowcount

        # Delete all manually verified facts
        cursor.execute("DELETE FROM verified_facts")
        deleted_facts = cursor.rowcount

        # Reset the autoincrement counters
        cursor.execute("DELETE FROM sqlite_sequence WHERE name='fact_checks'")
        cursor.execute("DELETE FROM sqlite_sequence WHERE name='verified_facts'")

        conn.commit()
    finally:
        # Fix: the connection was leaked if any statement above raised.
        conn.close()

    # Fix: dropped the pointless f-prefix on a placeholder-free string.
    print("✅ Cache vyčistená!")
    print(f" - Vymazaných fact-checkov: {deleted_checks}")
    print(f" - Vymazaných verified facts: {deleted_facts}")


if __name__ == "__main__":
    clear_cache()
|
||||
254
backend/database.py
Executable file
254
backend/database.py
Executable file
@ -0,0 +1,254 @@
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
DB_NAME = "factchecker.db"
|
||||
|
||||
def get_db_connection():
|
||||
"""Vytvorí spojenie s databázou"""
|
||||
conn = sqlite3.connect(DB_NAME)
|
||||
conn.row_factory = sqlite3.Row # Umožní prístup k stĺpcom podľa mena
|
||||
return conn
|
||||
|
||||
def init_db():
    """Create the database tables and index if they do not exist.

    Also migrates pre-existing databases by adding the ``model_name``
    column to ``fact_checks`` when it is missing.
    """
    conn = get_db_connection()
    cursor = conn.cursor()

    # Cached fact-check results
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS fact_checks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            claim_hash TEXT UNIQUE NOT NULL,
            claim TEXT NOT NULL,
            verdict TEXT NOT NULL,
            confidence REAL,
            nli_votes TEXT,
            evidence_for TEXT,
            evidence_against TEXT,
            sources TEXT,
            model_name TEXT,
            checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            check_count INTEGER DEFAULT 1
        )
    ''')

    # MIGRATION: add model_name to databases created before the column
    # existed. For fresh databases the CREATE above already includes it,
    # so the ALTER fails with OperationalError and is deliberately ignored.
    try:
        cursor.execute('ALTER TABLE fact_checks ADD COLUMN model_name TEXT')
        print("Stĺpec 'model_name' bol pridaný do tabuľky fact_checks")
    except sqlite3.OperationalError:
        pass  # column already exists

    # Manually verified facts (admin-curated); duplicate comment block removed.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS verified_facts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            claim TEXT UNIQUE NOT NULL,
            verdict TEXT NOT NULL,
            explanation TEXT,
            source_url TEXT,
            added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            added_by TEXT DEFAULT 'admin'
        )
    ''')

    # Index for fast cache lookups by claim hash
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_claim_hash ON fact_checks(claim_hash)')

    conn.commit()
    conn.close()
    print(" Databáza inicializovaná")
|
||||
|
||||
def hash_claim(claim: str) -> str:
    """Return a stable MD5 hex digest identifying a claim.

    The claim is normalized (lower-cased, surrounding whitespace removed)
    so trivially different spellings map to the same cache key.
    """
    canonical = claim.strip().lower()
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()
|
||||
|
||||
def get_cached_result(claim: str):
    """Look up a claim, returning a result dict or None.

    Manually verified facts take priority over the automatic cache.
    A cache hit also bumps the row's check_count and timestamp.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        claim_hash = hash_claim(claim)

        # Verified facts first (highest priority)
        cursor.execute('''
            SELECT claim, verdict, explanation, source_url, 'verified' as source_type
            FROM verified_facts
            WHERE LOWER(claim) = LOWER(?)
        ''', (claim.strip(),))
        verified = cursor.fetchone()
        if verified:
            return {
                "claim": verified["claim"],
                "verdict": f"✅ {verified['verdict']} (Overené)",
                "explanation": verified["explanation"],
                "sources": [verified["source_url"]] if verified["source_url"] else [],
                "cached": True,
                "verified": True
            }

        # Then the automatic cache
        cursor.execute('''
            SELECT * FROM fact_checks
            WHERE claim_hash = ?
            ORDER BY checked_at DESC
            LIMIT 1
        ''', (claim_hash,))
        result = cursor.fetchone()
        if result is None:
            return None

        # Bump the hit counter and freshness timestamp
        cursor.execute('''
            UPDATE fact_checks
            SET check_count = check_count + 1,
                checked_at = CURRENT_TIMESTAMP
            WHERE claim_hash = ?
        ''', (claim_hash,))
        conn.commit()

        # Deserialize the JSON columns
        return {
            "claim": result["claim"],
            "verdict": result["verdict"],
            "nli_votes": json.loads(result["nli_votes"]) if result["nli_votes"] else None,
            "evidence_for": json.loads(result["evidence_for"]) if result["evidence_for"] else [],
            "evidence_against": json.loads(result["evidence_against"]) if result["evidence_against"] else [],
            "sources": json.loads(result["sources"]) if result["sources"] else [],
            # Older databases may lack the model_name column
            "model_name": result["model_name"] if "model_name" in result.keys() else None,
            "cached": True,
            "checked_at": result["checked_at"],
            "check_count": result["check_count"]
        }
    finally:
        # Fix: the connection was leaked if any query raised; a single
        # finally replaces the three separate close/return paths.
        conn.close()
|
||||
|
||||
def save_to_cache(claim: str, result: dict, model_name: str = "unknown"):
    """Insert or update a cached fact-check result (best effort).

    On a claim-hash conflict the existing row is refreshed and its
    check_count incremented. Returns True on success, False on error.
    """
    conn = get_db_connection()
    claim_hash = hash_claim(claim)

    try:
        cursor = conn.cursor()
        cursor.execute('''
            INSERT INTO fact_checks
            (claim_hash, claim, verdict, confidence, nli_votes, evidence_for, evidence_against, sources, model_name)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(claim_hash) DO UPDATE SET
                verdict = excluded.verdict,
                confidence = excluded.confidence,
                nli_votes = excluded.nli_votes,
                evidence_for = excluded.evidence_for,
                evidence_against = excluded.evidence_against,
                sources = excluded.sources,
                model_name = excluded.model_name,
                checked_at = CURRENT_TIMESTAMP,
                check_count = check_count + 1
        ''', (
            claim_hash,
            claim,
            result.get("verdict", ""),
            result.get("confidence"),
            json.dumps(result.get("nli_votes")) if result.get("nli_votes") else None,
            json.dumps(result.get("evidence_for", [])),
            json.dumps(result.get("evidence_against", [])),
            json.dumps(result.get("sources", [])),
            model_name
        ))
        conn.commit()
        return True
    except Exception as e:
        # Deliberate best-effort: a cache failure must not fail the request.
        print(f" Chyba pri ukladaní do cache: {e}")
        return False
    finally:
        # Fix: a single finally guarantees the connection is released on
        # every path instead of duplicating close() in both branches.
        conn.close()
|
||||
|
||||
def get_history(limit: int = 50):
    """Return the most recent fact-checks as a list of dicts (newest first)."""
    conn = get_db_connection()
    cursor = conn.cursor()

    # Prefer the query including model_name; fall back for databases
    # created before that column existed.
    try:
        cursor.execute('''
            SELECT claim, verdict, checked_at, check_count, sources, model_name
            FROM fact_checks
            ORDER BY checked_at DESC
            LIMIT ?
        ''', (limit,))
    except sqlite3.OperationalError:
        cursor.execute('''
            SELECT claim, verdict, checked_at, check_count, sources, NULL as model_name
            FROM fact_checks
            ORDER BY checked_at DESC
            LIMIT ?
        ''', (limit,))

    rows = cursor.fetchall()
    conn.close()

    history = []
    for entry in rows:
        history.append({
            "claim": entry["claim"],
            "verdict": entry["verdict"],
            "checked_at": entry["checked_at"],
            "check_count": entry["check_count"],
            "sources": json.loads(entry["sources"]) if entry["sources"] else [],
            "model_name": entry["model_name"]
        })
    return history
|
||||
|
||||
def add_verified_fact(claim: str, verdict: str, explanation: str = None, source_url: str = None):
    """Insert a manually verified fact (admin feature).

    Returns True when stored, False when the claim already exists.
    """
    conn = get_db_connection()
    cursor = conn.cursor()

    try:
        cursor.execute('''
            INSERT INTO verified_facts (claim, verdict, explanation, source_url)
            VALUES (?, ?, ?, ?)
        ''', (claim, verdict, explanation, source_url))
    except sqlite3.IntegrityError:
        # UNIQUE constraint on claim -> the fact is already present.
        conn.close()
        return False

    conn.commit()
    conn.close()
    return True
|
||||
|
||||
def get_stats():
    """Return aggregate statistics about the database."""
    conn = get_db_connection()
    cursor = conn.cursor()

    cursor.execute('SELECT COUNT(*) as total FROM fact_checks')
    unique_claims = cursor.fetchone()["total"]

    # SUM is NULL on an empty table -> coalesce to 0
    cursor.execute('SELECT SUM(check_count) as total_checks FROM fact_checks')
    total_checks = cursor.fetchone()["total_checks"] or 0

    cursor.execute('SELECT COUNT(*) as verified_count FROM verified_facts')
    verified_facts = cursor.fetchone()["verified_count"]

    conn.close()

    return {
        "unique_claims": unique_claims,
        "total_checks": total_checks,
        "verified_facts": verified_facts
    }


# Initialize the database on first import
init_db()
|
||||
Loading…
Reference in New Issue
Block a user