Add missing backend files (app.py, database.py, clear_cache.py)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Džubara 2026-03-26 15:30:09 +01:00
parent 2533f75f2c
commit c80694f543
3 changed files with 797 additions and 0 deletions

514
backend/app.py Executable file
View File

@ -0,0 +1,514 @@
import os
import torch
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import requests
from deep_translator import GoogleTranslator
from database import get_cached_result, save_to_cache, get_history, get_stats, add_verified_fact, init_db

# 1. Load API key from .env into the process environment
load_dotenv()
SERP_API_KEY = os.getenv("SERPAPI_API_KEY")

# Flask app with permissive CORS so the separate frontend can call the API
app = Flask(__name__)
CORS(app, supports_credentials=True, allow_headers=['Content-Type'], methods=['GET', 'POST', 'OPTIONS'])

# Initialize Database (creates tables on first run; idempotent)
init_db()
# 2. Model Configuration
# Registry of the NLI models selectable through the API's "model" field.
MODELS = {
    "roberta": {
        "name": "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli",
        "type": "sequence-classification",
        "needs_translation": True  # English-only model: Slovak input is translated first
    },
    "mdeberta": {
        "name": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
        "type": "sequence-classification",
        "needs_translation": False  # multilingual model, no translation needed
    }
}

# Global variables for current model (mutated by load_model)
current_model_key = "roberta"
tokenizer = None
model = None
def load_model(model_key="roberta"):
    """Load (or switch to) one of the NLI models configured in MODELS.

    Mutates the module-level ``tokenizer``/``model``/``current_model_key``
    globals in place. Unknown keys fall back to "roberta"; requesting the
    model that is already loaded is a no-op.

    Raises whatever ``from_pretrained`` raises (network/IO errors) —
    callers handle that (see check_fact).
    """
    global tokenizer, model, current_model_key
    if model_key not in MODELS:
        model_key = "roberta"
    if model is not None and current_model_key == model_key:
        return  # Already loaded
    print(f" Prepínam model na: {model_key} ({MODELS[model_key]['name']})...")
    # Drop references to the old model so the GC can reclaim its memory
    if model is not None:
        model = None
    if tokenizer is not None:
        tokenizer = None
    # Idiom fix: a plain statement instead of a conditional expression
    # evaluated solely for its side effect.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    config = MODELS[model_key]
    tokenizer = AutoTokenizer.from_pretrained(config["name"])
    model = AutoModelForSequenceClassification.from_pretrained(config["name"])
    model.eval()  # inference mode: disables dropout etc.
    current_model_key = model_key
    print(f" Model {model_key} načítaný.")
# Initial load — runs at import time, so importing this module pulls the
# default RoBERTa model into memory.
load_model("roberta")
def nli_label(premise: str, hypothesis: str, force_translation=False):
    """Classify the NLI relation between ``premise`` and ``hypothesis``.

    Uses the currently loaded model (see ``load_model``). When the model
    is English-only (or ``force_translation`` is set), a Slovak-looking
    hypothesis is machine-translated to English first; translation
    failures are logged and the original text is scored instead.

    Returns:
        (label, confidence, probs) — label is one of "entailment",
        "neutral", "contradiction"; probs is the full 3-way distribution
        in that order.
    """
    global tokenizer, model, current_model_key
    config = MODELS[current_model_key]
    # Translate to EN only if the model requires it (RoBERTa)
    if config["needs_translation"] or force_translation:
        # Very rough Slovak-language heuristic: diacritics or common words
        is_slovak = any(char in hypothesis for char in 'áäčďéěíĺľňóôŕšťúůýž') or 'je' in hypothesis.split() or 'nie' in hypothesis.split()
        if is_slovak or force_translation:
            try:
                original = hypothesis
                # Quick fix of missing diacritics in common phrases before translating
                hypothesis_for_translation = hypothesis.replace('hlavne mesto', 'hlavné mesto').replace('slovenska', 'Slovenska')
                hypothesis = GoogleTranslator(source='sk', target='en').translate(hypothesis_for_translation)
                print(f" [Preklad pre {current_model_key}]: '{original}' -> '{hypothesis}'")
            except Exception as e:
                print(f" Preklad zlyhal: {e}")
    # Sequence-classification scoring (RoBERTa, mDeBERTa)
    inputs = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits[0]
    probs = torch.softmax(logits, dim=-1).tolist()
    # Both models use the standard mapping: 0: entailment, 1: neutral, 2: contradiction
    labels = ["entailment", "neutral", "contradiction"]
    # Idiom/perf fix: probs is already a Python list — take its argmax
    # directly instead of rebuilding a tensor just to call torch.argmax.
    idx = probs.index(max(probs))
    return labels[idx], probs[idx], probs
@app.route("/api/check", methods=["POST"])
def check_fact():
data = request.get_json()
claim = data.get("claim", "").strip()
language = data.get("language", "all")
date_from = data.get("dateFrom", "")
date_to = data.get("dateTo", "")
selected_source = data.get("selectedSource", "all")
selected_model = data.get("model", "roberta") # Default to RoBERTa
# Load the requested model (or switch if needed)
try:
load_model(selected_model)
except Exception as e:
print(f"Failed to load model {selected_model}: {e}")
return jsonify({"error": f"Failed to load AI model: {selected_model}"}), 500
if not claim:
return jsonify({"error": "Žiadny výrok nebol zadaný"}), 400
# FILTER POLITICKÝCH A CITLIVÝCH SLOV
FORBIDDEN_KEYWORDS = [
# Politici/strany SK
'fico', 'pellegrini', 'šimečka', 'simecka', 'kotleba', 'matovič', 'matovic',
'smer', 'hlas', 'ps', 'progresívne slovensko', 'republika', 'sas', 'oľano', 'olano',
# Politici medzinárodní
'trump', 'biden', 'putin', 'zelenskyy', 'zelensky', 'orbán', 'orban',
# Citlivé témy
'vakcína', 'vakcinacia', 'covid', 'koronavirus', 'chemtrails',
'voľby', 'volby', 'referendum', 'korupcia', 'korupčný', 'korupcny',
# Nadávky a vulgárne výrazy
'kokot', 'piča', 'pica', 'kurva', 'jebať', 'jebat', 'piči', 'pici',
'kokoti', 'debil', 'idiot', 'kretén', 'kreten', 'zmrd', 'piči', 'pici',
'hovädo', 'hovado', 'hajzel', 'sviňa', 'svina', 'smrad', 'kreténka', 'kretenka',
'fuck', 'shit', 'bitch', 'asshole', 'bastard'
]
claim_lower = claim.lower()
forbidden_found = [word for word in FORBIDDEN_KEYWORDS if word in claim_lower]
if forbidden_found:
print(f" Zakázané slová detekované: {forbidden_found}")
return jsonify({
"error": "Zakázané slovo",
"message": " Systém neakceptuje vulgárne výrazy, politické tvrdenia ani konšpiračné teórie. Zadajte korektné faktické tvrdenie.",
"forbidden_words": forbidden_found
}), 400
# CACHE CHECK - najprv skontroluj databázu
cached = get_cached_result(claim)
if cached:
print(f" Cache hit: {claim[:50]}...")
# Ignorujeme cache ak používateľ mení filtre (dátum/jazyk/zdroj), pretože cache nemá tieto metadáta
# Pre jednoduchosť zatiaľ vrátime cache len ak sú filtre predvolené ("all", bez dátumu)
if language == "all" and not date_from and not date_to and selected_source == "all":
return jsonify(cached)
if not SERP_API_KEY:
return jsonify({"error": "Chýba SERPAPI_API_KEY v .env"}), 500
# Detekuj či je claim v slovenčine (diakritika alebo slovenské slová)
has_diacritics = any(char in claim for char in 'áäčďéěíĺľňóôŕšťúůýžÁÄČĎÉĚÍĹĽŇÓÔŔŠŤÚŮÝŽ')
slovak_words = ['je', 'nieje', 'nie', 'je to', 'a', 'že', 'ktorý', 'ktorá', 'ktoré',
'hlavné', 'mesto', 'slovensko', 'slovenská', 'slovenské']
has_slovak_words = any(word.lower() in claim.lower() for word in slovak_words)
# Ak používateľ explicitne vybral 'sk', berieme to ako "potrebujeme translation z SK pre model",
# ale PRE SEARCH možno budeme chcieť radšej slovenské výsledky.
needs_translation = has_diacritics or has_slovak_words or (language == 'sk')
# --- NOVÁ LOGIKA PRE JAZYKOVÉ FILTRE A ZDROJE ---
# OPRAVA TYPO CHÝB V POUŽÍVATEĽSKOM VSTUPE PRE LEPŠÍ PREKLAD A VYHĽADÁVANIE
claim_for_translation = claim.replace('hlavne mesto', 'hlavné mesto').replace('slovenska', 'Slovenska')
SK_TRUSTED_SOURCES = [
# Hlavné spravodajstvo
'dennikn.sk', 'sme.sk', 'aktuality.sk', 'tasr.sk', 'rtvs.sk', 'teraz.sk',
'hnonline.sk', 'pravda.sk', 'trend.sk', 'tyzden.sk', 'postoj.sk', 'euractiv.sk',
'noviny.sk', 'tvnoviny.sk', 'ta3.com', 'webnoviny.sk',
# Inštitúcie
'vladne.sk', 'nrsr.sk', 'prezident.sk', 'gov.sk', 'sav.sk', 'demagog.sk',
'minv.sk', 'health.gov.sk', 'policia.sk', 'statistics.sk'
]
search_query = claim
if language == 'sk':
print(" Jazyk nastavený na 'SK' -> Vyhľadávam v slovenčine")
search_query = claim_for_translation # Použijeme opravený text s diakritikou pre lepšie výsledky
elif needs_translation:
try:
claim_en = GoogleTranslator(source='sk', target='en').translate(claim_for_translation)
print(f" Preklad pre search (EN nastavenie): '{claim_for_translation}' -> '{claim_en}'")
search_query = claim_en
except Exception as e:
print(f"Preklad zlyhal, použijem originál: {e}")
search_query = claim
sites_to_check = []
# Aplikovanie špecifického zdroja
if selected_source != "all":
if selected_source == "demagog": sites_to_check = ["demagog.sk"]
elif selected_source == "afp": sites_to_check = ["factcheck.afp.com"]
elif selected_source == "reuters": sites_to_check = ["reuters.com"]
# Ak sa používa dátumový filter a nie je vybraný zdroj, obmedzíme výber pre lepšie výsledky
elif date_from or date_to:
if language == 'sk':
sites_to_check = SK_TRUSTED_SOURCES
else:
sites_to_check = ['reuters.com', 'apnews.com', 'bbc.com', 'cnn.com', 'nytimes.com', 'theguardian.com']
if sites_to_check:
site_query = " OR ".join([f"site:{site}" for site in sites_to_check[:15]])
search_query = f"{search_query} ({site_query})"
print(f" Pridaný filter domén k hľadaniu: {site_query}")
# Zostavenie parametrov pre SerpAPI
params = {
"engine": "google",
"q": search_query,
"api_key": SERP_API_KEY,
"num": 20,
}
# Aplikovanie filtrov na parametre
if language == 'sk':
params["gl"] = "sk" # Geolocation: Slovakia
params["hl"] = "sk" # UI Language: Slovak
elif language == 'cs':
params["gl"] = "cz"
params["hl"] = "cs"
else:
params["hl"] = "en"
# Časové ohraničenie (tbs parameter v Google Search)
# Formát: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY
if date_from or date_to:
tbs_parts = []
if date_from:
try:
# date_from prichádza ako YYYY-MM-DD
y, m, d = date_from.split("-")
tbs_parts.append(f"cd_min:{int(m)}/{int(d)}/{y}")
except:
pass
if date_to:
try:
y, m, d = date_to.split("-")
tbs_parts.append(f"cd_max:{int(m)}/{int(d)}/{y}")
except:
pass
if tbs_parts:
# cdr:1 zapína custom date range
params["tbs"] = "cdr:1," + ",".join(tbs_parts)
print(f" Dátumový filter aplikovaný: {params['tbs']}")
# Volanie SerpAPI
res = requests.get("https://serpapi.com/search", params=params)
if res.status_code == 429:
return jsonify({"error": "Limit SerpAPI prekročený"}), 429
data = res.json()
organic = data.get("organic_results", [])
# Blacklist nedôveryhodných domén
DOMAIN_BLACKLIST = [
'spotify.com', 'youtube.com', 'facebook.com', 'instagram.com',
'twitter.com', 'tiktok.com', 'pinterest.com', 'reddit.com',
'karaoke', 'lyrics', 'texty.cz', 'karaoketexty',
# Slovenské nevhodné (študentské, fóra, bulvár, recepty, blogy)
'referaty', 'tahaky', 'zones.sk', 'student', 'zadania', 'maturita',
'forum', 'diskusia', 'chat', 'komentare', 'blog.', 'nazory.',
'mimibazar', 'modrykon', 'diva.sk', 'najmama', 'dobruchut', 'varecha',
# Konšpiračné weby
'badatel', 'zemavek', 'infovojna', 'slobodnyvysielac', 'hlavnespravy', 'hrot.info',
# Hobby/Školské projekty (z obrázku) a iné nerelevantné
'hockicko', 'astroportal', 'vesmir.sk', 'szm.com', 'blogspot'
]
# Whitelist dôveryhodných domén (vyššia priorita)
DOMAIN_WHITELIST = [
'wikipedia.org', 'britannica.com', 'nature.com', 'sciencedirect.com',
'ncbi.nlm.nih.gov', 'scholar.google', '.gov', '.edu', 'reuters.com',
'apnews.com', 'bbc.com', 'nature.com', 'science.org',
# Slovenské spravodajstvo, televízie a agentúry
'dennikn.sk', 'sme.sk', 'aktuality.sk', 'tasr.sk', 'rtvs.sk', 'teraz.sk',
'hnonline.sk', 'pravda.sk', 'trend.sk', 'tyzden.sk', 'postoj.sk', 'euractiv.sk',
'noviny.sk', 'tvnoviny.sk', 'ta3.com', 'webnoviny.sk', 'sita.sk',
# Slovenské inštitúcie
'vladne.sk', 'nrsr.sk', 'prezident.sk', 'gov.sk', 'sav.sk', 'demagog.sk',
'minv.sk', 'mosr.sk', 'mzv.sk', 'health.gov.sk', 'policia.sk', 'statistics.sk',
'shmu.sk', 'slov-lex.sk', 'uvzsr.sk',
# Slovenské Univerzity
'uniba.sk', 'stuba.sk', 'tuke.sk', 'upjs.sk', 'ukf.sk', 'umb.sk', 'sav.sk'
]
# Pridáme preferované zdroje z selectedSource
source_priority_list = []
if selected_source == "demagog":
source_priority_list.append("demagog.sk")
if selected_source == "afp":
source_priority_list.append("factcheck.afp.com")
if selected_source == "reuters":
source_priority_list.append("reuters.com")
# Filtruj výsledky
filtered_results = []
for r in organic:
link = r.get('link', '')
snippet = r.get('snippet', '')
# Preskočiť ak obsahuje blacklist doménu
if any(bad in link.lower() for bad in DOMAIN_BLACKLIST):
continue
if snippet:
# Výpočet priority
priority = 0
# Najvyššia priorita pre vybraný zdroj
if any(s in link.lower() for s in source_priority_list):
priority = 2
# Stredná priorita pre whitelist (vrátane SK webov)
elif any(good in link.lower() for good in DOMAIN_WHITELIST):
priority = 1
# Ak je zvolený SK jazyk, preložíme snippet do EN pre NLI model?
# Model vie hlavne EN. Ak je snippet SK, treba preklad!
if language == 'sk' or needs_translation:
try:
snippet_en = GoogleTranslator(source='auto', target='en').translate(snippet)
r['snippet_en'] = snippet_en # Uložíme si preklad
except:
r['snippet_en'] = snippet
else:
r['snippet_en'] = snippet
filtered_results.append((priority, r))
# Zoradi podľa priority (whitelist najprv)
filtered_results.sort(key=lambda x: x[0], reverse=True)
# DEDUPUKÁCIA DOMÉN - aby sme nemali viac výsledkov z tej istej domény
seen_domains = set()
final_results = []
for priority, r in filtered_results:
link = r.get('link', '')
try:
# Získaj doménu (napr. bbc.com z https://www.bbc.com/news/...)
domain = link.split('//')[-1].split('/')[0].replace('www.', '')
except:
domain = link
if domain not in seen_domains:
seen_domains.add(domain)
final_results.append(r)
if len(final_results) >= 5:
break
filtered_results = final_results
snippets = [r.get("snippet_en", r.get("snippet")) for r in filtered_results] # Použijeme preložené
links = [r.get("link") for r in filtered_results]
print(f" Nájdených zdrojov: {len(snippets)}")
print(f" Linky: {links[:3]}")
if not snippets:
print(" Žiadne snippety nenájdené!")
return jsonify({
"claim": claim,
"verdict": " Nedostatok zdrojov neoveriteľné",
"sources": links
})
# 4. NLI voting (s váženými skóre podľa confidence)
# Už sme detekovali jazyk vyššie pre search, použijeme to isté
entail_score = 0
contra_score = 0
evidences_for = []
evidences_against = []
detailed_sources = []
for i, snip in enumerate(snippets):
nli_translate = needs_translation or (language == 'sk')
label, conf, probs = nli_label(snip, claim, force_translation=nli_translate)
print(f"\n [{i+1}] NLI Výsledok: {label} (confidence: {conf:.3f})")
print(f" Snippet (EN): {snip[:150]}...")
# SOFT SCORING - započítame všetky pravdepodobnosti, nie len max
# probs = [entailment, neutral, contradiction]
entail_score += probs[0] # Entailment je index 0
contra_score += probs[2] # Contradiction je index 2
# Pre zobrazenie dôkazov vrátime pôvodný snippet z JSONu (ak bol preložený, chceme vidieť SK pre usera?)
# User vidí "evidence". Ak je stránka SK, snippet je SK. Ale model hodnotil EN.
# V filtered_results máme pôvodný objekt `r`. Dajme naspäť pôvodný snippet pre UI.
original_snippet = filtered_results[i].get('snippet', snip)
source_link = filtered_results[i].get('link')
if label == "entailment":
evidences_for.append({"snippet": original_snippet, "confidence": round(conf, 3)})
elif label == "contradiction":
evidences_against.append({"snippet": original_snippet, "confidence": round(conf, 3)})
# Uložíme detailné info o zdroji
detailed_sources.append({
"url": source_link,
"label": label,
"confidence": round(conf, 3),
"entailment_prob": round(probs[0], 3),
"contradiction_prob": round(probs[2], 3),
"neutral_prob": round(probs[1], 3)
})
# 5. Verdict - používame soft skóre
total_score = entail_score + contra_score
print(f"\n Výsledky NLI (soft scoring):")
print(f" Entailment score: {entail_score:.3f}")
print(f" Contradiction score: {contra_score:.3f}")
print(f" Total score: {total_score:.3f}")
# Ak je absolútne skóre pre potvrdenie/vyvrátenie príliš nízke oproti počtu nájdených zdrojov
if total_score < 1.0 and len(snippets) > 0:
verdict = "⚠️ Nejednoznačné"
print(" [Nedostatok silných dôkazov - prevládajú 'neutral' výsledky]")
elif total_score > 0:
entail_ratio = entail_score / total_score
contra_ratio = contra_score / total_score
print(f" Entail ratio: {entail_ratio:.1%}, Contra ratio: {contra_ratio:.1%}")
# Zmierňujeme prahy z 60% na 50%
if entail_ratio > 0.5:
verdict = "✅ Pravda"
elif contra_ratio > 0.5:
verdict = "❌ Nepravda"
else:
verdict = "⚠️ Nejednoznačné"
else:
verdict = "⚠️ Neoveriteľné"
print(f" Verdikt: {verdict}\n")
result = {
"claim": claim,
"nli_votes": {
"entailment_score": round(entail_score, 3),
"contradiction_score": round(contra_score, 3)
},
"verdict": verdict,
"evidence_for": evidences_for[:3],
"evidence_against": evidences_against[:3],
"sources": detailed_sources if detailed_sources else links,
"model_name": selected_model,
"cached": False
}
# Ulož do cache
save_to_cache(claim, result, model_name=selected_model)
return jsonify(result)
@app.route("/api/history", methods=["GET"])
def get_check_history():
"""Vráti históriu fact-checkov"""
limit = request.args.get("limit", 50, type=int)
history = get_history(limit)
return jsonify({"history": history, "count": len(history)})
@app.route("/api/stats", methods=["GET"])
def get_statistics():
"""Vráti štatistiky databázy"""
stats = get_stats()
return jsonify(stats)
@app.route("/api/admin/add-fact", methods=["POST"])
def admin_add_fact():
"""Admin endpoint na pridanie overeného faktu"""
data = request.get_json()
claim = data.get("claim", "").strip()
verdict = data.get("verdict", "").strip()
explanation = data.get("explanation", "")
source_url = data.get("source_url", "")
if not claim or not verdict:
return jsonify({"error": "Claim a verdict sú povinné"}), 400
success = add_verified_fact(claim, verdict, explanation, source_url)
if success:
return jsonify({"message": "Overený fakt pridaný", "claim": claim})
else:
return jsonify({"error": "Fakt už existuje"}), 409
if __name__ == "__main__":
app.run(port=5000, debug=True)

29
backend/clear_cache.py Executable file
View File

@ -0,0 +1,29 @@
import sqlite3
from database import DB_NAME
def clear_cache(db_path=None):
    """Delete every cached fact-check and verified fact from the database.

    Args:
        db_path: Optional path to the SQLite file; defaults to the
            application's ``DB_NAME``. Existing callers (no argument)
            behave exactly as before.
    """
    if db_path is None:
        db_path = DB_NAME
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # Remove all cached fact-checks
        cursor.execute("DELETE FROM fact_checks")
        deleted_checks = cursor.rowcount
        # Remove all manually verified facts
        cursor.execute("DELETE FROM verified_facts")
        deleted_facts = cursor.rowcount
        # Reset the AUTOINCREMENT counters so new ids start from 1 again
        cursor.execute("DELETE FROM sqlite_sequence WHERE name='fact_checks'")
        cursor.execute("DELETE FROM sqlite_sequence WHERE name='verified_facts'")
        conn.commit()
    finally:
        # Bug fix: the original leaked the connection if a statement failed.
        conn.close()
    print("✅ Cache vyčistená!")
    print(f" - Vymazaných fact-checkov: {deleted_checks}")
    print(f" - Vymazaných verified facts: {deleted_facts}")
if __name__ == "__main__":
clear_cache()

254
backend/database.py Executable file
View File

@ -0,0 +1,254 @@
import sqlite3
from datetime import datetime
import hashlib
import json

# SQLite database file (created in the working directory on first use)
DB_NAME = "factchecker.db"
def get_db_connection(db_path=None):
    """Open a SQLite connection with name-based row access.

    Args:
        db_path: Optional database file path; defaults to ``DB_NAME``.
            (Backward-compatible generalization of the hard-coded path.)

    Returns:
        sqlite3.Connection with ``row_factory`` set to ``sqlite3.Row``
        so columns can be read by name.
    """
    conn = sqlite3.connect(DB_NAME if db_path is None else db_path)
    conn.row_factory = sqlite3.Row  # Allows column access by name
    return conn
def init_db():
    """Create the database schema if missing (safe to call repeatedly).

    Creates the ``fact_checks`` cache table, the admin-curated
    ``verified_facts`` table, the claim-hash index, and migrates older
    databases by adding the ``model_name`` column.
    """
    conn = get_db_connection()
    cursor = conn.cursor()
    # Table of cached fact-check results
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS fact_checks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            claim_hash TEXT UNIQUE NOT NULL,
            claim TEXT NOT NULL,
            verdict TEXT NOT NULL,
            confidence REAL,
            nli_votes TEXT,
            evidence_for TEXT,
            evidence_against TEXT,
            sources TEXT,
            model_name TEXT,
            checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            check_count INTEGER DEFAULT 1
        )
    ''')
    # MIGRATION: add the model_name column when upgrading an older database
    try:
        cursor.execute('ALTER TABLE fact_checks ADD COLUMN model_name TEXT')
        print("Stĺpec 'model_name' bol pridaný do tabuľky fact_checks")
    except sqlite3.OperationalError:
        pass  # Column already exists
    # Table of manually verified facts (admin-curated)
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS verified_facts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            claim TEXT UNIQUE NOT NULL,
            verdict TEXT NOT NULL,
            explanation TEXT,
            source_url TEXT,
            added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            added_by TEXT DEFAULT 'admin'
        )
    ''')
    # Index for fast lookups by claim hash
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_claim_hash ON fact_checks(claim_hash)')
    conn.commit()
    conn.close()
    print(" Databáza inicializovaná")
def hash_claim(claim: str) -> str:
    """Return a stable MD5 hex digest uniquely identifying a claim.

    Claims are normalised (lower-cased, surrounding whitespace stripped)
    before hashing, so equivalent phrasings collide on purpose.
    """
    canonical = claim.strip().lower()
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()
def get_cached_result(claim: str):
    """Look up a previously checked claim.

    Checks the admin-curated ``verified_facts`` table first (exact,
    case-insensitive claim match), then the ``fact_checks`` cache
    (normalised-hash match). Returns a result dict, or None when
    nothing is stored. Side effect: a cache hit bumps ``check_count``
    and refreshes ``checked_at``.
    """
    conn = get_db_connection()
    cursor = conn.cursor()
    claim_hash = hash_claim(claim)
    # Verified facts take priority over the automatic cache
    cursor.execute('''
        SELECT claim, verdict, explanation, source_url, 'verified' as source_type
        FROM verified_facts
        WHERE LOWER(claim) = LOWER(?)
    ''', (claim.strip(),))
    verified = cursor.fetchone()
    if verified:
        conn.close()
        return {
            "claim": verified["claim"],
            "verdict": f"{verified['verdict']} (Overené)",
            "explanation": verified["explanation"],
            "sources": [verified["source_url"]] if verified["source_url"] else [],
            "cached": True,
            "verified": True
        }
    # Then consult the automatic cache
    cursor.execute('''
        SELECT * FROM fact_checks
        WHERE claim_hash = ?
        ORDER BY checked_at DESC
        LIMIT 1
    ''', (claim_hash,))
    result = cursor.fetchone()
    if result:
        # Increment the hit counter and freshness timestamp
        cursor.execute('''
            UPDATE fact_checks
            SET check_count = check_count + 1,
                checked_at = CURRENT_TIMESTAMP
            WHERE claim_hash = ?
        ''', (claim_hash,))
        conn.commit()
        # Deserialise the JSON-encoded columns
        cached_data = {
            "claim": result["claim"],
            "verdict": result["verdict"],
            "nli_votes": json.loads(result["nli_votes"]) if result["nli_votes"] else None,
            "evidence_for": json.loads(result["evidence_for"]) if result["evidence_for"] else [],
            "evidence_against": json.loads(result["evidence_against"]) if result["evidence_against"] else [],
            "sources": json.loads(result["sources"]) if result["sources"] else [],
            "model_name": result["model_name"] if "model_name" in result.keys() else None,
            "cached": True,
            "checked_at": result["checked_at"],
            "check_count": result["check_count"]
        }
        conn.close()
        return cached_data
    conn.close()
    return None
def save_to_cache(claim: str, result: dict, model_name: str = "unknown"):
    """Persist a fact-check result, upserting on the claim hash.

    Structured fields are JSON-serialised. On a repeated claim the row
    is overwritten and ``check_count`` incremented. Returns True on
    success, False on failure (the error is printed, not raised).
    """
    conn = get_db_connection()
    cursor = conn.cursor()
    claim_hash = hash_claim(claim)
    try:
        cursor.execute('''
            INSERT INTO fact_checks
            (claim_hash, claim, verdict, confidence, nli_votes, evidence_for, evidence_against, sources, model_name)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(claim_hash) DO UPDATE SET
                verdict = excluded.verdict,
                confidence = excluded.confidence,
                nli_votes = excluded.nli_votes,
                evidence_for = excluded.evidence_for,
                evidence_against = excluded.evidence_against,
                sources = excluded.sources,
                model_name = excluded.model_name,
                checked_at = CURRENT_TIMESTAMP,
                check_count = check_count + 1
        ''', (
            claim_hash,
            claim,
            result.get("verdict", ""),
            result.get("confidence"),
            json.dumps(result.get("nli_votes")) if result.get("nli_votes") else None,
            json.dumps(result.get("evidence_for", [])),
            json.dumps(result.get("evidence_against", [])),
            json.dumps(result.get("sources", [])),
            model_name
        ))
        conn.commit()
        conn.close()
        return True
    except Exception as e:
        # Deliberate best-effort: caching failures are logged, not fatal
        print(f" Chyba pri ukladaní do cache: {e}")
        conn.close()
        return False
def get_history(limit: int = 50):
    """Return the most recent fact-checks, newest first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        List of dicts with claim, verdict, timestamps, sources and
        model_name (None for rows from a pre-migration database).
    """
    conn = get_db_connection()
    cursor = conn.cursor()
    # Try to include model_name; fall back if the column does not exist
    try:
        cursor.execute('''
            SELECT claim, verdict, checked_at, check_count, sources, model_name
            FROM fact_checks
            ORDER BY checked_at DESC
            LIMIT ?
        ''', (limit,))
    except sqlite3.OperationalError:
        # Fallback for an old DB without model_name
        cursor.execute('''
            SELECT claim, verdict, checked_at, check_count, sources, NULL as model_name
            FROM fact_checks
            ORDER BY checked_at DESC
            LIMIT ?
        ''', (limit,))
    results = cursor.fetchall()
    conn.close()
    return [{
        "claim": row["claim"],
        "verdict": row["verdict"],
        "checked_at": row["checked_at"],
        "check_count": row["check_count"],
        "sources": json.loads(row["sources"]) if row["sources"] else [],
        "model_name": row["model_name"]
    } for row in results]
def add_verified_fact(claim: str, verdict: str, explanation: str = None, source_url: str = None):
    """Insert a manually verified fact (admin feature).

    Returns:
        True when the row was inserted, False when the claim already
        exists (UNIQUE constraint on ``claim``).
    """
    conn = get_db_connection()
    try:
        conn.execute('''
            INSERT INTO verified_facts (claim, verdict, explanation, source_url)
            VALUES (?, ?, ?, ?)
        ''', (claim, verdict, explanation, source_url))
        conn.commit()
        conn.close()
        return True
    except sqlite3.IntegrityError:
        conn.close()
        return False  # Already exists
def get_stats():
    """Return summary statistics about the cache and verified facts."""
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute('SELECT COUNT(*) as total FROM fact_checks')
    unique_claims = cursor.fetchone()["total"]
    cursor.execute('SELECT SUM(check_count) as total_checks FROM fact_checks')
    # SUM() yields NULL (None) on an empty table — coerce to zero
    total_checks = cursor.fetchone()["total_checks"] or 0
    cursor.execute('SELECT COUNT(*) as verified_count FROM verified_facts')
    verified_facts = cursor.fetchone()["verified_count"]
    conn.close()
    return {
        "unique_claims": unique_claims,
        "total_checks": total_checks,
        "verified_facts": verified_facts
    }
# Initialise the database schema on first import (import-time side effect)
init_db()