From c80694f543fdcc123dec5848e48029b52d888bd4 Mon Sep 17 00:00:00 2001
From: VIliam
Date: Thu, 26 Mar 2026 15:30:09 +0100
Subject: [PATCH] Add missing backend files (app.py, database.py, clear_cache.py)

Co-Authored-By: Claude Opus 4.6
---
 backend/app.py         | 555 +++++++++++++++++++++++++++++++++++++++++
 backend/clear_cache.py |  29 ++
 backend/database.py    | 248 ++++++++++++++++++
 3 files changed, 832 insertions(+)
 create mode 100755 backend/app.py
 create mode 100755 backend/clear_cache.py
 create mode 100755 backend/database.py

diff --git a/backend/app.py b/backend/app.py
new file mode 100755
index 0000000..5a88174
--- /dev/null
+++ b/backend/app.py
@@ -0,0 +1,555 @@
+import os
+import torch
+from dotenv import load_dotenv
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import requests
+from deep_translator import GoogleTranslator
+from database import get_cached_result, save_to_cache, get_history, get_stats, add_verified_fact, init_db
+
+# 1. Load API key
+load_dotenv()
+SERP_API_KEY = os.getenv("SERPAPI_API_KEY")
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True, allow_headers=['Content-Type'], methods=['GET', 'POST', 'OPTIONS'])
+
+# Initialize Database
+init_db()
+
+# 2. Model Configuration
+MODELS = {
+    "roberta": {
+        "name": "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli",
+        "type": "sequence-classification",
+        "needs_translation": True
+    },
+    "mdeberta": {
+        "name": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
+        "type": "sequence-classification",
+        "needs_translation": False
+    }
+}
+
+# Global variables for current model
+current_model_key = "roberta"
+tokenizer = None
+model = None
+
+def load_model(model_key="roberta"):
+    """Load the NLI model registered under ``model_key`` into the module globals.
+
+    Unknown keys fall back to "roberta". The call is a no-op when the
+    requested model is already loaded.
+    """
+    global tokenizer, model, current_model_key
+
+    if model_key not in MODELS:
+        model_key = "roberta"
+
+    if model is not None and current_model_key == model_key:
+        return  # Already loaded
+
+    print(f" Prepínam model na: {model_key} ({MODELS[model_key]['name']})...")
+
+    # Drop the references to the previous model first so its memory can be
+    # reclaimed before the new weights are loaded.
+    model = None
+    tokenizer = None
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
+    config = MODELS[model_key]
+    tokenizer = AutoTokenizer.from_pretrained(config["name"])
+    model = AutoModelForSequenceClassification.from_pretrained(config["name"])
+    model.eval()
+    current_model_key = model_key
+    print(f" Model {model_key} načítaný.")
+
+# Initial load
+load_model("roberta")
+
+def nli_label(premise: str, hypothesis: str, force_translation=False):
+    """Classify how ``premise`` relates to ``hypothesis`` with the current model.
+
+    The hypothesis is translated to English first when the active model needs
+    English input and the text looks Slovak, or whenever ``force_translation``
+    is set; a failed translation is logged and the original text is used.
+
+    Returns a tuple ``(label, confidence, probs)`` where ``probs`` is
+    ``[entailment, neutral, contradiction]``.
+    """
+    global tokenizer, model, current_model_key
+
+    config = MODELS[current_model_key]
+
+    # Translate to EN only when the model requires it (RoBERTa)
+    if config["needs_translation"] or force_translation:
+        # Simple (very rough) check whether the text is Slovak
+        is_slovak = any(char in hypothesis for char in 'áäčďéěíĺľňóôŕšťúůýž') or 'je' in hypothesis.split() or 'nie' in hypothesis.split()
+        if is_slovak or force_translation:
+            try:
+                original = hypothesis
+
+                # Quick fix for missing diacritics in common phrases before translating
+                hypothesis_for_translation = hypothesis.replace('hlavne mesto', 'hlavné mesto').replace('slovenska', 'Slovenska')
+
+                hypothesis = GoogleTranslator(source='sk', target='en').translate(hypothesis_for_translation)
+                print(f" [Preklad pre {current_model_key}]: '{original}' -> '{hypothesis}'")
+            except Exception as e:
+                print(f" Preklad zlyhal: {e}")
+
+    # Sequence-classification inference (RoBERTa, mDeBERTa)
+    inputs = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        logits = model(**inputs).logits[0]
+
+    probs = torch.softmax(logits, dim=-1).tolist()
+
+    # Both models use the standard mapping: 0: entailment, 1: neutral, 2: contradiction
+    labels = ["entailment", "neutral", "contradiction"]
+    idx = int(torch.argmax(logits))  # softmax is monotonic, so this is also the argmax of probs
+
+    return labels[idx], probs[idx], probs
+
+
+@app.route("/api/check", methods=["POST"])
+def check_fact():
+    """Fact-check the claim from the JSON request body.
+
+    Pipeline: keyword filter -> cache lookup -> SerpAPI search -> domain
+    filtering and de-duplication -> NLI scoring of the snippets -> verdict.
+    The result is stored in the cache and returned as JSON.
+    """
+    # Treat a missing, malformed or non-object JSON body as an empty payload
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    claim = str(data.get("claim") or "").strip()
+    language = data.get("language", "all")
+    date_from = data.get("dateFrom", "")
+    date_to = data.get("dateTo", "")
+    selected_source = data.get("selectedSource", "all")
+    selected_model = data.get("model", "roberta")  # Default to RoBERTa
+
+    # Validate the input before the potentially slow model switch
+    if not claim:
+        return jsonify({"error": "Žiadny výrok nebol zadaný"}), 400
+
+    # Load the requested model (or switch if needed)
+    try:
+        load_model(selected_model)
+    except Exception as e:
+        print(f"Failed to load model {selected_model}: {e}")
+        return jsonify({"error": f"Failed to load AI model: {selected_model}"}), 500
+
+    # FILTER FOR POLITICAL AND SENSITIVE WORDS
+    FORBIDDEN_KEYWORDS = [
+        # Slovak politicians/parties
+        'fico', 'pellegrini', 'šimečka', 'simecka', 'kotleba', 'matovič', 'matovic',
+        'smer', 'hlas', 'ps', 'progresívne slovensko', 'republika', 'sas', 'oľano', 'olano',
+        # International politicians
+        'trump', 'biden', 'putin', 'zelenskyy', 'zelensky', 'orbán', 'orban',
+        # Sensitive topics
+        'vakcína', 'vakcinacia', 'covid', 'koronavirus', 'chemtrails',
+        'voľby', 'volby', 'referendum', 'korupcia', 'korupčný', 'korupcny',
+        # Profanity and vulgar expressions
+        'kokot', 'piča', 'pica', 'kurva', 'jebať', 'jebat', 'piči', 'pici',
+        'kokoti', 'debil', 'idiot', 'kretén', 'kreten', 'zmrd', 'piči', 'pici',
+        'hovädo', 'hovado', 'hajzel', 'sviňa', 'svina', 'smrad', 'kreténka', 'kretenka',
+        'fuck', 'shit', 'bitch', 'asshole', 'bastard'
+    ]
+
+    # NOTE(review): this is a substring match, so short keywords such as 'ps'
+    # or 'sas' also reject unrelated words that merely contain them.
+    claim_lower = claim.lower()
+    forbidden_found = [word for word in FORBIDDEN_KEYWORDS if word in claim_lower]
+
+    if forbidden_found:
+        print(f" Zakázané slová detekované: {forbidden_found}")
+        return jsonify({
+            "error": "Zakázané slovo",
+            "message": " Systém neakceptuje vulgárne výrazy, politické tvrdenia ani konšpiračné teórie. Zadajte korektné faktické tvrdenie.",
+            "forbidden_words": forbidden_found
+        }), 400
+
+    # CACHE CHECK - look in the database first
+    cached = get_cached_result(claim)
+    if cached:
+        print(f" Cache hit: {claim[:50]}...")
+        # The cache is ignored when the user changes filters (date/language/source), because it does not store that metadata
+        # For simplicity the cached result is returned only when the filters are at their defaults ("all", no dates)
+        if language == "all" and not date_from and not date_to and selected_source == "all":
+            return jsonify(cached)
+
+    if not SERP_API_KEY:
+        return jsonify({"error": "Chýba SERPAPI_API_KEY v .env"}), 500
+
+    # Detect whether the claim is Slovak (diacritics or common Slovak words)
+    has_diacritics = any(char in claim for char in 'áäčďéěíĺľňóôŕšťúůýžÁÄČĎÉĚÍĹĽŇÓÔŔŠŤÚŮÝŽ')
+    # Match whole words only: a substring test flags almost any text (e.g. "je"
+    # inside "project"). The one-letter conjunction 'a' is left out because it
+    # is indistinguishable from the English article; 'je to' is covered by 'je'.
+    slovak_words = {'je', 'nieje', 'nie', 'že', 'ktorý', 'ktorá', 'ktoré',
+                    'hlavné', 'mesto', 'slovensko', 'slovenská', 'slovenské'}
+    claim_tokens = {token.strip('.,;:!?"\'()') for token in claim.lower().split()}
+    has_slovak_words = not slovak_words.isdisjoint(claim_tokens)
+
+    # An explicit 'sk' selection means the model needs a translation from Slovak,
+    # but FOR THE SEARCH itself Slovak results may be preferable.
+    needs_translation = has_diacritics or has_slovak_words or (language == 'sk')
+
+    # --- LANGUAGE FILTER AND SOURCE LOGIC ---
+
+    # FIX COMMON TYPOS IN THE USER INPUT FOR BETTER TRANSLATION AND SEARCH
+    claim_for_translation = claim.replace('hlavne mesto', 'hlavné mesto').replace('slovenska', 'Slovenska')
+
+    SK_TRUSTED_SOURCES = [
+        # Main news outlets
+        'dennikn.sk', 'sme.sk', 'aktuality.sk', 'tasr.sk', 'rtvs.sk', 'teraz.sk',
+        'hnonline.sk', 'pravda.sk', 'trend.sk', 'tyzden.sk', 'postoj.sk', 'euractiv.sk',
+        'noviny.sk', 'tvnoviny.sk', 'ta3.com', 'webnoviny.sk',
+        # Institutions
+        'vladne.sk', 'nrsr.sk', 'prezident.sk', 'gov.sk', 'sav.sk', 'demagog.sk',
+        'minv.sk', 'health.gov.sk', 'policia.sk', 'statistics.sk'
+    ]
+
+    search_query = claim
+
+    if language == 'sk':
+        print(" Jazyk nastavený na 'SK' -> Vyhľadávam v slovenčine")
+        search_query = claim_for_translation  # Use the corrected text with diacritics for better results
+    elif needs_translation:
+        try:
+            claim_en = GoogleTranslator(source='sk', target='en').translate(claim_for_translation)
+            print(f" Preklad pre search (EN nastavenie): '{claim_for_translation}' -> '{claim_en}'")
+            search_query = claim_en
+        except Exception as e:
+            print(f"Preklad zlyhal, použijem originál: {e}")
+            search_query = claim
+
+    sites_to_check = []
+
+    # Apply the specific source selected by the user
+    if selected_source != "all":
+        if selected_source == "demagog": sites_to_check = ["demagog.sk"]
+        elif selected_source == "afp": sites_to_check = ["factcheck.afp.com"]
+        elif selected_source == "reuters": sites_to_check = ["reuters.com"]
+    # With a date filter and no selected source, restrict the domains for better results
+    elif date_from or date_to:
+        if language == 'sk':
+            sites_to_check = SK_TRUSTED_SOURCES
+        else:
+            sites_to_check = ['reuters.com', 'apnews.com', 'bbc.com', 'cnn.com', 'nytimes.com', 'theguardian.com']
+
+    if sites_to_check:
+        site_query = " OR ".join([f"site:{site}" for site in sites_to_check[:15]])
+        search_query = f"{search_query} ({site_query})"
+        print(f" Pridaný filter domén k hľadaniu: {site_query}")
+
+    # Build the SerpAPI parameters
+    params = {
+        "engine": "google",
+        "q": search_query,
+        "api_key": SERP_API_KEY,
+        "num": 20,
+    }
+
+    # Apply the language filter to the parameters
+    if language == 'sk':
+        params["gl"] = "sk"  # Geolocation: Slovakia
+        params["hl"] = "sk"  # UI Language: Slovak
+    elif language == 'cs':
+        params["gl"] = "cz"
+        params["hl"] = "cs"
+    else:
+        params["hl"] = "en"
+
+    # Date range (tbs parameter in Google Search)
+    # Format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY
+    if date_from or date_to:
+        tbs_parts = []
+        if date_from:
+            try:
+                # date_from arrives as YYYY-MM-DD
+                y, m, d = date_from.split("-")
+                tbs_parts.append(f"cd_min:{int(m)}/{int(d)}/{y}")
+            except (ValueError, AttributeError):
+                pass  # Malformed date: skip this bound
+        if date_to:
+            try:
+                y, m, d = date_to.split("-")
+                tbs_parts.append(f"cd_max:{int(m)}/{int(d)}/{y}")
+            except (ValueError, AttributeError):
+                pass  # Malformed date: skip this bound
+
+        if tbs_parts:
+            # cdr:1 switches on the custom date range
+            params["tbs"] = "cdr:1," + ",".join(tbs_parts)
+            print(f" Dátumový filter aplikovaný: {params['tbs']}")
+
+    # Call SerpAPI; the timeout keeps a stalled upstream from hanging the request
+    try:
+        res = requests.get("https://serpapi.com/search", params=params, timeout=15)
+    except requests.RequestException as e:
+        print(f"SerpAPI request failed: {e}")
+        return jsonify({"error": "SerpAPI request failed"}), 502
+
+    if res.status_code == 429:
+        return jsonify({"error": "Limit SerpAPI prekročený"}), 429
+
+    try:
+        data = res.json()
+    except ValueError:
+        # Non-JSON body (e.g. an upstream HTML error page)
+        return jsonify({"error": "Invalid response from SerpAPI"}), 502
+    organic = data.get("organic_results", [])
+
+    # Blacklist of untrusted domains
+    DOMAIN_BLACKLIST = [
+        'spotify.com', 'youtube.com', 'facebook.com', 'instagram.com',
+        'twitter.com', 'tiktok.com', 'pinterest.com', 'reddit.com',
+        'karaoke', 'lyrics', 'texty.cz', 'karaoketexty',
+        # Unsuitable Slovak sites (student work, forums, tabloids, recipes, blogs)
+        'referaty', 'tahaky', 'zones.sk', 'student', 'zadania', 'maturita',
+        'forum', 'diskusia', 'chat', 'komentare', 'blog.', 'nazory.',
+        'mimibazar', 'modrykon', 'diva.sk', 'najmama', 'dobruchut', 'varecha',
+        # Conspiracy websites
+        'badatel', 'zemavek', 'infovojna', 'slobodnyvysielac', 'hlavnespravy', 'hrot.info',
+        # Hobby/school projects (from the screenshot) and other irrelevant sites
+        'hockicko', 'astroportal', 'vesmir.sk', 'szm.com', 'blogspot'
+    ]
+
+    # Whitelist of trusted domains (higher priority)
+    DOMAIN_WHITELIST = [
+        'wikipedia.org', 'britannica.com', 'nature.com', 'sciencedirect.com',
+        'ncbi.nlm.nih.gov', 'scholar.google', '.gov', '.edu', 'reuters.com',
+        'apnews.com', 'bbc.com', 'nature.com', 'science.org',
+        # Slovak news outlets, TV stations and agencies
+        'dennikn.sk', 'sme.sk', 'aktuality.sk', 'tasr.sk', 'rtvs.sk', 'teraz.sk',
+        'hnonline.sk', 'pravda.sk', 'trend.sk', 'tyzden.sk', 'postoj.sk', 'euractiv.sk',
+        'noviny.sk', 'tvnoviny.sk', 'ta3.com', 'webnoviny.sk', 'sita.sk',
+        # Slovak institutions
+        'vladne.sk', 'nrsr.sk', 'prezident.sk', 'gov.sk', 'sav.sk', 'demagog.sk',
+        'minv.sk', 'mosr.sk', 'mzv.sk', 'health.gov.sk', 'policia.sk', 'statistics.sk',
+        'shmu.sk', 'slov-lex.sk', 'uvzsr.sk',
+        # Slovak universities
+        'uniba.sk', 'stuba.sk', 'tuke.sk', 'upjs.sk', 'ukf.sk', 'umb.sk', 'sav.sk'
+    ]
+
+    # Add the preferred source from selectedSource
+    source_priority_list = []
+    if selected_source == "demagog":
+        source_priority_list.append("demagog.sk")
+    if selected_source == "afp":
+        source_priority_list.append("factcheck.afp.com")
+    if selected_source == "reuters":
+        source_priority_list.append("reuters.com")
+
+    # Filter the results
+    filtered_results = []
+    for r in organic:
+        link = r.get('link', '')
+        snippet = r.get('snippet', '')
+
+        # Skip results whose link contains a blacklisted domain
+        if any(bad in link.lower() for bad in DOMAIN_BLACKLIST):
+            continue
+
+        if snippet:
+            # Compute the priority
+            priority = 0
+
+            # Highest priority for the selected source
+            if any(s in link.lower() for s in source_priority_list):
+                priority = 2
+            # Medium priority for the whitelist (including Slovak sites)
+            elif any(good in link.lower() for good in DOMAIN_WHITELIST):
+                priority = 1
+
+            # The NLI model works mainly with English, so a snippet that may
+            # be Slovak is translated to EN before scoring.
+            if language == 'sk' or needs_translation:
+                try:
+                    snippet_en = GoogleTranslator(source='auto', target='en').translate(snippet)
+                    r['snippet_en'] = snippet_en or snippet  # Keep the original if the translation is empty
+                except Exception:
+                    r['snippet_en'] = snippet
+            else:
+                r['snippet_en'] = snippet
+
+            filtered_results.append((priority, r))
+
+    # Sort by priority (whitelist first)
+    filtered_results.sort(key=lambda x: x[0], reverse=True)
+
+    # DOMAIN DEDUPLICATION - avoid several results from the same domain
+    seen_domains = set()
+    final_results = []
+
+    for priority, r in filtered_results:
+        link = r.get('link', '')
+        try:
+            # Extract the domain (e.g. bbc.com from https://www.bbc.com/news/...)
+            domain = link.split('//')[-1].split('/')[0].replace('www.', '')
+        except AttributeError:
+            domain = link
+
+        if domain not in seen_domains:
+            seen_domains.add(domain)
+            final_results.append(r)
+
+        if len(final_results) >= 5:
+            break
+
+    filtered_results = final_results
+
+    snippets = [r.get("snippet_en", r.get("snippet")) for r in filtered_results]  # Use the translated snippets
+    links = [r.get("link") for r in filtered_results]
+
+    print(f" Nájdených zdrojov: {len(snippets)}")
+    print(f" Linky: {links[:3]}")
+
+    if not snippets:
+        print(" Žiadne snippety nenájdené!")
+        return jsonify({
+            "claim": claim,
+            "verdict": " Nedostatok zdrojov – neoveriteľné",
+            "sources": links
+        })
+
+    # 4. NLI voting (scores weighted by confidence)
+    # The language was already detected above for the search; reuse it here
+
+    entail_score = 0
+    contra_score = 0
+    evidences_for = []
+    evidences_against = []
+    detailed_sources = []
+    nli_translate = needs_translation or (language == 'sk')  # Same for every snippet
+
+    for i, snip in enumerate(snippets):
+        label, conf, probs = nli_label(snip, claim, force_translation=nli_translate)
+        print(f"\n [{i+1}] NLI Výsledok: {label} (confidence: {conf:.3f})")
+        print(f" Snippet (EN): {snip[:150]}...")
+
+        # SOFT SCORING - add up all probabilities, not only the maximum
+        # probs = [entailment, neutral, contradiction]
+        entail_score += probs[0]  # Entailment is index 0
+        contra_score += probs[2]  # Contradiction is index 2
+
+        # The evidence shown to the user is the original snippet from the search
+        # result, while the model scored the English version. filtered_results
+        # still holds the original result object, so read the snippet from there.
+        original_snippet = filtered_results[i].get('snippet', snip)
+        source_link = filtered_results[i].get('link')
+
+        if label == "entailment":
+            evidences_for.append({"snippet": original_snippet, "confidence": round(conf, 3)})
+        elif label == "contradiction":
+            evidences_against.append({"snippet": original_snippet, "confidence": round(conf, 3)})
+
+        # Store detailed information about the source
+        detailed_sources.append({
+            "url": source_link,
+            "label": label,
+            "confidence": round(conf, 3),
+            "entailment_prob": round(probs[0], 3),
+            "contradiction_prob": round(probs[2], 3),
+            "neutral_prob": round(probs[1], 3)
+        })
+
+    # 5. Verdict - based on the soft scores
+    total_score = entail_score + contra_score
+
+    print(f"\n Výsledky NLI (soft scoring):")
+    print(f" Entailment score: {entail_score:.3f}")
+    print(f" Contradiction score: {contra_score:.3f}")
+    print(f" Total score: {total_score:.3f}")
+
+    # The combined entailment/contradiction score is too low: mostly 'neutral' results
+    if total_score < 1.0 and len(snippets) > 0:
+        verdict = "⚠️ Nejednoznačné"
+        print(" [Nedostatok silných dôkazov - prevládajú 'neutral' výsledky]")
+    elif total_score > 0:
+        entail_ratio = entail_score / total_score
+        contra_ratio = contra_score / total_score
+        print(f" Entail ratio: {entail_ratio:.1%}, Contra ratio: {contra_ratio:.1%}")
+
+        # Thresholds relaxed from 60% to 50%
+        if entail_ratio > 0.5:
+            verdict = "✅ Pravda"
+        elif contra_ratio > 0.5:
+            verdict = "❌ Nepravda"
+        else:
+            verdict = "⚠️ Nejednoznačné"
+    else:
+        verdict = "⚠️ Neoveriteľné"
+
+    print(f" Verdikt: {verdict}\n")
+
+    result = {
+        "claim": claim,
+        "nli_votes": {
+            "entailment_score": round(entail_score, 3),
+            "contradiction_score": round(contra_score, 3)
+        },
+        "verdict": verdict,
+        "evidence_for": evidences_for[:3],
+        "evidence_against": evidences_against[:3],
+        "sources": detailed_sources if detailed_sources else links,
+        "model_name": selected_model,
+        "cached": False
+    }
+
+    # Save to the cache
+    save_to_cache(claim, result, model_name=selected_model)
+
+    return jsonify(result)
+
+
+@app.route("/api/history", methods=["GET"])
+def get_check_history():
+    """Return the fact-check history."""
+    limit = request.args.get("limit", 50, type=int)
+    history = get_history(limit)
+    return jsonify({"history": history, "count": len(history)})
+
+
+@app.route("/api/stats", methods=["GET"])
+def get_statistics():
+    """Return the database statistics."""
+    stats = get_stats()
+    return jsonify(stats)
+
+
+@app.route("/api/admin/add-fact", methods=["POST"])
+def admin_add_fact():
+    """Admin endpoint that stores a manually verified fact."""
+    # NOTE(review): no authentication is visible on this endpoint in this file;
+    # confirm it is protected upstream before exposing it.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    claim = str(data.get("claim") or "").strip()
+    verdict = str(data.get("verdict") or "").strip()
+    explanation = data.get("explanation", "")
+    source_url = data.get("source_url", "")
+
+    if not claim or not verdict:
+        return jsonify({"error": "Claim a verdict sú povinné"}), 400
+
+    success = add_verified_fact(claim, verdict, explanation, source_url)
+
+    if success:
+        return jsonify({"message": "Overený fakt pridaný", "claim": claim})
+    else:
+        return jsonify({"error": "Fakt už existuje"}), 409
+
+
+if __name__ == "__main__":
+    # NOTE(review): debug=True turns on Flask's interactive debugger and reloader;
+    # keep this entry point for local development only.
+    app.run(port=5000, debug=True)
diff --git a/backend/clear_cache.py b/backend/clear_cache.py
new file mode 100755
index 0000000..10f9a7a
--- /dev/null
+++ b/backend/clear_cache.py
@@ -0,0 +1,29 @@
+import sqlite3
+from database import DB_NAME
+
+def clear_cache():
+    """Delete every row from the fact_checks and verified_facts tables."""
+    conn = sqlite3.connect(DB_NAME)
+    cursor = conn.cursor()
+
+    # Delete all fact-checks
+    cursor.execute("DELETE FROM fact_checks")
+    deleted_checks = cursor.rowcount
+
+    # Delete all verified facts
+    cursor.execute("DELETE FROM verified_facts")
+    deleted_facts = cursor.rowcount
+
+    # Reset autoincrement
+    cursor.execute("DELETE FROM sqlite_sequence WHERE name='fact_checks'")
+    cursor.execute("DELETE FROM sqlite_sequence WHERE name='verified_facts'")
+
+    conn.commit()
+    conn.close()
+
+    print(f"✅ Cache vyčistená!")
+    print(f" - Vymazaných fact-checkov: {deleted_checks}")
+    print(f" - Vymazaných verified facts: {deleted_facts}")
+
+if __name__ == "__main__":
+    clear_cache()
diff --git a/backend/database.py b/backend/database.py
new file mode 100755
index 0000000..c92ef6b
--- /dev/null
+++ b/backend/database.py
@@ -0,0 +1,248 @@
+import sqlite3
+from contextlib import closing
+from datetime import datetime
+import hashlib
+import json
+
+DB_NAME = "factchecker.db"
+
+def get_db_connection():
+    """Open a connection to the database."""
+    conn = sqlite3.connect(DB_NAME)
+    conn.row_factory = sqlite3.Row  # Allows access to columns by name
+    return conn
+
+def init_db():
+    """Create the tables and index if they are missing and run the migration."""
+    with closing(get_db_connection()) as conn:
+        cursor = conn.cursor()
+
+        # Table for cached fact-checks
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS fact_checks (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                claim_hash TEXT UNIQUE NOT NULL,
+                claim TEXT NOT NULL,
+                verdict TEXT NOT NULL,
+                confidence REAL,
+                nli_votes TEXT,
+                evidence_for TEXT,
+                evidence_against TEXT,
+                sources TEXT,
+                model_name TEXT,
+                checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                check_count INTEGER DEFAULT 1
+            )
+        ''')
+
+        # MIGRATION: add the model_name column if it does not exist yet
+        try:
+            cursor.execute('ALTER TABLE fact_checks ADD COLUMN model_name TEXT')
+            print("Stĺpec 'model_name' bol pridaný do tabuľky fact_checks")
+        except sqlite3.OperationalError:
+            pass  # Column already exists
+
+        # Table for manually verified facts (admin)
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS verified_facts (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                claim TEXT UNIQUE NOT NULL,
+                verdict TEXT NOT NULL,
+                explanation TEXT,
+                source_url TEXT,
+                added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                added_by TEXT DEFAULT 'admin'
+            )
+        ''')
+
+        # Index for fast lookups
+        cursor.execute('CREATE INDEX IF NOT EXISTS idx_claim_hash ON fact_checks(claim_hash)')
+
+        conn.commit()
+    print(" Databáza inicializovaná")
+
+def hash_claim(claim: str) -> str:
+    """Return the MD5 hex digest of the lower-cased, stripped claim."""
+    normalized = claim.lower().strip()
+    return hashlib.md5(normalized.encode('utf-8')).hexdigest()
+
+def get_cached_result(claim: str):
+    """Return a stored result for ``claim``, or ``None`` if there is none.
+
+    Manually verified facts take precedence over cached model results. A
+    cache hit also bumps the entry's ``check_count`` and ``checked_at``.
+    """
+    claim_hash = hash_claim(claim)
+
+    with closing(get_db_connection()) as conn:
+        cursor = conn.cursor()
+
+        # Check verified_facts first (highest priority)
+        cursor.execute('''
+            SELECT claim, verdict, explanation, source_url, 'verified' as source_type
+            FROM verified_facts
+            WHERE LOWER(claim) = LOWER(?)
+        ''', (claim.strip(),))
+
+        verified = cursor.fetchone()
+        if verified:
+            return {
+                "claim": verified["claim"],
+                "verdict": f"✅ {verified['verdict']} (Overené)",
+                "explanation": verified["explanation"],
+                "sources": [verified["source_url"]] if verified["source_url"] else [],
+                "cached": True,
+                "verified": True
+            }
+
+        # Then check the cache
+        cursor.execute('''
+            SELECT * FROM fact_checks
+            WHERE claim_hash = ?
+            ORDER BY checked_at DESC
+            LIMIT 1
+        ''', (claim_hash,))
+
+        result = cursor.fetchone()
+        if result is None:
+            return None
+
+        # Increment the hit counter
+        cursor.execute('''
+            UPDATE fact_checks
+            SET check_count = check_count + 1,
+                checked_at = CURRENT_TIMESTAMP
+            WHERE claim_hash = ?
+        ''', (claim_hash,))
+        conn.commit()
+
+        # Deserialize the JSON columns (values are those read before the update)
+        return {
+            "claim": result["claim"],
+            "verdict": result["verdict"],
+            "nli_votes": json.loads(result["nli_votes"]) if result["nli_votes"] else None,
+            "evidence_for": json.loads(result["evidence_for"]) if result["evidence_for"] else [],
+            "evidence_against": json.loads(result["evidence_against"]) if result["evidence_against"] else [],
+            "sources": json.loads(result["sources"]) if result["sources"] else [],
+            "model_name": result["model_name"] if "model_name" in result.keys() else None,
+            "cached": True,
+            "checked_at": result["checked_at"],
+            "check_count": result["check_count"]
+        }
+
+def save_to_cache(claim: str, result: dict, model_name: str = "unknown"):
+    """Insert or update the cached fact-check result for ``claim``.
+
+    Returns ``True`` on success and ``False`` when the write fails (the error
+    is printed, not raised).
+    """
+    claim_hash = hash_claim(claim)
+
+    with closing(get_db_connection()) as conn:
+        cursor = conn.cursor()
+        try:
+            cursor.execute('''
+                INSERT INTO fact_checks
+                (claim_hash, claim, verdict, confidence, nli_votes, evidence_for, evidence_against, sources, model_name)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(claim_hash) DO UPDATE SET
+                    verdict = excluded.verdict,
+                    confidence = excluded.confidence,
+                    nli_votes = excluded.nli_votes,
+                    evidence_for = excluded.evidence_for,
+                    evidence_against = excluded.evidence_against,
+                    sources = excluded.sources,
+                    model_name = excluded.model_name,
+                    checked_at = CURRENT_TIMESTAMP,
+                    check_count = check_count + 1
+            ''', (
+                claim_hash,
+                claim,
+                result.get("verdict", ""),
+                result.get("confidence"),
+                json.dumps(result.get("nli_votes")) if result.get("nli_votes") else None,
+                json.dumps(result.get("evidence_for", [])),
+                json.dumps(result.get("evidence_against", [])),
+                json.dumps(result.get("sources", [])),
+                model_name
+            ))
+
+            conn.commit()
+            return True
+        except Exception as e:
+            print(f" Chyba pri ukladaní do cache: {e}")
+            return False
+
+def get_history(limit: int = 50):
+    """Return up to ``limit`` cached fact-checks, most recently checked first."""
+    with closing(get_db_connection()) as conn:
+        cursor = conn.cursor()
+
+        # Try to read model_name as well; fall back if the column does not exist
+        try:
+            cursor.execute('''
+                SELECT claim, verdict, checked_at, check_count, sources, model_name
+                FROM fact_checks
+                ORDER BY checked_at DESC
+                LIMIT ?
+            ''', (limit,))
+        except sqlite3.OperationalError:
+            # Fallback for an old DB without model_name
+            cursor.execute('''
+                SELECT claim, verdict, checked_at, check_count, sources, NULL as model_name
+                FROM fact_checks
+                ORDER BY checked_at DESC
+                LIMIT ?
+            ''', (limit,))
+
+        results = cursor.fetchall()
+
+    return [{
+        "claim": row["claim"],
+        "verdict": row["verdict"],
+        "checked_at": row["checked_at"],
+        "check_count": row["check_count"],
+        "sources": json.loads(row["sources"]) if row["sources"] else [],
+        "model_name": row["model_name"]
+    } for row in results]
+
+def add_verified_fact(claim: str, verdict: str, explanation: str = None, source_url: str = None):
+    """Store a manually verified fact (admin function).
+
+    Returns ``True`` when the row was inserted and ``False`` on an integrity error.
+    """
+    with closing(get_db_connection()) as conn:
+        cursor = conn.cursor()
+        try:
+            cursor.execute('''
+                INSERT INTO verified_facts (claim, verdict, explanation, source_url)
+                VALUES (?, ?, ?, ?)
+            ''', (claim, verdict, explanation, source_url))
+
+            conn.commit()
+            return True
+        except sqlite3.IntegrityError:
+            return False  # Already exists
+
+def get_stats():
+    """Return the number of cached claims, total checks and verified facts."""
+    with closing(get_db_connection()) as conn:
+        cursor = conn.cursor()
+
+        cursor.execute('SELECT COUNT(*) as total FROM fact_checks')
+        total = cursor.fetchone()["total"]
+
+        cursor.execute('SELECT SUM(check_count) as total_checks FROM fact_checks')
+        total_checks = cursor.fetchone()["total_checks"] or 0
+
+        cursor.execute('SELECT COUNT(*) as verified_count FROM verified_facts')
+        verified = cursor.fetchone()["verified_count"]
+
+    return {
+        "unique_claims": total,
+        "total_checks": total_checks,
+        "verified_facts": verified
+    }
+
+# Initialize the database on first import
+init_db()