fact_checker/backend/database.py
VIliam c80694f543 Add missing backend files (app.py, database.py, clear_cache.py)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 15:30:09 +01:00

255 lines
8.4 KiB
Python
Executable File

import sqlite3
from datetime import datetime
import hashlib
import json
# File name of the SQLite database; this value is passed unchanged to
# sqlite3.connect() by get_db_connection().
DB_NAME = "factchecker.db"
def get_db_connection():
    """Open and return a new SQLite connection to ``DB_NAME``.

    The connection's ``row_factory`` is set to ``sqlite3.Row`` so that
    fetched rows allow column access by name as well as by index.
    The caller is responsible for closing the returned connection.
    """
    connection = sqlite3.connect(DB_NAME)
    # Name-based column access (row["claim"]) instead of plain tuples.
    connection.row_factory = sqlite3.Row
    return connection
# Schema of the cache table holding automatically produced fact checks.
_CREATE_FACT_CHECKS_SQL = '''
CREATE TABLE IF NOT EXISTS fact_checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
claim_hash TEXT UNIQUE NOT NULL,
claim TEXT NOT NULL,
verdict TEXT NOT NULL,
confidence REAL,
nli_votes TEXT,
evidence_for TEXT,
evidence_against TEXT,
sources TEXT,
model_name TEXT,
checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
check_count INTEGER DEFAULT 1
)
'''

# Schema of the table holding manually verified facts (admin entries).
_CREATE_VERIFIED_FACTS_SQL = '''
CREATE TABLE IF NOT EXISTS verified_facts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
claim TEXT UNIQUE NOT NULL,
verdict TEXT NOT NULL,
explanation TEXT,
source_url TEXT,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
added_by TEXT DEFAULT 'admin'
)
'''


def init_db():
    """Create the database tables and index if they do not exist yet.

    Steps, all executed on one connection and committed together:

    1. Create ``fact_checks`` (cache of automatic fact checks).
    2. Migrate older databases by adding the ``model_name`` column when
       it is missing.
    3. Create ``verified_facts`` (manually verified facts).
    4. Create the ``idx_claim_hash`` index on ``fact_checks(claim_hash)``.

    Prints a message when the migration adds the column and a final
    message once initialization has finished.

    Raises:
        sqlite3.Error: if any statement fails. The connection is closed
            before the error propagates.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        cursor.execute(_CREATE_FACT_CHECKS_SQL)

        # MIGRATION: add ``model_name`` to databases created before the
        # column existed. The schema is inspected first instead of running
        # ALTER TABLE unconditionally and swallowing OperationalError, so
        # that genuine failures (locked or read-only database, ...) are not
        # silently mistaken for "column already exists".
        cursor.execute('PRAGMA table_info(fact_checks)')
        # Index 1 of each table_info row is the column name; positional
        # access works regardless of the connection's row factory.
        existing_columns = {column_info[1] for column_info in cursor.fetchall()}
        if "model_name" not in existing_columns:
            cursor.execute('ALTER TABLE fact_checks ADD COLUMN model_name TEXT')
            print("Stĺpec 'model_name' bol pridaný do tabuľky fact_checks")

        cursor.execute(_CREATE_VERIFIED_FACTS_SQL)

        # Index for fast lookups by claim hash.
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_claim_hash ON fact_checks(claim_hash)')

        conn.commit()
    finally:
        # Always release the connection, even when a statement failed.
        conn.close()
    print(" Databáza inicializovaná")
def hash_claim(claim: str) -> str:
    """Return the MD5 hex digest that identifies *claim* in the cache.

    The claim is lower-cased and stripped of leading/trailing whitespace
    before hashing, so inputs differing only in case or surrounding
    whitespace produce the same digest. The text is encoded as UTF-8.
    """
    digest = hashlib.md5()
    digest.update(claim.lower().strip().encode('utf-8'))
    return digest.hexdigest()
# Lookup of a manually verified fact. ``unicode_lower`` is a Python-backed
# SQL function registered by get_cached_result(); the parameter is already
# lower-cased on the Python side.
_SELECT_VERIFIED_FACT_SQL = '''
SELECT claim, verdict, explanation, source_url, 'verified' as source_type
FROM verified_facts
WHERE unicode_lower(claim) = ?
'''

# Most recent cached fact check for a claim hash.
_SELECT_CACHED_CHECK_SQL = '''
SELECT * FROM fact_checks
WHERE claim_hash = ?
ORDER BY checked_at DESC
LIMIT 1
'''

# Bump the hit counter and refresh the timestamp of a cached fact check.
_TOUCH_CACHED_CHECK_SQL = '''
UPDATE fact_checks
SET check_count = check_count + 1,
checked_at = CURRENT_TIMESTAMP
WHERE claim_hash = ?
'''


def _unicode_lower(text):
    """Lower-case *text* with Python's Unicode rules; pass non-strings through."""
    return text.lower() if isinstance(text, str) else text


def _load_json_column(raw, default):
    """Decode a JSON text column, returning *default* for NULL or empty text."""
    return json.loads(raw) if raw else default


def get_cached_result(claim: str):
    """Look up a previously stored result for *claim*.

    Manually verified facts take priority over the automatic cache:

    1. ``verified_facts`` is searched for a claim equal to the stripped
       input, ignoring case. The comparison uses Python's Unicode-aware
       lower-casing because SQLite's built-in ``LOWER()`` only folds ASCII
       letters and would miss claims containing non-ASCII capitals.
    2. Otherwise ``fact_checks`` is searched by ``hash_claim(claim)``. On a
       hit the row's ``check_count`` is incremented and ``checked_at`` is
       refreshed.

    Args:
        claim: The claim text to look up.

    Returns:
        ``None`` when nothing is stored. For a verified fact, a dict with
        ``claim``, ``verdict`` (suffixed with " (Overené)"), ``explanation``,
        ``sources``, ``cached=True`` and ``verified=True``. For a cache hit,
        a dict with ``claim``, ``verdict``, ``confidence``, ``nli_votes``,
        ``evidence_for``, ``evidence_against``, ``sources``, ``model_name``,
        ``cached=True``, ``checked_at`` and ``check_count``. The last two
        hold the values read *before* this call's increment/refresh.

    Raises:
        sqlite3.Error: if a query fails.
        json.JSONDecodeError: if a stored JSON column is corrupt.
        The connection is closed in both cases.
    """
    claim_hash = hash_claim(claim)
    conn = get_db_connection()
    try:
        # Registered per connection; makes the case-insensitive match work
        # for the full Unicode range, consistent with hash_claim().
        conn.create_function("unicode_lower", 1, _unicode_lower)
        cursor = conn.cursor()

        # Verified facts first (highest priority).
        cursor.execute(_SELECT_VERIFIED_FACT_SQL, (claim.strip().lower(),))
        verified = cursor.fetchone()
        if verified is not None:
            source_url = verified["source_url"]
            return {
                "claim": verified["claim"],
                "verdict": f"{verified['verdict']} (Overené)",
                "explanation": verified["explanation"],
                "sources": [source_url] if source_url else [],
                "cached": True,
                "verified": True
            }

        # Then the automatic cache.
        cursor.execute(_SELECT_CACHED_CHECK_SQL, (claim_hash,))
        cached = cursor.fetchone()
        if cached is None:
            return None

        # Record the cache hit.
        cursor.execute(_TOUCH_CACHED_CHECK_SQL, (claim_hash,))
        conn.commit()

        # Tolerate databases whose table lacks optional columns.
        columns = cached.keys()
        return {
            "claim": cached["claim"],
            "verdict": cached["verdict"],
            # Stored by save_to_cache(); returned so cached results carry
            # the same information as freshly computed ones.
            "confidence": cached["confidence"] if "confidence" in columns else None,
            "nli_votes": _load_json_column(cached["nli_votes"], None),
            "evidence_for": _load_json_column(cached["evidence_for"], []),
            "evidence_against": _load_json_column(cached["evidence_against"], []),
            "sources": _load_json_column(cached["sources"], []),
            "model_name": cached["model_name"] if "model_name" in columns else None,
            "cached": True,
            "checked_at": cached["checked_at"],
            "check_count": cached["check_count"]
        }
    finally:
        # Release the connection on every path, including errors.
        conn.close()
# Insert a fact check, or refresh the existing row for the same claim hash
# (incrementing its check_count and updating checked_at).
_UPSERT_FACT_CHECK_SQL = '''
INSERT INTO fact_checks
(claim_hash, claim, verdict, confidence, nli_votes, evidence_for, evidence_against, sources, model_name)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(claim_hash) DO UPDATE SET
verdict = excluded.verdict,
confidence = excluded.confidence,
nli_votes = excluded.nli_votes,
evidence_for = excluded.evidence_for,
evidence_against = excluded.evidence_against,
sources = excluded.sources,
model_name = excluded.model_name,
checked_at = CURRENT_TIMESTAMP,
check_count = check_count + 1
'''


def save_to_cache(claim: str, result: dict, model_name: str = "unknown"):
    """Store the outcome of a fact check in the ``fact_checks`` cache.

    The row is keyed by ``hash_claim(claim)``. If a row with that hash
    already exists it is overwritten with the new values, its
    ``checked_at`` is refreshed and its ``check_count`` is incremented.

    Args:
        claim: The claim text that was checked.
        result: Result dict; the keys ``verdict``, ``confidence``,
            ``nli_votes``, ``evidence_for``, ``evidence_against`` and
            ``sources`` are read with ``dict.get``. The list/dict values
            are stored as JSON text.
        model_name: Name of the model that produced the result.

    Returns:
        ``True`` when the row was written and committed, ``False`` when
        any exception occurred (the error is printed, not raised).
    """
    conn = get_db_connection()
    cursor = conn.cursor()
    claim_hash = hash_claim(claim)

    try:
        votes = result.get("nli_votes")
        row_values = (
            claim_hash,
            claim,
            result.get("verdict", ""),
            result.get("confidence"),
            # Falsy votes (None, empty) are stored as NULL rather than JSON.
            json.dumps(votes) if votes else None,
            json.dumps(result.get("evidence_for", [])),
            json.dumps(result.get("evidence_against", [])),
            json.dumps(result.get("sources", [])),
            model_name,
        )
        cursor.execute(_UPSERT_FACT_CHECK_SQL, row_values)
        conn.commit()
        conn.close()
    except Exception as e:
        # Best-effort cache: report the problem and signal failure.
        print(f" Chyba pri ukladaní do cache: {e}")
        conn.close()
        return False
    return True
# History query for databases that have the model_name column.
_HISTORY_SQL = '''
SELECT claim, verdict, checked_at, check_count, sources, model_name
FROM fact_checks
ORDER BY checked_at DESC
LIMIT ?
'''

# Fallback history query for old databases without model_name.
_HISTORY_WITHOUT_MODEL_SQL = '''
SELECT claim, verdict, checked_at, check_count, sources, NULL as model_name
FROM fact_checks
ORDER BY checked_at DESC
LIMIT ?
'''


def get_history(limit: int = 50):
    """Return the most recently checked claims from the cache.

    Args:
        limit: Maximum number of rows to return; passed to SQL ``LIMIT``.

    Returns:
        A list of dicts ordered by ``checked_at`` descending, each with
        the keys ``claim``, ``verdict``, ``checked_at``, ``check_count``,
        ``sources`` (decoded from JSON, ``[]`` when empty) and
        ``model_name`` (``None`` when the fallback query was used).

    Raises:
        sqlite3.Error: if the fallback query fails as well. The
            connection is closed before the error propagates.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(_HISTORY_SQL, (limit,))
        except sqlite3.OperationalError:
            # Retry without the model_name column (old database schema).
            cursor.execute(_HISTORY_WITHOUT_MODEL_SQL, (limit,))
        rows = cursor.fetchall()
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    return [{
        "claim": row["claim"],
        "verdict": row["verdict"],
        "checked_at": row["checked_at"],
        "check_count": row["check_count"],
        "sources": json.loads(row["sources"]) if row["sources"] else [],
        "model_name": row["model_name"]
    } for row in rows]
# Insert one manually verified fact.
_INSERT_VERIFIED_FACT_SQL = '''
INSERT INTO verified_facts (claim, verdict, explanation, source_url)
VALUES (?, ?, ?, ?)
'''


def add_verified_fact(claim: str, verdict: str, explanation: str = None, source_url: str = None):
    """Add a manually verified fact (admin function).

    Args:
        claim: The claim text; must be unique within ``verified_facts``.
        verdict: The verdict to store for the claim.
        explanation: Optional explanation text.
        source_url: Optional URL of the supporting source.

    Returns:
        ``True`` when the row was inserted and committed, ``False`` when
        the insert violated a table constraint (typically because the
        claim already exists).

    Raises:
        sqlite3.Error: for failures other than ``IntegrityError``. The
            connection is closed before the error propagates.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            _INSERT_VERIFIED_FACT_SQL,
            (claim, verdict, explanation, source_url),
        )
        conn.commit()
        return True
    except sqlite3.IntegrityError:
        return False  # Already exists
    finally:
        # Runs on success, on IntegrityError and on unexpected errors alike.
        conn.close()
def get_stats():
    """Return summary statistics about the stored data.

    Returns:
        A dict with:

        * ``unique_claims`` - number of rows in ``fact_checks``.
        * ``total_checks`` - sum of ``check_count`` over ``fact_checks``
          (0 when the table is empty).
        * ``verified_facts`` - number of rows in ``verified_facts``.

    Raises:
        sqlite3.Error: if a query fails. The connection is closed before
            the error propagates.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        cursor.execute('SELECT COUNT(*) as total FROM fact_checks')
        unique_claims = cursor.fetchone()["total"]

        cursor.execute('SELECT SUM(check_count) as total_checks FROM fact_checks')
        # SUM() yields NULL on an empty table; report that as 0.
        total_checks = cursor.fetchone()["total_checks"] or 0

        cursor.execute('SELECT COUNT(*) as verified_count FROM verified_facts')
        verified_count = cursor.fetchone()["verified_count"]
    finally:
        # Close on every path so a failing query cannot leak the connection.
        conn.close()

    return {
        "unique_claims": unique_claims,
        "total_checks": total_checks,
        "verified_facts": verified_count
    }
# Initialize the database (create any missing tables and the index) as a
# side effect of importing this module.
init_db()