zkt26/sk1/web/api/main.py
2026-05-13 21:49:47 +02:00

90 lines
2.4 KiB
Python

import asyncio
import os
from contextlib import asynccontextmanager
from urllib.parse import quote

import asyncpg
import httpx
import trafilatura
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Connection string for PostgreSQL, assembled from required environment
# variables (only DB_PORT has a default). The user name and password are
# percent-encoded so that reserved URL characters in them (e.g. "@", ":",
# "/", "#") cannot corrupt the structure of the DSN.
DB_DSN = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
    user=quote(os.environ["DB_USER"], safe=""),
    password=quote(os.environ["DB_PASSWORD"], safe=""),
    host=os.environ["DB_HOST"],
    port=os.environ.get("DB_PORT", "5432"),
    dbname=os.environ["DB_NAME"],
)

# Base URL of the summarizer service; the "/summarize" path is appended when
# a request is made.
SUMMARIZER_URL = os.environ.get("SUMMARIZER_URL", "http://summarizer:8000")

# Shared asyncpg connection pool; assigned during application startup and
# None until then.
pool = None
# Schema bootstrap statement executed once at application startup.
_CREATE_ARTICLES_TABLE = """
CREATE TABLE IF NOT EXISTS articles (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
title TEXT DEFAULT '',
full_text TEXT DEFAULT '',
summary TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT NOW()
)
"""


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the database pool for the lifetime of the application.

    Before yielding, create the module-level asyncpg pool from ``DB_DSN`` and
    make sure the ``articles`` table exists. After the yield, close the pool.

    Args:
        app: The FastAPI application this context is attached to; not used
            in the body.

    Yields:
        None, while the application is running.
    """
    global pool
    pool = await asyncpg.create_pool(DB_DSN)
    try:
        await pool.execute(_CREATE_ARTICLES_TABLE)
        yield
    finally:
        # Close the pool even when schema creation fails or an exception is
        # thrown into the generator at the yield point, so that open
        # connections are never leaked.
        await pool.close()
# Application object; lifespan() is registered as its lifespan handler.
app = FastAPI(lifespan=lifespan)
class URLRequest(BaseModel):
    """Request body carrying the URL of a page to process."""

    # Address of the page, accepted as a plain string.
    url: str
@app.get("/api/health")
async def health():
    """Report that the service is up; always answers with an ``ok`` status."""
    payload = {"status": "ok"}
    return payload
@app.get("/api/articles")
async def list_articles():
    """Return up to 50 stored articles, most recently created first.

    Each entry is a plain dict with ``id``, ``url``, ``title``, ``summary``
    and ``created_at``; the full article text is not selected.
    """
    query = (
        "SELECT id, url, title, summary, created_at "
        "FROM articles ORDER BY created_at DESC LIMIT 50"
    )
    records = await pool.fetch(query)
    return list(map(dict, records))
@app.post("/api/articles")
async def create_article(req: URLRequest):
    """Fetch a web page, summarize its text, and store the result.

    Args:
        req: Request body carrying the URL of the page to process.

    Returns:
        A dict with ``id``, ``url``, ``title``, ``summary`` and
        ``created_at`` of the newly inserted row.

    Raises:
        HTTPException: 400 if the page cannot be downloaded or no text can
            be extracted from it; 502 if the summarizer cannot be reached,
            answers with a non-200 status, or returns a malformed body.
    """
    # The trafilatura calls are plain (non-awaitable) functions, so they are
    # run in a worker thread to keep the event loop free.
    downloaded = await asyncio.to_thread(trafilatura.fetch_url, req.url)
    if not downloaded:
        raise HTTPException(400, "Failed to fetch URL")

    # NOTE(review): the code below assumes bare_extraction returns a
    # dict-like object supporting .get(); confirm against the pinned
    # trafilatura version.
    result = await asyncio.to_thread(
        trafilatura.bare_extraction, downloaded, include_comments=False
    )
    if not result or not result.get("text"):
        raise HTTPException(400, "Failed to extract text")
    text = result["text"]
    # The "title" key may be present with a None value; normalize it to ""
    # so the stored row matches the column default instead of holding NULL.
    title = result.get("title") or ""

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(
                f"{SUMMARIZER_URL}/summarize", json={"text": text}
            )
    except httpx.HTTPError as exc:
        # Connection failures and timeouts are upstream problems: report
        # them the same way as a non-200 reply rather than as a 500.
        raise HTTPException(502, "Summarizer service unavailable") from exc
    if resp.status_code != 200:
        raise HTTPException(502, "Summarizer service unavailable")

    try:
        summary = resp.json()["summary"]
    except (ValueError, KeyError, TypeError) as exc:
        # Body is not JSON, is not an object, or lacks the "summary" field.
        raise HTTPException(
            502, "Summarizer returned an invalid response"
        ) from exc

    row = await pool.fetchrow(
        "INSERT INTO articles (url, title, full_text, summary) "
        "VALUES ($1, $2, $3, $4) "
        "RETURNING id, url, title, summary, created_at",
        req.url, title, text, summary,
    )
    return dict(row)