90 lines
2.4 KiB
Python
90 lines
2.4 KiB
Python
import asyncio
import os
from contextlib import asynccontextmanager
from urllib.parse import quote, urlsplit

import asyncpg
import httpx
import trafilatura
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
|
|
|
|
# PostgreSQL connection URI assembled from the environment.
# DB_USER, DB_PASSWORD, DB_HOST and DB_NAME are required (KeyError at import
# time if missing); DB_PORT falls back to 5432.
# User, password and database name are percent-encoded so that reserved
# characters such as "@", ":", "/" or "#" cannot corrupt the URI structure.
DB_DSN = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
    user=quote(os.environ["DB_USER"], safe=""),
    password=quote(os.environ["DB_PASSWORD"], safe=""),
    host=os.environ["DB_HOST"],
    port=os.environ.get("DB_PORT", "5432"),
    dbname=quote(os.environ["DB_NAME"], safe=""),
)

# Base URL of the summarizer service. Trailing slashes are dropped so that
# appending "/summarize" never produces a double slash.
SUMMARIZER_URL = os.environ.get(
    "SUMMARIZER_URL", "http://summarizer:8000"
).rstrip("/")

# Shared asyncpg connection pool; assigned by lifespan() during startup.
pool = None
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the database pool for the lifetime of the application.

    On startup, creates the module-level asyncpg pool from ``DB_DSN`` and
    makes sure the ``articles`` table exists. On shutdown, closes the pool.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    global pool
    pool = await asyncpg.create_pool(DB_DSN)
    try:
        await pool.execute("""
            CREATE TABLE IF NOT EXISTS articles (
                id SERIAL PRIMARY KEY,
                url TEXT NOT NULL,
                title TEXT DEFAULT '',
                full_text TEXT DEFAULT '',
                summary TEXT DEFAULT '',
                created_at TIMESTAMPTZ DEFAULT NOW()
            )
        """)
        yield
    finally:
        # Release connections even if schema creation fails or an exception
        # is thrown into the context manager at the yield point.
        await pool.close()
|
|
|
|
|
|
# ASGI application object; the lifespan handler is run around the app's lifetime.
app = FastAPI(lifespan=lifespan)
|
|
|
|
|
|
class URLRequest(BaseModel):
    """Request payload accepted by ``POST /api/articles``."""

    # Address of the page to fetch; typed as a plain string, so no URL
    # format validation happens at the model level.
    url: str
|
|
|
|
|
|
@app.get("/api/health")
async def health():
    """Report service liveness with a constant ``{"status": "ok"}`` body."""
    payload = {"status": "ok"}
    return payload
|
|
|
|
|
|
@app.get("/api/articles")
async def list_articles():
    """Return up to 50 stored articles, newest first.

    Each entry is a dict with ``id``, ``url``, ``title``, ``summary`` and
    ``created_at``; the ``full_text`` column is not selected.
    """
    query = (
        "SELECT id, url, title, summary, created_at "
        "FROM articles ORDER BY created_at DESC LIMIT 50"
    )
    records = await pool.fetch(query)
    return list(map(dict, records))
|
|
|
|
|
|
|
|
def _is_http_url(url: str) -> bool:
    """Return True when *url* is an absolute http(s) URL with a host part."""
    try:
        parts = urlsplit(url)
    except ValueError:
        # urlsplit rejects some malformed inputs (e.g. a broken IPv6 literal).
        return False
    return parts.scheme in ("http", "https") and bool(parts.netloc)


@app.post("/api/articles")
async def create_article(req: URLRequest):
    """Fetch a web page, extract its text, summarize it and store the result.

    Args:
        req: Request body carrying the URL of the page to ingest.

    Returns:
        A dict with the stored row's ``id``, ``url``, ``title``, ``summary``
        and ``created_at``.

    Raises:
        HTTPException: 400 if the URL is not http(s), cannot be fetched, or
            yields no extractable text; 502 if the summarizer service cannot
            be reached, answers with a non-200 status, or returns a body
            without a ``summary`` field.
    """
    # The URL is caller-supplied: refuse anything that is not plain web
    # traffic before handing it to the fetcher.
    if not _is_http_url(req.url):
        raise HTTPException(400, "Only http and https URLs are supported")

    # trafilatura is synchronous; run it in a worker thread so the event
    # loop stays free.
    downloaded = await asyncio.to_thread(trafilatura.fetch_url, req.url)
    if not downloaded:
        raise HTTPException(400, "Failed to fetch URL")

    result = await asyncio.to_thread(
        trafilatura.bare_extraction, downloaded, include_comments=False
    )
    # NOTE(review): newer trafilatura releases appear to return a Document
    # object instead of a dict; normalise so both shapes work. Confirm
    # against the pinned trafilatura version.
    if result is not None and not isinstance(result, dict) and hasattr(result, "as_dict"):
        result = result.as_dict()
    if not result or not result.get("text"):
        raise HTTPException(400, "Failed to extract text")

    text = result["text"]
    # The key may be present with a None value; `or` keeps NULL titles out
    # of the database, where a plain .get() default would not.
    title = result.get("title") or ""

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(
                f"{SUMMARIZER_URL}/summarize", json={"text": text}
            )
    except httpx.HTTPError as exc:
        # Connection failures and timeouts are upstream problems, not ours.
        raise HTTPException(502, "Summarizer service unavailable") from exc

    if resp.status_code != 200:
        raise HTTPException(502, "Summarizer service unavailable")

    try:
        summary = resp.json()["summary"]
    except (ValueError, KeyError, TypeError) as exc:
        # Non-JSON body, or JSON without the expected "summary" field.
        raise HTTPException(502, "Summarizer returned an invalid response") from exc

    row = await pool.fetchrow(
        "INSERT INTO articles (url, title, full_text, summary) "
        "VALUES ($1, $2, $3, $4) "
        "RETURNING id, url, title, summary, created_at",
        req.url, title, text, summary,
    )
    return dict(row)
|