DiplomovaPraca/embedding_server.py
2026-05-14 12:26:11 +02:00

70 lines
1.9 KiB
Python

"""
Local OpenAI-compatible embedding server built on sentence-transformers.
Model: paraphrase-multilingual-MiniLM-L12-v2 (supports the Slovak language!)
Run:
    python embedding_server.py
Test:
    curl http://localhost:8010/v1/embeddings -H "Content-Type: application/json" \
        -d '{"model": "local-embed", "input": "test"}'
"""
import time
import json
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import uvicorn
from sentence_transformers import SentenceTransformer
# Model identifier passed to SentenceTransformer below and echoed in responses.
MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"
# TCP port handed to uvicorn.run in the __main__ guard.
PORT = 8010
# The model is loaded once, at import time, before the app object is created.
# The two messages printed around the load are in Russian:
# "Loading model ..." and "Model loaded. Dimension: ...".
print(f"Загрузка модели {MODEL_NAME}...")
model = SentenceTransformer(MODEL_NAME)
# Embedding dimension as reported by the loaded model; returned by /health.
EMBED_DIM = model.get_sentence_embedding_dimension()
print(f"Модель загружена. Размерность: {EMBED_DIM}")
# Application object on which the route decorators below register handlers.
app = FastAPI(title="Local Embedding Server")
@app.get("/health")
def health():
    """Return a status payload with the model name and embedding dimension."""
    payload = {
        "status": "ok",
        "model": MODEL_NAME,
        "dim": EMBED_DIM,
    }
    return payload
def _invalid_request(message):
    """Build a 400 JSON response carrying an OpenAI-style ``error`` object."""
    return JSONResponse(
        status_code=400,
        content={"error": {"message": message, "type": "invalid_request_error"}},
    )


async def _handle_embeddings(request: Request):
    """Serve an embeddings request shared by both POST routes.

    The body must be a JSON object whose ``input`` field is a string or a
    list of strings. A missing ``input`` is treated as the empty string.
    Each text is encoded by the module-level ``model`` with
    ``normalize_embeddings=True``.

    Args:
        request: Incoming request carrying the JSON payload.

    Returns:
        JSONResponse: On success, an object with ``object``, ``data`` (one
        entry per input text, in input order), ``model`` and ``usage``.
        For a body that is not valid JSON, is not a JSON object, or whose
        ``input`` has an unsupported type, a 400 response with an ``error``
        object is returned instead of letting the exception escape.
    """
    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        return _invalid_request("Request body must be valid JSON.")
    if not isinstance(body, dict):
        return _invalid_request("Request body must be a JSON object.")

    inp = body.get("input", "")
    if isinstance(inp, str):
        texts = [inp]
    elif isinstance(inp, list) and all(isinstance(t, str) for t in inp):
        texts = inp
    else:
        return _invalid_request("'input' must be a string or a list of strings.")

    # Nothing to encode for an empty list, so skip the model call entirely.
    vecs = model.encode(texts, normalize_embeddings=True).tolist() if texts else []
    data = [
        {"object": "embedding", "index": i, "embedding": vec}
        for i, vec in enumerate(vecs)
    ]

    # Whitespace-separated word count, used as a stand-in for a token count.
    token_count = sum(len(t.split()) for t in texts)
    return JSONResponse({
        "object": "list",
        "data": data,
        "model": MODEL_NAME,
        "usage": {"prompt_tokens": token_count, "total_tokens": token_count},
    })
@app.post("/v1/embeddings")
async def embeddings_v1(request: Request):
    """Handle POST /v1/embeddings by delegating to ``_handle_embeddings``."""
    response = await _handle_embeddings(request)
    return response
@app.post("/embeddings")
async def embeddings_root(request: Request):
    """Handle POST /embeddings by delegating to ``_handle_embeddings``."""
    response = await _handle_embeddings(request)
    return response
if __name__ == "__main__":
    # Start the server only when executed as a script.
    # host "0.0.0.0" listens on every network interface, not just loopback.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=PORT,
        log_level="warning",
    )