""" Локальный OpenAI-compatible embedding сервер на базе sentence-transformers. Модель: paraphrase-multilingual-MiniLM-L12-v2 (поддерживает словацкий язык!) Запуск: python embedding_server.py Тест: curl http://localhost:8010/v1/embeddings -H "Content-Type: application/json" \ -d '{"model": "local-embed", "input": "test"}' """ import time import json from fastapi import FastAPI, Request from fastapi.responses import JSONResponse import uvicorn from sentence_transformers import SentenceTransformer MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2" PORT = 8010 print(f"Загрузка модели {MODEL_NAME}...") model = SentenceTransformer(MODEL_NAME) EMBED_DIM = model.get_sentence_embedding_dimension() print(f"Модель загружена. Размерность: {EMBED_DIM}") app = FastAPI(title="Local Embedding Server") @app.get("/health") def health(): return {"status": "ok", "model": MODEL_NAME, "dim": EMBED_DIM} async def _handle_embeddings(request: Request): body = await request.json() inp = body.get("input", "") if isinstance(inp, str): texts = [inp] else: texts = inp vecs = model.encode(texts, normalize_embeddings=True).tolist() data = [ {"object": "embedding", "index": i, "embedding": vec} for i, vec in enumerate(vecs) ] return JSONResponse({ "object": "list", "data": data, "model": MODEL_NAME, "usage": {"prompt_tokens": sum(len(t.split()) for t in texts), "total_tokens": sum(len(t.split()) for t in texts)} }) @app.post("/v1/embeddings") async def embeddings_v1(request: Request): return await _handle_embeddings(request) @app.post("/embeddings") async def embeddings_root(request: Request): return await _handle_embeddings(request) if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="warning")