70 lines
1.9 KiB
Python
70 lines
1.9 KiB
Python
"""
Local OpenAI-compatible embedding server built on sentence-transformers.
Model: paraphrase-multilingual-MiniLM-L12-v2 (supports the Slovak language!)

Run:
    python embedding_server.py

Test:
    curl http://localhost:8010/v1/embeddings -H "Content-Type: application/json" \
        -d '{"model": "local-embed", "input": "test"}'
"""
|
|
|
|
import time
|
|
import json
|
|
from fastapi import FastAPI, Request
|
|
from fastapi.responses import JSONResponse
|
|
import uvicorn
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
# Name of the sentence-transformers checkpoint handed to SentenceTransformer.
MODEL_NAME: str = "paraphrase-multilingual-MiniLM-L12-v2"
# TCP port the server listens on when this file is run as a script.
PORT: int = 8010

# The model is loaded once, at import time, and shared by every request.
# Progress is reported on stdout before and after the load.
print(f"Загрузка модели {MODEL_NAME}...")
model = SentenceTransformer(MODEL_NAME)
# Embedding width as reported by the loaded model; exposed via /health.
EMBED_DIM = model.get_sentence_embedding_dimension()
print(f"Модель загружена. Размерность: {EMBED_DIM}")

# ASGI application object that the route decorators below attach to.
app = FastAPI(title="Local Embedding Server")
|
|
|
|
|
|
@app.get("/health")
def health():
    """Report liveness together with the loaded model name and embedding size."""
    status_payload = {
        "status": "ok",
        "model": MODEL_NAME,
        "dim": EMBED_DIM,
    }
    return status_payload
|
|
|
|
|
|
def _invalid_request(message):
    """Build a 400 response carrying an OpenAI-style ``error`` object."""
    return JSONResponse(
        {"error": {"message": message, "type": "invalid_request_error"}},
        status_code=400,
    )


async def _handle_embeddings(request: Request):
    """Embed the request's ``input`` and reply in the OpenAI embeddings format.

    The JSON body must be an object whose ``input`` is either a single string
    or a list of strings; a missing ``input`` is treated as the empty string.
    Malformed payloads are answered with a 400 response instead of surfacing
    as an unhandled server error.

    Args:
        request: Incoming request carrying the JSON payload.

    Returns:
        A ``JSONResponse`` with one ``embedding`` entry per input text (in
        input order), the model name and an approximate ``usage`` count, or a
        400 ``JSONResponse`` explaining why the payload was rejected.
    """
    try:
        body = await request.json()
    except ValueError:
        # ValueError covers both json.JSONDecodeError and undecodable bytes.
        return _invalid_request("Request body must be valid JSON.")

    if not isinstance(body, dict):
        return _invalid_request("Request body must be a JSON object.")

    inp = body.get("input", "")
    texts = [inp] if isinstance(inp, str) else inp
    # Token-id arrays (allowed by the OpenAI API) cannot be decoded here, and
    # null/number inputs would crash further down, so reject them up front.
    if not isinstance(texts, list) or not all(isinstance(t, str) for t in texts):
        return _invalid_request("'input' must be a string or a list of strings.")

    # NOTE(review): encode() is called synchronously inside this coroutine, so
    # the event loop is busy for the duration of the model call.
    vecs = model.encode(texts, normalize_embeddings=True).tolist()

    data = [
        {"object": "embedding", "index": i, "embedding": vec}
        for i, vec in enumerate(vecs)
    ]
    # Whitespace-separated word count is a rough stand-in for real token usage.
    token_count = sum(len(t.split()) for t in texts)
    return JSONResponse({
        "object": "list",
        "data": data,
        "model": MODEL_NAME,
        "usage": {"prompt_tokens": token_count, "total_tokens": token_count},
    })
|
|
|
|
|
|
@app.post("/v1/embeddings")
async def embeddings_v1(request: Request):
    """Serve the embeddings endpoint under the ``/v1`` prefix."""
    response = await _handle_embeddings(request)
    return response
|
|
|
|
|
|
@app.post("/embeddings")
async def embeddings_root(request: Request):
    """Serve the same embeddings handler at the un-prefixed ``/embeddings`` path."""
    response = await _handle_embeddings(request)
    return response
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: listen on all interfaces and only log warnings and above.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=PORT,
        log_level="warning",
    )
|