Bakalarska_praca/Backend/index_JSON.py
2024-10-12 14:08:12 +02:00

36 lines
970 B
Python

import json
from elasticsearch import Elasticsearch
from langchain_huggingface import HuggingFaceEmbeddings
# Elasticsearch client pointed at a node on localhost:9200 over plain HTTP.
es = Elasticsearch(
    [
        {
            'host': 'localhost',
            'port': 9200,
            'scheme': 'http',
        },
    ]
)

# Sentence-embedding model used to turn document text into vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
)
def load_drug_data(json_path):
    """Parse the UTF-8 encoded JSON file at *json_path* and return its content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever Python object the JSON document decodes to.
    """
    with open(json_path, mode='r', encoding='utf-8') as source:
        return json.load(source)
def index_documents(data, index_name='drug_docs'):
    """Embed every item of *data* and store it in Elasticsearch.

    For each item a text is assembled from its ``link``, ``pribalovy_letak``
    and ``spc`` fields, embedded with the module-level ``embeddings`` model,
    and indexed through the module-level ``es`` client. The item's position
    in *data* is used as the document id.

    Args:
        data: Iterable of mappings. Each must contain ``link``;
            ``pribalovy_letak`` and ``spc`` are optional and fall back to an
            empty string when the key is absent.
        index_name: Name of the target Elasticsearch index. Defaults to
            ``'drug_docs'``, the index this function has always written to.

    Raises:
        KeyError: If an item has no ``link`` key.
    """
    for doc_id, item in enumerate(data):
        # The same text is stored verbatim and used as the embedding input.
        doc_text = f"{item['link']} {item.get('pribalovy_letak', '')} {item.get('spc', '')}"
        vector = embeddings.embed_query(doc_text)
        es.index(
            index=index_name,
            id=doc_id,
            body={
                'text': doc_text,
                'vector': vector,
                # Keep the untouched source record alongside the derived fields.
                'full_data': item,
            },
        )
# Script body: runs on import/execution of this module.
# Relative path of the JSON input; resolved against the current working directory.
data_path = "data/cleaned_general_info_additional.json"
# Load the records and index each of them via index_documents().
drug_data = load_drug_data(data_path)
index_documents(drug_data)
# Completion message (Russian for "Indexing completed.").
print("Индексирование завершено.")