Implemented requests to cloud-hosted Elasticsearch and more through LangChain
This commit is contained in:
parent
a527489f43
commit
8b2aad77aa
Binary file not shown.
3
Backend/config.json
Normal file
3
Backend/config.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"useCloud" : false
|
||||
}
|
41
Backend/indexCloud.py
Normal file
41
Backend/indexCloud.py
Normal file
@ -0,0 +1,41 @@
|
||||
import json
import os
import sys

from elasticsearch import Elasticsearch
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
# Connection settings are read from the environment so that no credentials
# live in the repository:
#   ELASTIC_CLOUD_ID  - Elastic Cloud deployment id (required)
#   ELASTIC_USER      - user name, defaults to "elastic"
#   ELASTIC_PASSWORD  - password for that user (required)
# NOTE(review): the values that used to be hard-coded here remain in the
# version-control history and should be rotated.
es = Elasticsearch(
    cloud_id=os.environ["ELASTIC_CLOUD_ID"],
    basic_auth=(
        os.environ.get("ELASTIC_USER", "elastic"),
        os.environ["ELASTIC_PASSWORD"],
    ),
)

# Embedding model used to turn each document's text into a vector before it
# is indexed.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
||||
|
||||
|
||||
def load_drug_data(json_path):
    """Parse the UTF-8 encoded JSON file at ``json_path`` and return its content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever object the JSON document decodes to.
    """
    with open(json_path, mode='r', encoding='utf-8') as source:
        return json.load(source)
|
||||
|
||||
|
||||
def index_documents(data):
    """Embed every record in ``data`` and store it in the ``drug_docs`` index.

    For each record the ``link``, ``pribalovy_letak`` and ``spc`` fields are
    joined into one text, embedded with the module-level ``embeddings`` model
    and written through the module-level ``es`` client. Progress is reported
    on a single, continuously rewritten console line.

    Args:
        data: Sized iterable of dict records. Every record must contain
            ``link``; ``pribalovy_letak`` and ``spc`` are optional and
            default to an empty string.

    Raises:
        KeyError: If a record has no ``link`` key.
    """
    total_documents = len(data)
    for i, item in enumerate(data, start=1):
        doc_text = f"{item['link']} {item.get('pribalovy_letak', '')} {item.get('spc', '')}"

        vector = embeddings.embed_query(doc_text)

        # ``document=`` is the supported replacement for the deprecated
        # ``body=`` argument of ``Elasticsearch.index``.
        # The document id is the record's 1-based position in ``data``.
        es.index(
            index='drug_docs',
            id=i,
            document={
                'text': doc_text,
                'vector': vector,
                'full_data': item,
            },
        )

        # "\r" returns to the start of the line so the counter updates in place.
        sys.stdout.write(f"\rПроиндексировано {i} из {total_documents} документов")
        sys.stdout.flush()

    print("\nИндексирование завершено.")
|
||||
|
||||
|
||||
# Run the indexing only when this file is executed as a script, so importing
# the module does not trigger a full re-index as a side effect.
if __name__ == "__main__":
    # Relative path: resolved against the current working directory.
    data_path = "../../data_adc_databaza/cleaned_general_info_additional.json"
    drug_data = load_drug_data(data_path)
    index_documents(drug_data)
|
@ -2,10 +2,8 @@ import json
|
||||
from elasticsearch import Elasticsearch
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
|
||||
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])
|
||||
|
||||
|
||||
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
||||
|
||||
|
||||
|
@ -1,7 +1,11 @@
|
||||
import os
|
||||
from elasticsearch import Elasticsearch
|
||||
import json
|
||||
import requests
|
||||
from langchain.chains import SequentialChain
|
||||
from langchain.chains import LLMChain, SequentialChain
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
from langchain_elasticsearch import ElasticsearchStore
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
import logging
|
||||
|
||||
|
||||
@ -9,10 +13,11 @@ logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
|
||||
# The Mistral API key is read from the MISTRAL_API_KEY environment variable so
# that the secret is not committed to the repository.
# NOTE(review): the key that used to be hard-coded here remains in the
# version-control history and should be revoked.
mistral_api_key = os.environ.get("MISTRAL_API_KEY")
if not mistral_api_key:
    # Fail at import time rather than on the first request to the API.
    raise ValueError("API ключ не найден. Убедитесь, что переменная MISTRAL_API_KEY установлена.")
|
||||
|
||||
|
||||
class CustomMistralLLM:
|
||||
@ -38,30 +43,47 @@ class CustomMistralLLM:
|
||||
return result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
|
||||
|
||||
|
||||
|
||||
logger.info("Загрузка модели HuggingFaceEmbeddings...")
|
||||
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
||||
|
||||
|
||||
# ``config.json`` is resolved against the current working directory; its
# ``useCloud`` flag selects the cloud deployment or the local instance.
config_file_path = "config.json"

with open(config_file_path, 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Credentials are read from the environment so that they are not committed:
#   ELASTIC_USER      - user name, defaults to "elastic"
#   ELASTIC_PASSWORD  - password for that user (required)
#   ELASTIC_CLOUD_ID  - Elastic Cloud deployment id (required when useCloud)
# NOTE(review): the password that used to be hard-coded here remains in the
# version-control history and should be rotated.
es_user = os.environ.get("ELASTIC_USER", "elastic")
es_password = os.environ["ELASTIC_PASSWORD"]

if config.get("useCloud", False):
    logger.info("CLOUD ELASTIC")
    cloud_id = os.environ["ELASTIC_CLOUD_ID"]
    vectorstore = ElasticsearchStore(
        es_cloud_id=cloud_id,
        index_name='drug_docs',
        embedding=embeddings,
        es_user=es_user,
        es_password=es_password,
    )
else:
    logger.info("LOCAL ELASTIC")
    vectorstore = ElasticsearchStore(
        es_url="http://localhost:9200",
        index_name='drug_docs',
        embedding=embeddings,
        es_user=es_user,
        es_password=es_password,
    )
|
||||
|
||||
logger.info(f"Подключение установлено к {'облачному' if config.get('useCloud', False) else 'локальному'} Elasticsearch")
|
||||
|
||||
# LLM
|
||||
llm = CustomMistralLLM(
|
||||
api_key=mistral_api_key,
|
||||
endpoint_url="https://api.mistral.ai/v1/chat/completions"
|
||||
)
|
||||
|
||||
|
||||
|
||||
def process_query_with_mistral(query, k=10):
|
||||
logger.info("Обработка запроса началась.")
|
||||
try:
|
||||
# Elasticsearch LangChain
|
||||
response = vectorstore.similarity_search(query, k=k)
|
||||
if not response:
|
||||
return {"summary": "Ничего не найдено", "links": [], "status_log": ["Ничего не найдено."]}
|
||||
@ -75,8 +97,12 @@ def process_query_with_mistral(query, k=10):
|
||||
)
|
||||
|
||||
summary = llm.generate_text(prompt=structured_prompt, max_tokens=512, temperature=0.7)
|
||||
return {"summary": summary, "links": links, "status_log": ["Ответ получен от модели Mistral."]}
|
||||
|
||||
#TextSplitter
|
||||
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
|
||||
split_summary = splitter.split_text(summary)
|
||||
|
||||
return {"summary": split_summary, "links": links, "status_log": ["Ответ получен от модели Mistral."]}
|
||||
except Exception as e:
|
||||
logger.info(f"Ошибка: {str(e)}")
|
||||
return {"summary": "Произошла ошибка", "links": [], "status_log": [f"Ошибка: {str(e)}"]}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user