implemented requests to cloud Elasticsearch, and more, through LangChain

oleh 2024-10-22 15:39:09 +02:00
parent a527489f43
commit 8b2aad77aa
5 changed files with 85 additions and 17 deletions

Backend/config.json Normal file (+3)

@@ -0,0 +1,3 @@
{
    "useCloud": false
}
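config.json introduces a single useCloud flag that the backend reads at startup (see the model file diff further down). A minimal sketch of reading that flag defensively, with local mode as the default when the file or key is missing; the load_use_cloud helper is a hypothetical illustration, not code from this commit:

import json
import os

def load_use_cloud(path="Backend/config.json"):
    # Hypothetical helper: return the useCloud flag, defaulting to local mode.
    if not os.path.exists(path):
        return False
    with open(path, "r", encoding="utf-8") as f:
        config = json.load(f)
    return bool(config.get("useCloud", False))

print("cloud Elasticsearch" if load_use_cloud() else "local Elasticsearch")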

Backend/indexCloud.py Normal file (+41)

@@ -0,0 +1,41 @@
from elasticsearch import Elasticsearch
from langchain_huggingface import HuggingFaceEmbeddings
import json
import sys

# Connect to the Elastic Cloud deployment
es = Elasticsearch(
    cloud_id="tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=",
    basic_auth=("elastic", "sSz2BEGv56JRNjGFwoQ191RJ")
)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

def load_drug_data(json_path):
    # Load the drug dataset from a JSON file
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

def index_documents(data):
    # Embed each record and index it into the 'drug_docs' index
    total_documents = len(data)
    for i, item in enumerate(data, start=1):
        doc_text = f"{item['link']} {item.get('pribalovy_letak', '')} {item.get('spc', '')}"
        vector = embeddings.embed_query(doc_text)
        es.index(index='drug_docs', id=i, body={
            'text': doc_text,
            'vector': vector,
            'full_data': item
        })
        sys.stdout.write(f"\rIndexed {i} of {total_documents} documents")
        sys.stdout.flush()
    print("\nIndexing finished.")

data_path = "../../data_adc_databaza/cleaned_general_info_additional.json"
drug_data = load_drug_data(data_path)
index_documents(drug_data)
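indexCloud.py mirrors the existing local indexer but writes into the Elastic Cloud deployment. A quick sanity check after a run could look like the following; it reuses an Elasticsearch client pointed at the same deployment and the drug_docs index created above, and the keyword "paracetamol" is only an assumed example:

from elasticsearch import Elasticsearch

# Same deployment as indexCloud.py; credentials replaced with placeholders here.
es = Elasticsearch(cloud_id="<cloud_id>", basic_auth=("elastic", "<password>"))

# How many documents ended up in the index?
count = es.count(index="drug_docs")["count"]
print(f"drug_docs contains {count} documents")

# Simple keyword search against the 'text' field populated by index_documents.
resp = es.search(index="drug_docs", query={"match": {"text": "paracetamol"}}, size=3)
for hit in resp["hits"]["hits"]:
    print(hit["_id"], hit["_source"]["full_data"].get("link"))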


@@ -2,10 +2,8 @@ import json
 from elasticsearch import Elasticsearch
 from langchain_huggingface import HuggingFaceEmbeddings
 es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")


@@ -1,7 +1,11 @@
-import os
+from elasticsearch import Elasticsearch
+import json
 import requests
-from langchain.chains import SequentialChain
+from langchain.chains import LLMChain, SequentialChain
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_elasticsearch import ElasticsearchStore
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 import logging
@@ -9,10 +13,11 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 mistral_api_key = "hXDC4RBJk1qy5pOlrgr01GtOlmyCBaNs"
 if not mistral_api_key:
-    raise ValueError("API key not found. Make sure the MISTRAL_API_KEY environment variable is set.")
+    raise ValueError("API key not found.")
@@ -38,30 +43,47 @@ class CustomMistralLLM:
         return result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
 logger.info("Loading the HuggingFaceEmbeddings model...")
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+config_file_path = "config.json"
+with open(config_file_path, 'r') as config_file:
+    config = json.load(config_file)
+# Cloud ID
+if config.get("useCloud", False):
+    logger.info("CLOUD ELASTIC")
+    cloud_id = "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU="  # Replace with your Cloud ID
+    vectorstore = ElasticsearchStore(
+        es_cloud_id=cloud_id,
+        index_name='drug_docs',
+        embedding=embeddings,
+        es_user="elastic",
+        es_password="sSz2BEGv56JRNjGFwoQ191RJ",
+    )
+else:
+    logger.info("LOCAL ELASTIC")
-vectorstore = ElasticsearchStore(
-    es_url="http://localhost:9200",
-    index_name='drug_docs',
-    embedding=embeddings,
-)
+    vectorstore = ElasticsearchStore(
+        es_url="http://localhost:9200",
+        index_name='drug_docs',
+        embedding=embeddings,
+        es_user='elastic',
+        es_password='sSz2BEGv56JRNjGFwoQ191RJ'
+    )
+logger.info(f"Connected to {'cloud' if config.get('useCloud', False) else 'local'} Elasticsearch")
+# LLM
 llm = CustomMistralLLM(
     api_key=mistral_api_key,
     endpoint_url="https://api.mistral.ai/v1/chat/completions"
 )
 def process_query_with_mistral(query, k=10):
     logger.info("Query processing started.")
     try:
+        # Elasticsearch search via LangChain
         response = vectorstore.similarity_search(query, k=k)
         if not response:
             return {"summary": "Nothing found", "links": [], "status_log": ["Nothing found."]}
@@ -75,8 +97,12 @@ def process_query_with_mistral(query, k=10):
         )
         summary = llm.generate_text(prompt=structured_prompt, max_tokens=512, temperature=0.7)
-        return {"summary": summary, "links": links, "status_log": ["Response received from the Mistral model."]}
+        # TextSplitter
+        splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
+        split_summary = splitter.split_text(summary)
+        return {"summary": split_summary, "links": links, "status_log": ["Response received from the Mistral model."]}
     except Exception as e:
         logger.info(f"Error: {str(e)}")
         return {"summary": "An error occurred", "links": [], "status_log": [f"Error: {str(e)}"]}