Implemented cloud-hosted Elasticsearch indexing and querying through LangChain
This commit is contained in:
parent
a527489f43
commit
8b2aad77aa
Binary file not shown.
3
Backend/config.json
Normal file
3
Backend/config.json
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"useCloud" : false
|
||||||
|
}
|
41
Backend/indexCloud.py
Normal file
41
Backend/indexCloud.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
import json
import os
import sys

from elasticsearch import Elasticsearch
from langchain_huggingface import HuggingFaceEmbeddings
|
||||||
|
|
||||||
|
# Elastic Cloud client shared by the indexing functions below.
# NOTE(review): the Cloud ID and password were committed in plain text.
# They are now read from the environment, falling back to the original
# values so existing deployments keep working — rotate these leaked
# credentials and then delete the fallbacks.
es = Elasticsearch(
    cloud_id=os.environ.get(
        "ES_CLOUD_ID",
        "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=",
    ),
    basic_auth=(
        os.environ.get("ES_USER", "elastic"),
        os.environ.get("ES_PASSWORD", "sSz2BEGv56JRNjGFwoQ191RJ"),
    ),
)

# Multilingual sentence-embedding model; the same model must be used at
# query time so stored and query vectors live in the same space.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
||||||
|
|
||||||
|
|
||||||
|
def load_drug_data(json_path):
    """Read the drug dataset at *json_path* and return the parsed JSON."""
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def index_documents(data):
    """Embed every drug record and index it into the 'drug_docs' index.

    For each record the concatenated text fields are embedded with the
    module-level ``embeddings`` model and stored together with the vector
    and the full original record. Progress is written in place to stdout.

    :param data: list of dict records (keys ``link``, ``pribalovy_letak``,
        ``spc`` are used when present — TODO confirm schema against the
        source JSON).
    """
    total_documents = len(data)
    for i, item in enumerate(data, start=1):
        # Use .get for every field so one record missing a key cannot
        # abort the whole run (the original used item['link'], which
        # raised KeyError on records without a link).
        doc_text = f"{item.get('link', '')} {item.get('pribalovy_letak', '')} {item.get('spc', '')}"

        vector = embeddings.embed_query(doc_text)

        es.index(index='drug_docs', id=i, body={
            'text': doc_text,
            'vector': vector,
            'full_data': item
        })

        # \r rewrites the same console line instead of scrolling.
        sys.stdout.write(f"\rПроиндексировано {i} из {total_documents} документов")
        sys.stdout.flush()

    print("\nИндексирование завершено.")
|
||||||
|
|
||||||
|
|
||||||
|
# Run the pipeline only when executed as a script, so the module can be
# imported (e.g. by tests or other tools) without re-indexing everything.
if __name__ == "__main__":
    data_path = "../../data_adc_databaza/cleaned_general_info_additional.json"
    drug_data = load_drug_data(data_path)
    index_documents(drug_data)
|
@ -2,10 +2,8 @@ import json
|
|||||||
from elasticsearch import Elasticsearch
|
from elasticsearch import Elasticsearch
|
||||||
from langchain_huggingface import HuggingFaceEmbeddings
|
from langchain_huggingface import HuggingFaceEmbeddings
|
||||||
|
|
||||||
|
|
||||||
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])
|
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])
|
||||||
|
|
||||||
|
|
||||||
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,11 @@
|
|||||||
import os
|
from elasticsearch import Elasticsearch
|
||||||
|
import json
|
||||||
import requests
|
import requests
|
||||||
|
from langchain.chains import SequentialChain
|
||||||
|
from langchain.chains import LLMChain, SequentialChain
|
||||||
from langchain_huggingface import HuggingFaceEmbeddings
|
from langchain_huggingface import HuggingFaceEmbeddings
|
||||||
from langchain_elasticsearch import ElasticsearchStore
|
from langchain_elasticsearch import ElasticsearchStore
|
||||||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
@ -9,10 +13,11 @@ logging.basicConfig(level=logging.INFO)
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
mistral_api_key = "hXDC4RBJk1qy5pOlrgr01GtOlmyCBaNs"
|
mistral_api_key = "hXDC4RBJk1qy5pOlrgr01GtOlmyCBaNs"
|
||||||
if not mistral_api_key:
|
if not mistral_api_key:
|
||||||
raise ValueError("API ключ не найден. Убедитесь, что переменная MISTRAL_API_KEY установлена.")
|
raise ValueError("API ключ не найден.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CustomMistralLLM:
|
class CustomMistralLLM:
|
||||||
@ -38,30 +43,47 @@ class CustomMistralLLM:
|
|||||||
return result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
|
return result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
logger.info("Загрузка модели HuggingFaceEmbeddings...")
|
logger.info("Загрузка модели HuggingFaceEmbeddings...")
|
||||||
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
||||||
|
|
||||||
|
|
||||||
vectorstore = ElasticsearchStore(
|
config_file_path = "config.json"
|
||||||
es_url="http://localhost:9200",
|
|
||||||
index_name='drug_docs',
|
|
||||||
embedding=embeddings,
|
|
||||||
es_user='elastic',
|
|
||||||
es_password='sSz2BEGv56JRNjGFwoQ191RJ'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
with open(config_file_path, 'r') as config_file:
|
||||||
|
config = json.load(config_file)
|
||||||
|
|
||||||
|
# Cloud ID
|
||||||
|
if config.get("useCloud", False):
|
||||||
|
logger.info("CLOUD ELASTIC")
|
||||||
|
cloud_id = "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=" # Замените на ваш Cloud ID
|
||||||
|
vectorstore = ElasticsearchStore(
|
||||||
|
es_cloud_id=cloud_id,
|
||||||
|
index_name='drug_docs',
|
||||||
|
embedding=embeddings,
|
||||||
|
es_user = "elastic",
|
||||||
|
es_password = "sSz2BEGv56JRNjGFwoQ191RJ",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info("LOCAL ELASTIC")
|
||||||
|
vectorstore = ElasticsearchStore(
|
||||||
|
es_url="http://localhost:9200",
|
||||||
|
index_name='drug_docs',
|
||||||
|
embedding=embeddings,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Подключение установлено к {'облачному' if config.get('useCloud', False) else 'локальному'} Elasticsearch")
|
||||||
|
|
||||||
|
# LLM
|
||||||
llm = CustomMistralLLM(
|
llm = CustomMistralLLM(
|
||||||
api_key=mistral_api_key,
|
api_key=mistral_api_key,
|
||||||
endpoint_url="https://api.mistral.ai/v1/chat/completions"
|
endpoint_url="https://api.mistral.ai/v1/chat/completions"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def process_query_with_mistral(query, k=10):
|
def process_query_with_mistral(query, k=10):
|
||||||
logger.info("Обработка запроса началась.")
|
logger.info("Обработка запроса началась.")
|
||||||
try:
|
try:
|
||||||
|
# Elasticsearch LangChain
|
||||||
response = vectorstore.similarity_search(query, k=k)
|
response = vectorstore.similarity_search(query, k=k)
|
||||||
if not response:
|
if not response:
|
||||||
return {"summary": "Ничего не найдено", "links": [], "status_log": ["Ничего не найдено."]}
|
return {"summary": "Ничего не найдено", "links": [], "status_log": ["Ничего не найдено."]}
|
||||||
@ -75,8 +97,12 @@ def process_query_with_mistral(query, k=10):
|
|||||||
)
|
)
|
||||||
|
|
||||||
summary = llm.generate_text(prompt=structured_prompt, max_tokens=512, temperature=0.7)
|
summary = llm.generate_text(prompt=structured_prompt, max_tokens=512, temperature=0.7)
|
||||||
return {"summary": summary, "links": links, "status_log": ["Ответ получен от модели Mistral."]}
|
|
||||||
|
#TextSplitter
|
||||||
|
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
|
||||||
|
split_summary = splitter.split_text(summary)
|
||||||
|
|
||||||
|
return {"summary": split_summary, "links": links, "status_log": ["Ответ получен от модели Mistral."]}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.info(f"Ошибка: {str(e)}")
|
logger.info(f"Ошибка: {str(e)}")
|
||||||
return {"summary": "Произошла ошибка", "links": [], "status_log": [f"Ошибка: {str(e)}"]}
|
return {"summary": "Произошла ошибка", "links": [], "status_log": [f"Ошибка: {str(e)}"]}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user