Implemented requests to cloud Elasticsearch (and more) through LangChain

2024-10-22 15:39:09 +02:00 · 2024-10-22 15:39:09 +02:00 · 8b2aad77aa
commit 8b2aad77aa
parent a527489f43
5 changed files with 85 additions and 17 deletions
--- a/Backend/pycache/model.cpython-311.pyc
+++ b/Backend/pycache/model.cpython-311.pyc
--- a/Backend/config.json
+++ b/Backend/config.json
@ -0,0 +1,3 @@
 {
  "useCloud" : false
 }
--- a/Backend/indexCloud.py
+++ b/Backend/indexCloud.py
@ -0,0 +1,41 @@
 from elasticsearch import Elasticsearch
 from langchain_huggingface import HuggingFaceEmbeddings
 import json
 import sys
 # Elasticsearch client for the Elastic Cloud deployment identified by cloud_id,
 # authenticated with HTTP basic auth as the "elastic" user.
 # NOTE(review): the Cloud ID and password are committed in plain text here;
 # consider loading them from the environment or a secrets store and rotating them.
 es = Elasticsearch(
    cloud_id="tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=",
    basic_auth=("elastic", "sSz2BEGv56JRNjGFwoQ191RJ")
 )
 # Embedding model that produces the vector stored alongside each indexed document.
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
 def load_drug_data(json_path):
    """Parse the UTF-8 encoded JSON file at ``json_path`` and return its content."""
    with open(json_path, mode='r', encoding='utf-8') as source:
        return json.load(source)
 def index_documents(data):
    """Embed every record in ``data`` and store it in the ``drug_docs`` index.

    For each record, the ``link`` value and the optional ``pribalovy_letak``
    and ``spc`` values are joined with single spaces. That text is embedded
    with the module-level ``embeddings`` object and written through the
    module-level ``es`` client under a 1-based document id, together with
    the vector and the untouched record. A single-line progress counter is
    rewritten on stdout after each record.
    """
    count = len(data)
    position = 0
    for record in data:
        position += 1
        link = record['link']
        leaflet = record.get('pribalovy_letak', '')
        summary = record.get('spc', '')
        text = f"{link} {leaflet} {summary}"
        payload = {
            'text': text,
            'vector': embeddings.embed_query(text),
            'full_data': record,
        }
        es.index(index='drug_docs', id=position, body=payload)
        # Carriage return keeps the counter on one terminal line.
        progress = f"\rПроиндексировано {position} из {count} документов"
        sys.stdout.write(progress)
        sys.stdout.flush()
    print("\nИндексирование завершено.")
 # Script body: these statements execute at module level, i.e. every time the
 # file is run or imported. The path is relative, so it resolves against the
 # current working directory of the process.
 data_path = "../../data_adc_databaza/cleaned_general_info_additional.json"
 drug_data = load_drug_data(data_path)
 index_documents(drug_data)
--- a/Backend/index_JSON.py
+++ b/Backend/index_JSON.py
@ -2,10 +2,8 @@ import json
 from elasticsearch import Elasticsearch
 from langchain_huggingface import HuggingFaceEmbeddings
 # Elasticsearch client for a node on localhost:9200 over plain HTTP.
 es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])
 # Embedding model (multilingual MiniLM sentence-transformers checkpoint).
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
--- a/Backend/model.py
+++ b/Backend/model.py
@ -1,7 +1,11 @@
-import os
+from elasticsearch import Elasticsearch
 import json
 import requests
 from langchain.chains import SequentialChain
 from langchain.chains import LLMChain, SequentialChain
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_elasticsearch import ElasticsearchStore
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import logging
@ -9,10 +13,11 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 mistral_api_key = "hXDC4RBJk1qy5pOlrgr01GtOlmyCBaNs"
 if not mistral_api_key:
-    raise ValueError("API ключ не найден. Убедитесь, что переменная MISTRAL_API_KEY установлена.")
+    raise ValueError("API ключ не найден.")
 class CustomMistralLLM:
@ -38,30 +43,47 @@ class CustomMistralLLM:
        return result.get("choices", [{}])[0].get("message", {}).get("content", "No response")
 logger.info("Загрузка модели HuggingFaceEmbeddings...")
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
-vectorstore = ElasticsearchStore(
+config_file_path = "config.json"
    es_url="http://localhost:9200",
    index_name='drug_docs',
    embedding=embeddings,
    es_user='elastic',
    es_password='sSz2BEGv56JRNjGFwoQ191RJ'
 )
 # Read the JSON configuration; its "useCloud" flag selects the Elasticsearch backend.
 with open(config_file_path, 'r') as config_file:
    config = json.load(config_file)
 # Build the vector store against Elastic Cloud (by Cloud ID) or a local node.
 # A missing "useCloud" key is treated as False, i.e. the local node is used.
 if config.get("useCloud", False):
    logger.info("CLOUD ELASTIC")
    # NOTE(review): the Cloud ID and password below are hard-coded in source;
    # consider moving them to the environment or a secrets store.
    cloud_id = "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU="  # Replace with your Cloud ID
    vectorstore = ElasticsearchStore(
        es_cloud_id=cloud_id,
        index_name='drug_docs',
        embedding=embeddings,
        es_user = "elastic",
        es_password = "sSz2BEGv56JRNjGFwoQ191RJ",
    )
 else:
    logger.info("LOCAL ELASTIC")
    # Local node at localhost:9200; no credentials are passed in this branch.
    vectorstore = ElasticsearchStore(
        es_url="http://localhost:9200",
        index_name='drug_docs',
        embedding=embeddings,
    )
 # Log which backend was selected (the message text is Russian: cloud vs. local).
 logger.info(f"Подключение установлено к {'облачному' if config.get('useCloud', False) else 'локальному'} Elasticsearch")
 # LLM client pointed at the Mistral chat-completions endpoint.
 llm = CustomMistralLLM(
    api_key=mistral_api_key,
    endpoint_url="https://api.mistral.ai/v1/chat/completions"
 )
 def process_query_with_mistral(query, k=10):
    logger.info("Обработка запроса началась.")
    try:
        # Elasticsearch LangChain
        response = vectorstore.similarity_search(query, k=k)
        if not response:
            return {"summary": "Ничего не найдено", "links": [], "status_log": ["Ничего не найдено."]}
@ -75,8 +97,12 @@ def process_query_with_mistral(query, k=10):
        )
        summary = llm.generate_text(prompt=structured_prompt, max_tokens=512, temperature=0.7)
-        return {"summary": summary, "links": links, "status_log": ["Ответ получен от модели Mistral."]}
+
        #TextSplitter
        splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
        split_summary = splitter.split_text(summary)
        return {"summary": split_summary, "links": links, "status_log": ["Ответ получен от модели Mistral."]}
    except Exception as e:
        logger.info(f"Ошибка: {str(e)}")
        return {"summary": "Произошла ошибка", "links": [], "status_log": [f"Ошибка: {str(e)}"]}