From 8fbe671229a0262fafc4d5b195541fb9ba947462 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 13 Apr 2025 10:48:13 +0000 Subject: [PATCH] upd model file --- sk1/Backend/model.py | 306 +++++++++++++++++++++++++------------------ 1 file changed, 175 insertions(+), 131 deletions(-) diff --git a/sk1/Backend/model.py b/sk1/Backend/model.py index 720a0f0..88ac20e 100644 --- a/sk1/Backend/model.py +++ b/sk1/Backend/model.py @@ -22,17 +22,36 @@ mistral_api_key = "hXDC4RBJk1qy5pOlrgr01GtOlmyCBaNs" if not mistral_api_key: raise ValueError("Mistral API key not found in configuration.") + ############################################################################### -# Jednoduché funkcie pre preklad (stub) +# Simple functions for translation (stub) ############################################################################### def translate_to_slovak(text: str) -> str: return text + def translate_preserving_medicine_names(text: str) -> str: return text + ############################################################################### -# Funkcia pre vyhodnotenie úplnosti odpovede +# Function for generating detailed evaluation description via Mistral +############################################################################### +def generate_detailed_description(query: str, answer: str, rating: float) -> str: + prompt = ( + f"Podrobne opíš, prečo odpoveď: '{answer}' na otázku: '{query}' dosiahla hodnotenie {rating} zo 10. " + "Uveď relevantné aspekty, ktoré ovplyvnili toto hodnotenie, vrátane úplnosti, presnosti a kvality vysvetlenia." + ) + try: + description = llm_small.generate_text(prompt=prompt, max_tokens=150, temperature=0.5) + return description.strip() + except Exception as e: + logger.error(f"Error generating detailed description: {e}") + return "Nie je dostupný podrobný popis." 
+ + +############################################################################### +# Function for evaluating the completeness of the answer ############################################################################### def evaluate_complete_answer(query: str, answer: str) -> dict: evaluation_prompt = ( @@ -48,12 +67,13 @@ def evaluate_complete_answer(query: str, answer: str) -> dict: try: score = float(score_str.strip()) except Exception as e: - logger.error(f"Chyba pri parsovaní skóre: {e}") + logger.error(f"Error parsing evaluation score: {e}") score = 0.0 - return {"rating": round(score, 2), "explanation": "Vyhodnotenie na základe požadovaných kritérií."} + return {"rating": round(score, 2), "explanation": "Evaluation based on required criteria."} + ############################################################################### -# Funkcia pre validáciu logiky odpovede +# Function for validating the response logic ############################################################################### def validate_answer_logic(query: str, answer: str) -> str: validation_prompt = ( @@ -66,14 +86,39 @@ def validate_answer_logic(query: str, answer: str) -> str: ) try: validated_answer = llm_small.generate_text(prompt=validation_prompt, max_tokens=800, temperature=0.5) - logger.info(f"Validovaná odpoveď: {validated_answer}") + logger.info(f"Validated answer: {validated_answer}") return validated_answer except Exception as e: - logger.error(f"Chyba pri validácii odpovede: {e}") + logger.error(f"Error during answer validation: {e}") return answer + ############################################################################### -# Funkcia pre vytvorenie dynamického promptu s informáciami z dokumentov +# Function for logging the evaluation result to file +############################################################################### +def log_evaluation_to_file(model: str, search_type: str, rating: float, detailed_desc: str, answer: str): + # Nahradenie medzier 
podčiarkovníkmi pre názov modelu + safe_model = model.replace(" ", "_") + file_name = f"{safe_model}_{search_type}.txt" + timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + + log_entry = ( + f"Timestamp: {timestamp}\n" + f"Rating: {rating}/10\n" + f"Detailed description:\n{detailed_desc}\n" + f"Answer:\n{answer}\n" + + "=" * 80 + "\n\n" + ) + try: + with open(file_name, "a", encoding="utf-8") as f: + f.write(log_entry) + logger.info(f"Hodnotenie bolo zapísané do súboru {file_name}.") + except Exception as e: + logger.error(f"Error writing evaluation to file {file_name}: {e}") + + +############################################################################### +# Function for creating a dynamic prompt with information from documents ############################################################################### def build_dynamic_prompt(query: str, documents: list) -> str: documents_str = "\n".join(documents) @@ -90,8 +135,9 @@ def build_dynamic_prompt(query: str, documents: list) -> str: ) return prompt + ############################################################################### -# Funkcia na získanie používateľských dát z databázy prostredníctvom endpointu /api/get_user_data +# Function to get user data from the database via endpoint /api/get_user_data ############################################################################### def get_user_data_from_db(chat_id: str) -> str: try: @@ -100,13 +146,14 @@ def get_user_data_from_db(chat_id: str) -> str: data = response.json() return data.get("user_data", "") else: - logger.warning(f"Nepodarilo sa získať user_data, status: {response.status_code}") + logger.warning(f"Nezískané user_data, status: {response.status_code}") except Exception as e: - logger.error(f"Chyba pri získavaní user_data z DB: {e}", exc_info=True) + logger.error(f"Error retrieving user_data from DB: {e}", exc_info=True) return "" + ############################################################################### -# Trieda pre 
volanie Mistral LLM +# Class for calling Mistral LLM ############################################################################### class CustomMistralLLM: def __init__(self, api_key: str, endpoint_url: str, model_name: str): @@ -131,54 +178,72 @@ class CustomMistralLLM: response = requests.post(self.endpoint_url, headers=headers, json=payload) response.raise_for_status() result = response.json() - logger.info(f"Úplná odpoveď od modelu {self.model_name}: {result}") + logger.info(f"Full response from model {self.model_name}: {result}") return result.get("choices", [{}])[0].get("message", {}).get("content", "No response") except HTTPError as e: if response.status_code == 429: - logger.warning(f"Rate limit prekročený. Čakám {delay} sekúnd pred ďalšou skúškou.") + logger.warning(f"Rate limit exceeded. Waiting {delay} seconds before retry.") time.sleep(delay) attempt += 1 else: - logger.error(f"HTTP chyba: {e}") + logger.error(f"HTTP Error: {e}") raise e except Exception as ex: - logger.error(f"Chyba: {str(ex)}") + logger.error(f"Error: {str(ex)}") raise ex - raise Exception("Dosiahnutý maximálny počet pokusov pre API request") + raise Exception("Reached maximum number of retries for API request") + ############################################################################### -# Funkcia pre kontrolu, či správa súvisí s témou medicíny a liekov +# Initialisation of Embeddings and Elasticsearch ############################################################################### -def check_if_message_is_relevant(query: str) -> (bool, str): - # Ak je dotaz rovnaký s textami pre doplňujúce informácie, preskočíme kontrolu - missing_msgs = [ - "Prosím, uveďte vek pacienta.", - "Má pacient nejaké chronické ochorenia alebo alergie?", - "Ide o liek na predpis alebo voľnopredajný liek?" 
- ] - if query.strip() in missing_msgs: - return True, "Ano" +logger.info("Loading HuggingFaceEmbeddings model...") +embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") - prompt_relevance = ( - f"Pozri si nasledujúci dotaz užívateľa: '{query}'.\n" - "Patrí tento dotaz logicky do témy medicíny a odporúčaní liekov? " - "Ak áno, odpíš presne slovom 'Ano'. Ak nie, uveď dôvod, prečo sa dotaz netýka našej témy." +index_name = "drug_docs" +if config.get("useCloud", False): + logger.info("Using cloud Elasticsearch.") + cloud_id = "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=" + vectorstore = ElasticsearchStore( + es_cloud_id=cloud_id, + index_name=index_name, + embedding=embeddings, + es_user="elastic", + es_password="sSz2BEGv56JRNjGFwoQ191RJ" ) - response = llm_small.generate_text(prompt=prompt_relevance, max_tokens=200, temperature=0.3) - response = response.strip() - if response.lower() == "ano": - return True, "Ano" - else: - return False, response +else: + logger.info("Using local Elasticsearch.") + vectorstore = ElasticsearchStore( + es_url="http://elasticsearch:9200", + index_name=index_name, + embedding=embeddings, + ) + +logger.info("Connected to Elasticsearch.") ############################################################################### -# Funkcia pre klasifikáciu dopytu: vyhľadávanie vs. 
upresnenie +# Initialisation of LLM small & large +############################################################################### +llm_small = CustomMistralLLM( + api_key=mistral_api_key, + endpoint_url="https://api.mistral.ai/v1/chat/completions", + model_name="mistral-small-latest" +) +llm_large = CustomMistralLLM( + api_key=mistral_api_key, + endpoint_url="https://api.mistral.ai/v1/chat/completions", + model_name="mistral-large-latest" +) + + +############################################################################### +# Request classification function: vyhladavanie vs. upresnenie ############################################################################### def classify_query(query: str, chat_history: str = "") -> str: if not chat_history.strip(): return "vyhladavanie" prompt = ( - "Si zdravotnícky expert, ktorý analyzuje otázky používateľov. " + "Ty si zdravotnícky expert, ktorý analyzuje otázky používateľov. " "Analyzuj nasledujúci dopyt a urči, či ide o dopyt na vyhľadanie liekov alebo " "o upresnenie/doplnenie už poskytnutej odpovede.\n" "Ak dopyt obsahuje výrazy ako 'čo pit', 'aké lieky', 'odporuč liek', 'hľadám liek', " @@ -196,15 +261,16 @@ def classify_query(query: str, chat_history: str = "") -> str: return "upresnenie" return "vyhladavanie" + ############################################################################### -# Šablóna pre upresnenie dopytu +# Template for upresnenie dopytu ############################################################################### def build_upresnenie_prompt_no_history(chat_history: str, user_query: str) -> str: prompt = f""" -Si zdravotnícky expert. Máš k dispozícii históriu chatu a novú upresňujúcu otázku. +Ty si zdravotnícky expert. Máš k dispozícii históriu chatu a novú upresňujúcu otázku. 
Ak v histórii chatu už existuje jasná odpoveď na túto upresňujúcu otázku, napíš: -"FOUND_IN_HISTORY: <ľudský vysvetľujúci text>" +"FOUND_IN_HISTORY: <ľudský vysvetľujúci text>" Ak však v histórii chatu nie je dostatok informácií, napíš: "NO_ANSWER_IN_HISTORY: " Nerob klasifikáciu dopytu. Iba zisti, či sa v histórii nachádza priamo odpoveď na otázku. Históriu chatu máš tu: {chat_history} @@ -220,8 +286,9 @@ Upresňujúca otázka od používateľa: """ return prompt + ############################################################################### -# Funkcia pre získanie posledného vyhľadávacieho dopytu z histórie +# Function for retrieving the last vyhladavacieho dopytu z histórie ############################################################################### def extract_last_vyhladavacie_query(chat_history: str) -> str: lines = chat_history.splitlines() @@ -232,8 +299,9 @@ def extract_last_vyhladavacie_query(chat_history: str) -> str: break return last_query + ############################################################################### -# Trieda pre agenta konverzácie (dátové ukladanie: vek, anamnéza, predpis, user_data, search_query) +# Agent class for data storage: vek, anamneza, predpis, user_data, search_query ############################################################################### class ConversationalAgent: def __init__(self): @@ -291,13 +359,15 @@ class ConversationalAgent: def ask_follow_up(self, missing_info: dict) -> str: return " ".join(missing_info.values()) + ############################################################################### -# Hlavná funkcia process_query_with_mistral s aktualizovanou logikou +# Main function process_query_with_mistral with updated logic and logging ############################################################################### CHAT_HISTORY_ENDPOINT = "http://localhost:5000/api/chat_history_detail" + def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10): - logger.info("Spustenie spracovania dopytu.") + logger.info("Processing query started.") chat_history = "" if chat_context: @@ -319,17 +389,6 @@ def 
process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 except Exception as e: logger.error(f"Chyba pri načítaní histórie: {e}") - # Kontrola relevancie správy - is_relevant, relevance_response = check_if_message_is_relevant(query) - if not is_relevant: - logger.info("Dotaz sa netýka témy medicíny, vraciam vysvetlenie.") - return { - "best_answer": relevance_response, - "model": "RelevanceCheck", - "rating": 0, - "explanation": "Dotaz sa netýka témy medicíny a odporúčaní liekov." - } - agent = ConversationalAgent() if chat_history: agent.load_memory_from_history(chat_history) @@ -348,11 +407,12 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 try: update_response = requests.post("http://localhost:5000/api/save_user_data", json=update_payload) if update_response.status_code == 200: - logger.info("Používateľské dáta boli úspešne aktualizované cez endpoint /api/save_user_data (data question flag).") + logger.info( + "User data was successfully updated via endpoint /api/save_user_data (data question flag).") else: - logger.warning(f"Neúspešná aktualizácia dát (data question flag): {update_response.text}") + logger.warning(f"Failed to update data (data question flag): {update_response.text}") except Exception as e: - logger.error(f"Chyba pri aktualizácii user_data cez endpoint (data question flag): {e}") + logger.error(f"Error when updating user_data via endpoint (data question flag): {e}") if missing_info: logger.info(f"Chýbajúce informácie: {missing_info}") @@ -363,30 +423,34 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 try: update_response = requests.post("http://localhost:5000/api/save_user_data", json=update_payload) if update_response.status_code == 200: - logger.info("Používateľské dáta boli úspešne aktualizované cez endpoint /api/save_user_data.") + logger.info("User data was successfully updated via endpoint /api/save_user_data.") else: - logger.warning(f"Neúspešná 
aktualizácia dát: {update_response.text}") + logger.warning(f"Failed to update the data: {update_response.text}") except Exception as e: - logger.error(f"Chyba pri aktualizácii user_data cez endpoint: {e}") + logger.error(f"Error when updating user_data via endpoint: {e}") return { "best_answer": combined_missing_text, "model": "FollowUp (new chat)", "rating": 0, - "explanation": "Pre pokračovanie je potrebné doplniť ďalšie údaje.", + "explanation": "Additional data pre pokračovanie is required.", "patient_data": query } qtype = classify_query(query, chat_history) logger.info(f"Typ dopytu: {qtype}") - logger.info(f"Časť histórie chatu: {chat_history[:200]}...") + logger.info(f"Chat context (snippet): {chat_history[:200]}...") + + # Určenie typu vyhľadávania: "vector" pre upresnenie, inak "text" + search_type = "vector" if qtype == "upresnenie" else "text" if qtype == "vyhladavanie": user_data_db = get_user_data_from_db(chat_id) if user_data_db: - query = query + " Údaje človeka: " + user_data_db + query = query + " Udaje cloveka: " + user_data_db agent.long_term_memory["search_query"] = query if qtype == "upresnenie": + # Kombinácia pôvodného vyhľadávacieho dopytu a upresňujúcej otázky original_search = agent.long_term_memory.get("search_query") if not original_search: original_search = extract_last_vyhladavacie_query(chat_history) @@ -395,21 +459,21 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 combined_query = (original_search + " " + query).strip() user_data_db = get_user_data_from_db(chat_id) if user_data_db: - combined_query += " Údaje človeka: " + user_data_db - logger.info(f"Kombinovaný dopyt pre vyhľadávanie: {combined_query}") + combined_query += " Udaje cloveka: " + user_data_db + logger.info(f"Combined query for search: {combined_query}") upres_prompt = build_upresnenie_prompt_no_history(chat_history, combined_query) response_str = llm_small.generate_text(upres_prompt, max_tokens=1200, temperature=0.5) normalized = 
response_str.strip() - logger.info(f"Odpoveď na prompt pre upresnenie: {normalized}") + logger.info(f"Upresnenie prompt response: {normalized}") if re.match(r"(?i)^found_in_history:\s*", normalized): - logger.info("Nájdené FOUND_IN_HISTORY – vykonávam vyhľadávanie s kombinovaným dopytom.") + logger.info("Zistený FOUND_IN_HISTORY – vykonávame vyhľadávanie s kombinovaným dopytom.") elif re.match(r"(?i)^no_answer_in_history:\s*", normalized): parts = re.split(r"(?i)^no_answer_in_history:\s*", normalized, maxsplit=1) if len(parts) >= 2: combined_query = parts[1].strip() - logger.info(f"Upravený vyhľadávací dopyt z NO_ANSWER_IN_HISTORY: {combined_query}") + logger.info(f"Upravený vyhľadávací dopyt z NO_ANSWER_IN_HISTORY: {combined_query}") vector_results = vectorstore.similarity_search(combined_query, k=k) max_docs = 5 @@ -420,7 +484,7 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 "best_answer": "Ľutujem, nenašli sa žiadne relevantné informácie.", "model": "Upresnenie-NoResults", "rating": 0, - "explanation": "Žiadne výsledky z vyhľadávania." + "explanation": "No results from search." } joined_docs = "\n".join(vector_docs) final_prompt = ( @@ -430,6 +494,7 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 "Vygeneruj odporúčanie liekov alebo vysvetlenie, ak je to relevantné.\n" "Prosím, odpovedaj stručne a dostatočne, bez nadmernej dĺžky." 
) + # Volanie oboch modelov pre upresnenie (vectorový dopyt) ans_small = llm_small.generate_text(final_prompt, max_tokens=1200, temperature=0.7) ans_large = llm_large.generate_text(final_prompt, max_tokens=1200, temperature=0.7) val_small = validate_answer_logic(combined_query, ans_small) @@ -437,27 +502,25 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 eval_small = evaluate_complete_answer(combined_query, val_small) eval_large = evaluate_complete_answer(combined_query, val_large) candidates = [ - {"summary": val_small, "eval": eval_small, "model": "Mistral Small"}, - {"summary": val_large, "eval": eval_large, "model": "Mistral Large"}, + {"model": "Mistral Small", "summary": val_small, "eval": eval_small}, + {"model": "Mistral Large", "summary": val_large, "eval": eval_large}, ] + # Pre každého kandidáta vygenerujeme detailný popis a zapíšeme výsledok do príslušného súboru + for candidate in candidates: + detailed_desc = generate_detailed_description(combined_query, candidate["summary"], + candidate["eval"]["rating"]) + log_evaluation_to_file(candidate["model"], "vector", candidate["eval"]["rating"], detailed_desc, + candidate["summary"]) + best = max(candidates, key=lambda x: x["eval"]["rating"]) logger.info(f"Odpoveď od modelu {best['model']} má rating: {best['eval']['rating']}/10") - evaluation_table = "=== Výsledky hodnotenia odpovedí ===\n" - evaluation_table += "{:<15} | {:<6} | {:<60}\n".format("Model", "Rating", "Evaluovaný text") - evaluation_table += "-" * 100 + "\n" - for candidate in candidates: - model_name = candidate["model"] - rating = candidate["eval"]["rating"] - evaluated_text = candidate["summary"].replace("\n", " ") - evaluation_table += "{:<15} | {:<6} | {:<60}\n".format(model_name, rating, evaluated_text) - evaluation_table += "=" * 100 + "\n" - final_answer = translate_preserving_medicine_names(best["summary"]) memory_json = json.dumps(agent.long_term_memory) memory_block = 
f"[MEMORY]{memory_json}[/MEMORY]" final_answer_with_memory = final_answer + "\n\n" + return { "best_answer": final_answer_with_memory, "model": best["model"], @@ -465,6 +528,7 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 "explanation": best["eval"]["explanation"] } + # Vetva pre vyhľadávanie typu "vyhladavanie" (textový dopyt) vector_results = vectorstore.similarity_search(query, k=k) max_docs = 5 max_len = 1000 @@ -474,7 +538,7 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 "best_answer": "Ľutujem, nenašli sa žiadne relevantné informácie.", "model": "Vyhladavanie-NoDocs", "rating": 0, - "explanation": "Žiadne výsledky" + "explanation": "No results" } joined_docs = "\n".join(vector_docs) final_prompt = ( @@ -484,54 +548,34 @@ def process_query_with_mistral(query: str, chat_id: str, chat_context: str, k=10 "Vygeneruj odporúčanie liekov alebo vysvetlenie, ak je to relevantné.\n" "Prosím, odpovedaj stručne a dostatočne, bez nadmernej dĺžky." 
) - answer = llm_small.generate_text(final_prompt, max_tokens=1200, temperature=0.7) + # Volanie oboch modelov pre textový dopyt + ans_small = llm_small.generate_text(final_prompt, max_tokens=1200, temperature=0.7) + ans_large = llm_large.generate_text(final_prompt, max_tokens=1200, temperature=0.7) + val_small = validate_answer_logic(query, ans_small) + val_large = validate_answer_logic(query, ans_large) + eval_small = evaluate_complete_answer(query, val_small) + eval_large = evaluate_complete_answer(query, val_large) + candidates = [ + {"model": "Mistral Small", "summary": val_small, "eval": eval_small}, + {"model": "Mistral Large", "summary": val_large, "eval": eval_large}, + ] + # Logovanie výsledkov do súborov pre textový dopyt + for candidate in candidates: + detailed_desc = generate_detailed_description(query, candidate["summary"], candidate["eval"]["rating"]) + log_evaluation_to_file(candidate["model"], "text", candidate["eval"]["rating"], detailed_desc, + candidate["summary"]) + + best = max(candidates, key=lambda x: x["eval"]["rating"]) + logger.info(f"Odpoveď od modelu {best['model']} má rating: {best['eval']['rating']}/10") + + final_answer = translate_preserving_medicine_names(best["summary"]) memory_json = json.dumps(agent.long_term_memory) memory_block = f"[MEMORY]{memory_json}[/MEMORY]" - answer_with_memory = answer + "\n\n" + final_answer_with_memory = final_answer + "\n\n" return { - "best_answer": answer_with_memory, - "model": "Vyhladavanie-Final", - "rating": 9, - "explanation": "Vyhľadávacia cesta" + "best_answer": final_answer_with_memory, + "model": best["model"], + "rating": best["eval"]["rating"], + "explanation": best["eval"]["explanation"] } -############################################################################### -# Inicializácia Embeddings a Elasticsearch -############################################################################### -logger.info("Načítavam model HuggingFaceEmbeddings...") -embeddings = 
HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") - -index_name = "drug_docs" -if config.get("useCloud", False): - logger.info("Používam cloud Elasticsearch.") - cloud_id = "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=" - vectorstore = ElasticsearchStore( - es_cloud_id=cloud_id, - index_name=index_name, - embedding=embeddings, - es_user="elastic", - es_password="sSz2BEGv56JRNjGFwoQ191RJ" - ) -else: - logger.info("Používam lokálny Elasticsearch.") - vectorstore = ElasticsearchStore( - es_url="http://elasticsearch:9200", - index_name=index_name, - embedding=embeddings, - ) - -logger.info("Pripojenie k Elasticsearch bolo úspešné.") - -############################################################################### -# Inicializácia LLM small a large -############################################################################### -llm_small = CustomMistralLLM( - api_key=mistral_api_key, - endpoint_url="https://api.mistral.ai/v1/chat/completions", - model_name="mistral-small-latest" -) -llm_large = CustomMistralLLM( - api_key=mistral_api_key, - endpoint_url="https://api.mistral.ai/v1/chat/completions", - model_name="mistral-large-latest" -)