diff --git a/Backend/__pycache__/model.cpython-311.pyc b/Backend/__pycache__/model.cpython-311.pyc index dcfc304..ea50eff 100644 Binary files a/Backend/__pycache__/model.cpython-311.pyc and b/Backend/__pycache__/model.cpython-311.pyc differ diff --git a/Backend/model.py b/Backend/model.py index 90c5091..58ca380 100644 --- a/Backend/model.py +++ b/Backend/model.py @@ -3,6 +3,7 @@ import requests import logging import time import re +import difflib from requests.exceptions import HTTPError from elasticsearch import Elasticsearch from langchain.chains import SequentialChain @@ -11,48 +12,80 @@ from langchain_huggingface import HuggingFaceEmbeddings from langchain_elasticsearch import ElasticsearchStore from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.docstore.document import Document -from googletrans import Translator # Translator for final polishing +# from googletrans import Translator logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# Load configuration config_file_path = "config.json" with open(config_file_path, 'r') as config_file: config = json.load(config_file) -# Load Mistral API key mistral_api_key = "hXDC4RBJk1qy5pOlrgr01GtOlmyCBaNs" if not mistral_api_key: raise ValueError("Mistral API key not found in configuration.") - ############################################################################### -# Function to translate entire text to Slovak # +# translate all answer to slovak(temporary closed :) ) # ############################################################################### -translator = Translator() - +# translator = Translator() def translate_to_slovak(text: str) -> str: """ - Translates the entire text into Slovak. - Logs the text before and after translation. + Переводит весь текст на словацкий с логированием изменений. + Сейчас функция является заглушкой и возвращает исходный текст без изменений. """ - if not text.strip(): - return text - - try: - # 1) Slovak (or any language) -> English - mid_result = translator.translate(text, src='auto', dest='en').text - - # 2) English -> Slovak - final_result = translator.translate(mid_result, src='en', dest='sk').text - - return final_result - except Exception as e: - logger.error(f"Translation error: {e}") - return text # fallback to the original text - + # if not text.strip(): + # return text + # + # logger.info("Translation - Before: " + text) + # try: + # mid_result = translator.translate(text, src='auto', dest='en').text + # final_result = translator.translate(mid_result, src='en', dest='sk').text + # logger.info("Translation - After: " + final_result) + # before_words = text.split() + # after_words = final_result.split() + # diff = list(difflib.ndiff(before_words, after_words)) + # changed_words = [word[2:] for word in diff if word.startswith('+ ')] + # if changed_words: + # logger.info("Changed words: " + ", ".join(changed_words)) + # else: + # logger.info("No changed words detected.") + # return final_result + # except Exception as e: + # logger.error(f"Translation error: {e}") + # return text + return text +############################################################################### +# Функция перевода описания лекарства с сохранением названия (до двоеточия) # +############################################################################### +def translate_preserving_medicine_names(text: str) -> str: + """ + Ищет строки вида "номер. Название лекарства: описание..." и переводит только описание, + оставляя название без изменений. + Сейчас функция является заглушкой и возвращает исходный текст без изменений. + """ + # pattern = re.compile(r'^(\d+\.\s*[^:]+:\s*)(.*)$', re.MULTILINE) + # + # def replacer(match): + # prefix = match.group(1) + # description = match.group(2) + # logger.info("Translating description: " + description) + # translated_description = translate_to_slovak(description) + # logger.info("Translated description: " + translated_description) + # diff = list(difflib.ndiff(description.split(), translated_description.split())) + # changed_words = [word[2:] for word in diff if word.startswith('+ ')] + # if changed_words: + # logger.info("Changed words in description: " + ", ".join(changed_words)) + # else: + # logger.info("No changed words in description detected.") + # return prefix + translated_description + # + # if pattern.search(text): + # return pattern.sub(replacer, text) + # else: + # return translate_to_slovak(text) + return text ############################################################################### # Custom Mistral LLM # @@ -83,7 +116,7 @@ class CustomMistralLLM: logger.info(f"Full response from model {self.model_name}: {result}") return result.get("choices", [{}])[0].get("message", {}).get("content", "No response") except HTTPError as e: - if response.status_code == 429: # Too Many Requests + if response.status_code == 429: logger.warning(f"Rate limit exceeded. Waiting {delay} seconds before retry.") time.sleep(delay) attempt += 1 @@ -95,7 +128,6 @@ class CustomMistralLLM: raise e raise Exception("Reached maximum number of retries for API request") - ############################################################################### # Initialize embeddings and Elasticsearch store # ############################################################################### @@ -104,7 +136,6 @@ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase- index_name = 'drug_docs' -# Connect to Elasticsearch if config.get("useCloud", False): logger.info("Using cloud Elasticsearch.") cloud_id = "tt:dXMtZWFzdC0yLmF3cy5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOGM3ODQ0ZWVhZTEyNGY3NmFjNjQyNDFhNjI4NmVhYzMkZTI3YjlkNTQ0ODdhNGViNmEyMTcxMjMxNmJhMWI0ZGU=" @@ -125,7 +156,6 @@ else: logger.info(f"Connected to {'cloud' if config.get('useCloud', False) else 'local'} Elasticsearch.") - ############################################################################### # Initialize Mistral models (small & large) # ############################################################################### @@ -141,41 +171,52 @@ llm_large = CustomMistralLLM( model_name="mistral-large-latest" ) - ############################################################################### # Helper function to evaluate model output # ############################################################################### def evaluate_results(query, summaries, model_name): - """ - Evaluates results by: - - text length, - - presence of query keywords, etc. - Returns a rating and explanation. - """ query_keywords = query.split() total_score = 0 explanation = [] - for i, summary in enumerate(summaries): - # Length-based scoring length_score = min(len(summary) / 100, 10) total_score += length_score explanation.append(f"Document {i+1}: Length score - {length_score}") - - # Keyword-based scoring keyword_matches = sum(1 for word in query_keywords if word.lower() in summary.lower()) keyword_score = min(keyword_matches * 2, 10) total_score += keyword_score explanation.append(f"Document {i+1}: Keyword match score - {keyword_score}") - final_score = total_score / len(summaries) if summaries else 0 explanation_summary = "\n".join(explanation) - logger.info(f"Evaluation for model {model_name}: {final_score}/10") logger.info(f"Explanation:\n{explanation_summary}") - return {"rating": round(final_score, 2), "explanation": explanation_summary} +############################################################################### +# validation of recieved answer is it correct for user question # +############################################################################### +def validate_answer_logic(query: str, answer: str) -> str: + """ + Проверяет, соответствует ли ответ логике вопроса. + Если, например, вопрос относится к ľudským liekom a obsahuje otázku na dávkovanie, + odpoveď musí obsahovať iba lieky vhodné pre ľudí s uvedením správneho dávkovania. + """ + validation_prompt = ( + f"Otázka: '{query}'\n" + f"Odpoveď: '{answer}'\n\n" + "Analyzuj prosím túto odpoveď. Ak odpoveď obsahuje odporúčania liekov, ktoré nie sú vhodné pre ľudí, " + "alebo ak neobsahuje správne informácie o dávkovaní, oprav ju tak, aby bola logicky konzistentná s otázkou. " + "Odpoveď musí obsahovať iba lieky určené pre ľudí a pri potrebe aj presné informácie o dávkovaní (napr. v gramoch). " + "Ak je odpoveď logická a korektná, vráť pôvodnú odpoveď bez zmien. " + "Odpovedz v slovenčine a iba čistou, konečnou odpoveďou bez ďalších komentárov." + ) + try: + validated_answer = llm_small.generate_text(prompt=validation_prompt, max_tokens=500, temperature=0.5) + logger.info(f"Validated answer: {validated_answer}") + return validated_answer + except Exception as e: + logger.error(f"Error during answer validation: {e}") + return answer ############################################################################### # Main function: process_query_with_mistral (Slovak prompt) # @@ -186,29 +227,25 @@ def process_query_with_mistral(query, k=10): # --- Vector search --- vector_results = vectorstore.similarity_search(query, k=k) vector_documents = [hit.metadata.get('text', '') for hit in vector_results] - max_docs = 5 max_doc_length = 1000 vector_documents = [doc[:max_doc_length] for doc in vector_documents[:max_docs]] - if vector_documents: - # Slovak prompt vector_prompt = ( f"Otázka: '{query}'.\n" "Na základe nasledujúcich informácií o liekoch:\n" f"{vector_documents}\n\n" - "Prosím, uveďte tri najvhodnejšie lieky alebo riešenia. Pre každý liek uveďte jeho názov a stručné, jasné vysvetlenie, prečo je vhodný. " - "Odpovedajte priamo a ľudským, priateľským tónom v číslovanom zozname, bez nepotrebných úvodných fráz alebo opisu procesu. " + "Prosím, uveďte tri najvhodnejšie lieky alebo riešenia pre daný problém. " + "Pre každý liek uveďte jeho názov, stručné a jasné vysvetlenie, prečo je vhodný, a ak je to relevantné, " + "aj odporúčané dávkovanie (napr. v gramoch alebo v iných vhodných jednotkách). " + "Odpovedajte priamo a ľudským, priateľským tónom v číslovanom zozname, bez nepotrebných úvodných fráz. " "Odpoveď musí byť v slovenčine." ) - summary_small_vector = llm_small.generate_text(prompt=vector_prompt, max_tokens=700, temperature=0.7) summary_large_vector = llm_large.generate_text(prompt=vector_prompt, max_tokens=700, temperature=0.7) - splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20) split_summary_small_vector = splitter.split_text(summary_small_vector) split_summary_large_vector = splitter.split_text(summary_large_vector) - small_vector_eval = evaluate_results(query, split_summary_small_vector, 'Mistral Small') large_vector_eval = evaluate_results(query, split_summary_large_vector, 'Mistral Large') else: @@ -224,24 +261,22 @@ def process_query_with_mistral(query, k=10): ) text_documents = [hit['_source'].get('text', '') for hit in es_results['hits']['hits']] text_documents = [doc[:max_doc_length] for doc in text_documents[:max_docs]] - if text_documents: - # Slovak prompt text_prompt = ( f"Otázka: '{query}'.\n" "Na základe nasledujúcich informácií o liekoch:\n" f"{text_documents}\n\n" - "Prosím, uveďte tri najvhodnejšie lieky alebo riešenia. Pre každý liek uveďte jeho názov a stručné, jasné vysvetlenie, prečo je vhodný. " - "Odpovedajte priamo a ľudským, priateľským tónom v číslovanom zozname, bez nepotrebných úvodných fráz alebo opisu procesu. " + "Prosím, uveďte tri najvhodnejšie lieky alebo riešenia pre daný problém. " + "Pre každý liek uveďte jeho názov, stručné a jasné vysvetlenie, prečo je vhodný, a ak je to relevantné, " + "aj odporúčané dávkovanie (napr. v gramoch alebo v iných vhodných jednotkách). " + "Odpovedajte priamo a ľudským, priateľským tónom v číslovanom zozname, bez nepotrebných úvodných fráz. " "Odpoveď musí byť v slovenčine." ) - summary_small_text = llm_small.generate_text(prompt=text_prompt, max_tokens=700, temperature=0.7) summary_large_text = llm_large.generate_text(prompt=text_prompt, max_tokens=700, temperature=0.7) - - split_summary_small_text = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20).split_text(summary_small_text) - split_summary_large_text = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20).split_text(summary_large_text) - + splitter_text = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20) + split_summary_small_text = splitter_text.split_text(summary_small_text) + split_summary_large_text = splitter_text.split_text(summary_large_text) small_text_eval = evaluate_results(query, split_summary_small_text, 'Mistral Small') large_text_eval = evaluate_results(query, split_summary_large_text, 'Mistral Large') else: @@ -250,30 +285,31 @@ def process_query_with_mistral(query, k=10): summary_small_text = "" summary_large_text = "" - # Combine all results and pick the best + # Porovnanie výsledkov a výber najlepšieho all_results = [ {"eval": small_vector_eval, "summary": summary_small_vector, "model": "Mistral Small Vector"}, {"eval": large_vector_eval, "summary": summary_large_vector, "model": "Mistral Large Vector"}, {"eval": small_text_eval, "summary": summary_small_text, "model": "Mistral Small Text"}, {"eval": large_text_eval, "summary": summary_large_text, "model": "Mistral Large Text"}, ] - best_result = max(all_results, key=lambda x: x["eval"]["rating"]) logger.info(f"Best result from model {best_result['model']} with score {best_result['eval']['rating']}.") - # Final translation to Slovak (with logs before/after) - polished_answer = translate_to_slovak(best_result["summary"]) + # Dodatočná kontrola logiky odpovede + validated_answer = validate_answer_logic(query, best_result["summary"]) + + polished_answer = translate_preserving_medicine_names(validated_answer) return { "best_answer": polished_answer, "model": best_result["model"], "rating": best_result["eval"]["rating"], "explanation": best_result["eval"]["explanation"] } - except Exception as e: logger.error(f"Error: {str(e)}") return { "best_answer": "An error occurred during query processing.", "error": str(e) } + diff --git a/Backend/server.py b/Backend/server.py index db5cb4a..4583644 100644 --- a/Backend/server.py +++ b/Backend/server.py @@ -1,4 +1,6 @@ import time +import re + # Сохраняем оригинальную функцию time.time _real_time = time.time # Переопределяем time.time для смещения времени на 1 секунду назад @@ -15,7 +17,7 @@ from model import process_query_with_mistral import psycopg2 from psycopg2.extras import RealDictCursor -# Параметры подключения +# Параметры подключения к базе данных DATABASE_CONFIG = { "dbname": "postgres", "user": "postgres", @@ -27,7 +29,6 @@ DATABASE_CONFIG = { # Подключение к базе данных try: conn = psycopg2.connect(**DATABASE_CONFIG) - cursor = conn.cursor(cursor_factory=RealDictCursor) print("Подключение к базе данных успешно установлено") except Exception as e: print(f"Ошибка подключения к базе данных: {e}") @@ -45,15 +46,16 @@ CLIENT_ID = "532143017111-4eqtlp0oejqaovj6rf5l1ergvhrp4vao.apps.googleuserconten def save_user_to_db(name, email, google_id=None, password=None): try: - cursor.execute( - """ - INSERT INTO users (name, email, google_id, password) - VALUES (%s, %s, %s, %s) - ON CONFLICT (email) DO NOTHING - """, - (name, email, google_id, password) - ) - conn.commit() + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute( + """ + INSERT INTO users (name, email, google_id, password) + VALUES (%s, %s, %s, %s) + ON CONFLICT (email) DO NOTHING + """, + (name, email, google_id, password) + ) + conn.commit() print(f"User {name} ({email}) saved successfully!") except Exception as e: print(f"Error saving user to database: {e}") @@ -63,91 +65,154 @@ def save_user_to_db(name, email, google_id=None, password=None): def verify_token(): data = request.get_json() token = data.get('token') - if not token: return jsonify({'error': 'No token provided'}), 400 - try: id_info = id_token.verify_oauth2_token(token, requests.Request(), CLIENT_ID) user_email = id_info.get('email') user_name = id_info.get('name') google_id = id_info.get('sub') # Уникальный идентификатор пользователя Google - save_user_to_db(name=user_name, email=user_email, google_id=google_id) - logger.info(f"User authenticated and saved: {user_name} ({user_email})") return jsonify({'message': 'Authentication successful', 'user': {'email': user_email, 'name': user_name}}), 200 - except ValueError as e: logger.error(f"Token verification failed: {e}") return jsonify({'error': 'Invalid token'}), 400 -# Эндпоинт для регистрации пользователя +# Эндпоинт для регистрации пользователя с проверкой на дублирование @app.route('/api/register', methods=['POST']) def register(): data = request.get_json() name = data.get('name') email = data.get('email') password = data.get('password') # Рекомендуется хэшировать пароль - if not all([name, email, password]): return jsonify({'error': 'All fields are required'}), 400 - try: - # Проверка, существует ли пользователь с таким email - cursor.execute("SELECT * FROM users WHERE email = %s", (email,)) - existing_user = cursor.fetchone() - if existing_user: - return jsonify({'error': 'User already exists'}), 409 - - # Сохранение пользователя в базу данных + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute("SELECT * FROM users WHERE email = %s", (email,)) + existing_user = cur.fetchone() + if existing_user: + return jsonify({'error': 'User already exists'}), 409 save_user_to_db(name=name, email=email, password=password) - return jsonify({'message': 'User registered successfully'}), 201 except Exception as e: return jsonify({'error': str(e)}), 500 -# Эндпоинт для логина пользователя (см. предыдущий пример) +# Эндпоинт для логина пользователя @app.route('/api/login', methods=['POST']) def login(): data = request.get_json() email = data.get('email') password = data.get('password') - if not all([email, password]): return jsonify({'error': 'Email and password are required'}), 400 - try: - cursor.execute("SELECT * FROM users WHERE email = %s", (email,)) - user = cursor.fetchone() - - if not user: - return jsonify({'error': 'Invalid credentials'}), 401 - - # Сравнение простым текстом — в production используйте хэширование! - if user.get('password') != password: - return jsonify({'error': 'Invalid credentials'}), 401 - - return jsonify({ - 'message': 'Login successful', - 'user': { - 'name': user.get('name'), - 'email': user.get('email') - } - }), 200 + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute("SELECT * FROM users WHERE email = %s", (email,)) + user = cur.fetchone() + if not user: + return jsonify({'error': 'Invalid credentials'}), 401 + if user.get('password') != password: + return jsonify({'error': 'Invalid credentials'}), 401 + return jsonify({'message': 'Login successful', 'user': {'name': user.get('name'), 'email': user.get('email')}}), 200 except Exception as e: return jsonify({'error': str(e)}), 500 -# Эндпоинт для обработки запросов от фронтенда +# Объединённый эндпоинт для обработки запроса чата @app.route('/api/chat', methods=['POST']) def chat(): data = request.get_json() query = data.get('query', '') + user_email = data.get('email') # email пользователя (если передается) + chat_id = data.get('chatId') # параметр для обновления существующего чата + if not query: return jsonify({'error': 'No query provided'}), 400 - response = process_query_with_mistral(query) - return jsonify(response) + # Вызов функции для обработки запроса (например, чат-бота) + response_obj = process_query_with_mistral(query) + best_answer = "" + if isinstance(response_obj, dict): + best_answer = response_obj.get("best_answer", "") + else: + best_answer = str(response_obj) + + # Форматирование ответа с использованием re.sub + best_answer = re.sub(r'[*#]', '', best_answer) + best_answer = re.sub(r'(\d\.\s)', r'\n\n\1', best_answer) + best_answer = re.sub(r':\s-', r':\n-', best_answer) + + # Если chatId передан, обновляем существующий чат, иначе создаем новый чат + if chat_id: + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute("SELECT chat FROM chat_history WHERE id = %s", (chat_id,)) + existing_chat = cur.fetchone() + if existing_chat: + updated_chat = existing_chat['chat'] + f"\nUser: {query}\nBot: {best_answer}" + cur.execute("UPDATE chat_history SET chat = %s WHERE id = %s", (updated_chat, chat_id)) + conn.commit() + else: + with conn.cursor(cursor_factory=RealDictCursor) as cur2: + cur2.execute( + "INSERT INTO chat_history (user_email, chat) VALUES (%s, %s) RETURNING id", + (user_email, f"User: {query}\nBot: {best_answer}") + ) + new_chat_id = cur2.fetchone()['id'] + conn.commit() + chat_id = new_chat_id + except Exception as e: + return jsonify({'error': str(e)}), 500 + else: + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute( + "INSERT INTO chat_history (user_email, chat) VALUES (%s, %s) RETURNING id", + (user_email, f"User: {query}\nBot: {best_answer}") + ) + new_chat_id = cur.fetchone()['id'] + conn.commit() + chat_id = new_chat_id + except Exception as e: + return jsonify({'error': str(e)}), 500 + + # Возвращаем текстовый ответ и новый chatId, если чат был создан + return jsonify({'response': {'best_answer': best_answer, 'model': 'Mistral Small Vector', 'chatId': chat_id}}), 200 + +# Эндпоинт для получения истории чатов конкретного пользователя +@app.route('/api/chat_history', methods=['GET']) +def get_chat_history(): + user_email = request.args.get('email') + if not user_email: + return jsonify({'error': 'User email is required'}), 400 + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute( + "SELECT id, chat, created_at FROM chat_history WHERE user_email = %s ORDER BY created_at DESC", + (user_email,) + ) + history = cur.fetchall() + return jsonify({'history': history}), 200 + except Exception as e: + return jsonify({'error': str(e)}), 500 + +# Эндпоинт для получения деталей чата по ID +@app.route('/api/chat_history_detail', methods=['GET']) +def chat_history_detail(): + chat_id = request.args.get('id') + if not chat_id: + return jsonify({'error': 'Chat id is required'}), 400 + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute("SELECT id, chat, created_at FROM chat_history WHERE id = %s", (chat_id,)) + chat = cur.fetchone() + if not chat: + return jsonify({'error': 'Chat not found'}), 404 + return jsonify({'chat': chat}), 200 + except Exception as e: + return jsonify({'error': str(e)}), 500 if __name__ == '__main__': app.run(host='0.0.0.0', port=5000, debug=True) + diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 589728b..408819a 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -1,41 +1,41 @@ import { BrowserRouter as Router, Route, Routes, Outlet } from 'react-router-dom'; import Navigation from './Components/Navigation'; -import HomePage from './pages/HomePage'; import LandingPage from './pages/LandingPage'; -import RegistrationForm from "./Components/RegistrationForm.tsx"; -import LoginForm from "./Components/LoginForm.tsx"; - +import RegistrationForm from "./Components/RegistrationForm"; +import LoginForm from "./Components/LoginForm"; +import ChatHistory from "./Components/ChatHistory"; +import HomePage from './pages/HomePage'; +import NewChatPage from "./Components/NewChatPage"; const Layout = () => ( -
{line}
+ ))} +{error}
} + {history.length === 0 && !error ? ( +No chat history found.
+ ) : ( +@@ -80,7 +79,11 @@ const Navigation = ({ isExpanded = false }: NavigationProps) => {
{line}
+ ))} +{line}
- ))} - {msg.rating &&Rating: {msg.rating}
} - {msg.explanation &&Explanation: {msg.explanation}
} + {formattedMessage}I'm thinking
-