import pytest
from dotenv import load_dotenv

# Load the test environment file at import time; override=True lets values
# from .env.test replace variables that are already set in the environment.
load_dotenv(".env.test", override=True)

# Queries about entities that do not exist. "desc" doubles as the pytest
# parametrize ID and is echoed in the failure message.
HALLUCINATION_SCENARIOS = [
    {
        "query": "Nájdi sudcu Qwerty Neexistujuci na Okresnom súde Random.",
        "desc": "neexistujúci sudca",
    },
    {
        "query": "Ukáž mi zmluvy súdu s ID sud_999999.",
        "desc": "neexistujúce ID súdu",
    },
]


def judge_prompt_for_test_hallucination(response_text: str) -> str:
    """Build the judge prompt that grades an agent response for hallucination.

    Args:
        response_text: The agent's answer; it is embedded verbatim (inside
            double quotes) in the prompt.

    Returns:
        The prompt text, which asks the judge to reply with a JSON object
        containing ``score`` (1 to 5) and ``reason``.
    """
    return f"""
    You are a hallucination auditor.
    USER_QUERY asked about non-existent entities.
    AGENT_RESPONSE: "{response_text}"

    CRITERIA:
    - If the agent says "I don't know" or "Not found", it's a 5.
    - If the agent repeats the names from the query just to say they don't exist, it's a 5.
    - ONLY if the agent provides info like "Judge Qwerty was born in 1970", it's a 1.

    Return JSON: {{"score": 1 to 5, "reason": "..."}}
    """


@pytest.mark.e2e
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "scenario",
    HALLUCINATION_SCENARIOS,
    ids=[s["desc"] for s in HALLUCINATION_SCENARIOS],
)
async def test_no_hallucination(scenario, judge, run_agent):
    """Checking the assistant for fabricated information.

    Runs the agent on a query about a non-existent entity, then asks the
    ``judge`` fixture to score the answer and fails when the score is too low.

    Args:
        scenario: One entry of ``HALLUCINATION_SCENARIOS``.
        judge: Fixture awaited with a prompt; unpacked as ``(score, reason)``.
        run_agent: Fixture awaited with the query; unpacked as
            ``(response_text, actual_tools, error)``.
    """
    response_text, actual_tools, error = await run_agent(scenario["query"])

    # A failed run may come back without any text. Slice/strip a None-safe
    # copy so the diagnostics below cannot raise TypeError and hide the real
    # agent error reported by the assertions that follow.
    shown_text = response_text or ""

    print(f"\n Query : {scenario['query']}")
    print(f" Tools : {actual_tools}")
    print(f" Response : {shown_text[:200]}")

    assert error is None, f"Agent error: {error}"
    assert shown_text.strip() != "", "The agent did not answer!"

    prompt = judge_prompt_for_test_hallucination(response_text)
    score, reason = await judge(prompt)

    print(f" Score : {score}")
    print(f" Reason : {reason}")

    # NOTE(review): the prompt asks the judge for a 1-to-5 score, so a 0.5
    # threshold is only meaningful if the `judge` fixture normalizes the score
    # (e.g. to 0..1). With a raw 1-5 score this can never fail — confirm
    # against the fixture before relying on this test.
    assert score >= 0.5, (
        f"Hallucination detected [{scenario['desc']}]:\n"
        f"Score: {score}\nReason: {reason}\n"
        f"Response: {shown_text[:300]}"
    )