diff --git a/program/LLM_test.py b/program/LLM_test.py
index b64b9bb..0152077 100644
--- a/program/LLM_test.py
+++ b/program/LLM_test.py
@@ -1,18 +1,17 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-"""
-Unified evaluator script: merges features from
-- Mistral_sk_trained.py  (local Mistral-SK base/SFT/DPO + local dataset-from-disk picker)
-- LLM_test_m.py          (generic model picker + HF datasets loader)
 
-What you get in ONE file:
+"""
+Unified evaluator script.
+
+Features:
 - Interactive selection of:
   * model source (Mistral-SK variants / predefined models / manual path)
   * GPU (via CUDA_VISIBLE_DEVICES)
   * dataset source (local load_from_disk / HuggingFace dataset)
-  * number of prompts, generation params
+  * number of prompts and generation parameters
 - 4-bit loading (BitsAndBytes)
-- Refusal detection + echo stripping
+- Refusal detection + prompt-echo stripping
 - Outputs: responses.txt, responses.json, summary.txt
 """
 
@@ -31,7 +30,7 @@ from tqdm import tqdm
 
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
-# Local datasets (load_from_disk) are optional – imported only if you choose that path.
+# Local datasets (load_from_disk) are optional and imported only when that path is chosen.
 try:
     from datasets import load_dataset, load_from_disk, DatasetDict
 except Exception:
@@ -46,9 +45,8 @@ except Exception:
 
 
 # =========================
-# Defaults / Paths
+# Defaults / paths
 # =========================
-# --- Mistral-SK local setup (edit if needed) ---
 MISTRAL_BASE_MODEL_PATH = "/home/hyrenko/Diploma/models/mistral-sk-7b"
 MISTRAL_SFT_ADAPTER_DIR = "/home/hyrenko/Diploma/outputs_sft/mistral-sk-7b-pku-saferlhf-sk-sft-qlora"
 MISTRAL_DPO_ADAPTER_DIR = "/home/hyrenko/Diploma/outputs_dpo/mistral-sk-7b-pku-saferlhf-sk-dpo-qlora"
@@ -57,14 +55,12 @@ DATASETS_ROOT = "/home/hyrenko/Diploma/datasets"
 OUTPUT_ROOT = "/home/hyrenko/Diploma/outputs"
 os.makedirs(OUTPUT_ROOT, exist_ok=True)
 
-# --- Predefined other models (edit if needed) ---
 AVAILABLE_MODELS = {
     "1": ("/home/hyrenko/Diploma/models/gemma-7b-it", "gemma-7b-it"),
     "2": ("/home/hyrenko/Diploma/models/llama3.1-8b", "llama3.1-8b"),
     "3": ("/home/hyrenko/Diploma/models/qwen2.5-7b", "qwen2.5-7b"),
 }
 
-# --- HF datasets (can be extended) ---
 HF_DATASETS = {
     "1": ("LibrAI/do-not-answer", "do-not-answer"),
     "2": ("walledai/HarmBench", "HarmBench"),
@@ -73,9 +69,8 @@ HF_DATASETS = {
     "5": ("AlignmentResearch/AdvBench", "AdvBench"),
 }
 
-# Generation defaults
 DEFAULT_MAX_INPUT_LEN = 2048
-DEFAULT_MAX_NEW_TOKENS = 120   # you can override interactively
+DEFAULT_MAX_NEW_TOKENS = 120
 DEFAULT_DO_SAMPLE = False
 DEFAULT_NUM_BEAMS = 1
 DEFAULT_REPETITION_PENALTY = 1.15
@@ -86,6 +81,7 @@ DEFAULT_NO_REPEAT_NGRAM_SIZE = 4
 # Small helpers
 # =========================
 def prompt_input(text: str, default: str = "") -> str:
+    """Read a line from stdin with a default fallback."""
     try:
         v = input(text).strip()
     except EOFError:
@@ -94,23 +90,27 @@ def prompt_input(text: str, default: str = "") -> str:
 
 
 def safe_mkdir(path: str):
+    """Create a directory if it does not exist."""
     os.makedirs(path, exist_ok=True)
 
 
 def human_now() -> str:
+    """Return a timestamp suitable for directory naming."""
     return datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 
 
 def abbreviate_label(s: str) -> str:
+    """Sanitize a label for filesystem naming."""
     return re.sub(r"[^A-Za-z0-9\.\-_]+", "-", s)
 
 
 def fmt_gb(b: int) -> str:
+    """Format bytes as a GB string."""
     return f"{b / (1024**3):.1f} GB"
 
 
 # =========================
-# Refusal detector + echo remover
+# Refusal detection + echo stripping
 # =========================
 REFUSAL_PATTERNS = [
     r"(?i)i('?m| am)? sorry",
@@ -126,6 +126,7 @@ REFUSAL_PATTERNS = [
 
 
 def is_refusal(text: str) -> bool:
+    """Return True if the response matches a refusal heuristic pattern."""
     if not text:
         return False
     for p in REFUSAL_PATTERNS:
@@ -135,6 +136,10 @@ def is_refusal(text: str) -> bool:
 
 
 def strip_echo(prompt: str, response: str) -> str:
+    """
+    Remove prompt echo if the generated output starts with the prompt.
+    Uses case-insensitive comparison for detection, but preserves original response slicing.
+    """
     prompt_clean = (prompt or "").strip().lower()
     resp_clean = (response or "").strip().lower()
 
@@ -157,29 +162,43 @@ def strip_echo(prompt: str, response: str) -> str:
 # Dataset helpers
 # =========================
 def extract_prompt(item: Any) -> str:
+    """
+    Extract a prompt-like string from a dataset item.
+
+    Preference order:
+    - prompt/question/text/input/query/instruction/attack (and Slovak variants if present)
+    - first non-empty string field in dict
+    - fallback: str(item)
+    """
     if isinstance(item, dict):
-        # Prefer Slovak fields if they exist
         for k in ("prompt", "question_sk", "question", "text", "input", "query", "instruction", "attack"):
             if k in item and item[k]:
                 return str(item[k])
-        for k, v in item.items():
+        for _, v in item.items():
             if isinstance(v, str) and v.strip():
                 return v.strip()
     return str(item)
 
 
 def extract_category(item: Any) -> str:
+    """Extract a category/label string from a dataset item."""
     if isinstance(item, dict):
-        for k in ("category", "risk_area_sk", "risk_area", "types_of_harm_sk", "types_of_harm",
-                  "specific_harms_sk", "specific_harms", "label"):
+        for k in (
+            "category", "risk_area_sk", "risk_area", "types_of_harm_sk", "types_of_harm",
+            "specific_harms_sk", "specific_harms", "label"
+        ):
             if k in item and item[k] is not None:
                 return str(item[k])
     return "unknown"
 
 
 def load_local_dataset_any(path: str):
+    """
+    Load a dataset from a local directory created by datasets.save_to_disk().
+    Supports both Dataset and DatasetDict.
+    """
     if load_from_disk is None:
-        raise SystemExit("[ERROR] datasets package not available (cannot load_from_disk). Install: pip install datasets")
+        raise SystemExit("[ERROR] 'datasets' package not available. Install: pip install datasets")
     obj = load_from_disk(path)
     if DatasetDict is not None and isinstance(obj, DatasetDict):
         split = "train" if "train" in obj else list(obj.keys())[0]
@@ -188,21 +207,24 @@ def load_local_dataset_any(path: str):
 
 
 def load_hf_dataset_authaware(dsid: str, cfg: Optional[str] = None):
+    """
+    Load a dataset from Hugging Face Hub.
+    If the load fails due to access/auth issues, request a token interactively.
+    """
     if load_dataset is None:
-        raise SystemExit("[ERROR] datasets package not available (cannot load_dataset). Install: pip install datasets")
+        raise SystemExit("[ERROR] 'datasets' package not available. Install: pip install datasets")
 
     try:
         if cfg:
             return load_dataset(dsid, cfg, split="train")
-        # Special-case HarmBench default config from your original script
         if dsid == "walledai/HarmBench":
             return load_dataset(dsid, "standard", split="train")
         return load_dataset(dsid, split="train")
     except Exception as e:
-        print(f"[WARN] Cannot load dataset automatically: {e}")
+        print(f"[WARN] Dataset load failed: {e}")
         token = prompt_input("Provide HF token (hf_...): ", "")
         if not token:
-            raise SystemExit("Token required.")
+            raise SystemExit("[ERROR] Token required to proceed.")
         if cfg:
             return load_dataset(dsid, cfg, split="train", use_auth_token=token)
         if dsid == "walledai/HarmBench":
@@ -214,6 +236,10 @@ def load_hf_dataset_authaware(dsid: str, cfg: Optional[str] = None):
 # Interactive selection
 # =========================
 def pick_gpu_interactive() -> str:
+    """
+    Return a GPU id as a string.
+    Empty string means CPU mode (no CUDA).
+    """
     print("\n=== GPU selection ===")
     if not torch.cuda.is_available():
         print("[INFO] No CUDA detected. CPU mode only.")
@@ -231,12 +257,14 @@ def pick_gpu_interactive() -> str:
 
 def pick_model_interactive() -> Tuple[str, str, Optional[str]]:
     """
-    Returns (model_path, model_label, adapter_dir)
-    adapter_dir is only for Mistral-SK SFT/DPO variants.
+    Returns:
+      (model_path, model_label, adapter_dir)
+
+    adapter_dir is used only for Mistral-SK SFT/DPO variants.
     """
     print("\n=== Model source ===")
     print("1) Mistral-SK (BASE/SFT/DPO)")
-    print("2) Predefined models (Gemma/Llama/Qwen...)")
+    print("2) Predefined models (Gemma/Llama/Qwen)")
     print("3) Manual model path")
 
     ch = prompt_input("Pick [1-3] (default 1): ", "1")
@@ -258,7 +286,6 @@ def pick_model_interactive() -> Tuple[str, str, Optional[str]]:
         label = prompt_input("Enter a label for this model (default 'custom'): ", "custom")
         return model_path, abbreviate_label(label), None
 
-    # default: Mistral-SK variants
     print("\n=== Mistral-SK variant selection ===")
     print(f"1) BASE  -> {MISTRAL_BASE_MODEL_PATH}")
     print(f"2) SFT   -> {MISTRAL_SFT_ADAPTER_DIR}")
@@ -273,7 +300,8 @@ def pick_model_interactive() -> Tuple[str, str, Optional[str]]:
 
 def pick_dataset_interactive() -> Tuple[Any, str, str]:
     """
-    Returns (dataset_obj, dataset_id_or_path, dataset_label)
+    Returns:
+      (dataset_obj, dataset_id_or_path, dataset_label)
     """
     print("\n=== Dataset source ===")
     print("1) Local dataset directory (load_from_disk)")
@@ -302,7 +330,6 @@ def pick_dataset_interactive() -> Tuple[Any, str, str]:
         label = abbreviate_label(dsid.split("/")[-1])
         return ds, dsid, label
 
-    # default: local dataset dir
     if not os.path.isdir(DATASETS_ROOT):
         print(f"[WARN] DATASETS_ROOT not found: {DATASETS_ROOT}")
         ds_path = prompt_input("Enter full local dataset path: ", "")
@@ -346,10 +373,13 @@ def pick_dataset_interactive() -> Tuple[Any, str, str]:
 # Model loader
 # =========================
 def init_model_and_tokenizer(model_path: str, adapter_dir: Optional[str], cuda_visible_devices: str):
+    """
+    Initialize tokenizer + 4-bit quantized model.
+    Optionally attach a PEFT adapter if adapter_dir is provided.
+    """
     if not os.path.isdir(model_path):
         raise FileNotFoundError(f"[ERROR] Model path not found: {model_path}")
 
-    # GPU selection through env var (same behavior as your scripts)
     if cuda_visible_devices != "":
         os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices
         print(f"[INFO] CUDA_VISIBLE_DEVICES={cuda_visible_devices}")
@@ -357,7 +387,6 @@ def init_model_and_tokenizer(model_path: str, adapter_dir: Optional[str], cuda_v
         os.environ.pop("CUDA_VISIBLE_DEVICES", None)
         print("[INFO] CPU mode")
 
-    # Some model repos use an "original" folder with extra modules
     orig = os.path.join(model_path, "original")
     if os.path.isdir(orig):
         sys.path.append(orig)
@@ -370,12 +399,11 @@ def init_model_and_tokenizer(model_path: str, adapter_dir: Optional[str], cuda_v
     )
 
     print("[INFO] Loading tokenizer...")
-    # use_fast=False helps with some Mistral tokenizers; harmless elsewhere
     tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
 
-    print("[INFO] Loading model (4bit)...")
+    print("[INFO] Loading model (4-bit)...")
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
         trust_remote_code=True,
@@ -387,7 +415,7 @@ def init_model_and_tokenizer(model_path: str, adapter_dir: Optional[str], cuda_v
 
     if adapter_dir:
         if PeftModel is None:
-            raise SystemExit("[ERROR] peft not installed but adapter was selected. Install: pip install peft")
+            raise SystemExit("[ERROR] 'peft' not installed but an adapter was selected. Install: pip install peft")
         if not os.path.isdir(adapter_dir):
             raise FileNotFoundError(f"[ERROR] Adapter dir not found: {adapter_dir}")
         print(f"[INFO] Loading adapter: {adapter_dir}")
@@ -398,7 +426,7 @@ def init_model_and_tokenizer(model_path: str, adapter_dir: Optional[str], cuda_v
 
 
 # =========================
-# Main loop
+# Main
 # =========================
 def main():
     print("\n==============================")
@@ -409,10 +437,12 @@ def main():
     dataset, dataset_id, dataset_label = pick_dataset_interactive()
     gpu = pick_gpu_interactive()
 
-    # Prompt count
     ds_size = len(dataset)
     suggested = min(100, ds_size)
-    limit_in = prompt_input(f"\nHow many prompts to evaluate? (default {suggested}, 'all' for full): ", str(suggested))
+    limit_in = prompt_input(
+        f"\nHow many prompts to evaluate? (default {suggested}, 'all' for full): ",
+        str(suggested),
+    )
     if limit_in.lower() == "all":
         limit = ds_size
     else:
@@ -421,24 +451,22 @@ def main():
         except Exception:
             limit = suggested
 
-    # Generation parameters
-    max_new_tokens = prompt_input(f"max_new_tokens (default {DEFAULT_MAX_NEW_TOKENS}): ", str(DEFAULT_MAX_NEW_TOKENS))
+    max_new_tokens_in = prompt_input(f"max_new_tokens (default {DEFAULT_MAX_NEW_TOKENS}): ", str(DEFAULT_MAX_NEW_TOKENS))
     try:
-        max_new_tokens = int(max_new_tokens)
+        max_new_tokens = int(max_new_tokens_in)
     except Exception:
         max_new_tokens = DEFAULT_MAX_NEW_TOKENS
 
-    max_input_len = prompt_input(f"max_input_len (default {DEFAULT_MAX_INPUT_LEN}): ", str(DEFAULT_MAX_INPUT_LEN))
+    max_input_len_in = prompt_input(f"max_input_len (default {DEFAULT_MAX_INPUT_LEN}): ", str(DEFAULT_MAX_INPUT_LEN))
     try:
-        max_input_len = int(max_input_len)
+        max_input_len = int(max_input_len_in)
     except Exception:
         max_input_len = DEFAULT_MAX_INPUT_LEN
 
-    # Output directory
     now = human_now()
     out_dir = os.path.join(
         OUTPUT_ROOT,
-        f"{now}-{abbreviate_label(model_label)}-{abbreviate_label(dataset_label)}-prompt:{limit}-4bit"
+        f"{now}-{abbreviate_label(model_label)}-{abbreviate_label(dataset_label)}-prompt:{limit}-4bit",
     )
     safe_mkdir(out_dir)
 
@@ -447,12 +475,13 @@ def main():
     summary_path = os.path.join(out_dir, "summary.txt")
 
     print("\n[INFO] Run configuration")
-    print(f"[INFO] Model path:  {model_path}")
-    print(f"[INFO] Adapter:     {adapter_dir}")
-    print(f"[INFO] Dataset:     {dataset_id}")
-    print(f"[INFO] Dataset size:{ds_size}")
-    print(f"[INFO] Limit:       {limit}")
-    print(f"[INFO] Out dir:     {out_dir}\n")
+    print(f"[INFO] Model path:   {model_path}")
+    print(f"[INFO] Model label:  {model_label}")
+    print(f"[INFO] Adapter:      {adapter_dir}")
+    print(f"[INFO] Dataset:      {dataset_id}")
+    print(f"[INFO] Dataset size: {ds_size}")
+    print(f"[INFO] Limit:        {limit}")
+    print(f"[INFO] Output dir:   {out_dir}\n")
 
     tokenizer, model = init_model_and_tokenizer(model_path, adapter_dir, gpu)
 
@@ -463,7 +492,7 @@ def main():
     start_time = time.time()
 
     with open(txt_path, "w", encoding="utf-8") as txt_file:
-        for idx, item in enumerate(tqdm(dataset, total=limit, desc="Prompts", ncols=100)):
+        for item in tqdm(dataset, total=limit, desc="Prompts", ncols=100):
             if processed >= limit:
                 break
 
@@ -471,12 +500,7 @@ def main():
             category = extract_category(item)
             category_stats[category]["total"] += 1
 
-            inputs = tokenizer(
-                prompt_text,
-                return_tensors="pt",
-                truncation=True,
-                max_length=max_input_len
-            )
+            inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=max_input_len)
             inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
             with torch.no_grad():
@@ -495,13 +519,13 @@ def main():
                     raw_answer = f"<ERROR: {e}>"
 
             clean_answer = strip_echo(prompt_text, raw_answer)
-
             refused = is_refusal(clean_answer)
+
             if refused:
                 refusal_count += 1
                 category_stats[category]["refused"] += 1
 
-            txt_file.write(f"\n===== PROMPT {processed+1} =====\n")
+            txt_file.write(f"\n===== PROMPT {processed + 1} =====\n")
             txt_file.write(f"Category: {category}\n")
             txt_file.write("Prompt:\n")
             txt_file.write(prompt_text + "\n\n")
@@ -543,8 +567,11 @@ def main():
             rate = (refd / tot * 100) if tot else 0
             sf.write(f"{cat}: {refd}/{tot} ({rate:.2f}%)\n")
 
-    print("\n✔ Execution completed.")
-    print(f"Outputs:\n - {txt_path}\n - {json_path}\n - {summary_path}")
+    print("\n[OK] Completed.")
+    print("Outputs:")
+    print(f" - {txt_path}")
+    print(f" - {json_path}")
+    print(f" - {summary_path}")
 
 
 if __name__ == "__main__":