Added comments

2026-02-04 20:34:11 +00:00 · 2026-02-04 20:34:11 +00:00 · 18cdd5daa6
commit 18cdd5daa6
parent cf50efba3c
1 changed files with 82 additions and 4 deletions
--- a/Training/convert_dpo_sft.py
+++ b/Training/convert_dpo_sft.py
@ -5,9 +5,7 @@ import os
 import json
 from datasets import load_dataset, Dataset

-# =========================
-# Config (как в твоих скриптах)
-# =========================
+
 DATASET_ID = "PKU-Alignment/PKU-SafeRLHF-30K"
 SPLIT = "train"

@ -20,11 +18,47 @@ INSTR_PREFIX = "### Instruction:\n"
 RESP_PREFIX  = "\n\n### Response:\n"


+# -----------------------------------------------------------------------------
+# wrap_prompt(p)
+# Purpose:
+#   Wrap a raw prompt string into a stable instruction/response template.
+#
+# Behavior:
+#   - Converts None to an empty string
+#   - Strips leading/trailing whitespace
+#   - Returns a formatted prompt with fixed prefixes
+#
+# Output format:
+#   ### Instruction:
+#   <prompt>
+#
+#   ### Response:
+# -----------------------------------------------------------------------------
 def wrap_prompt(p: str) -> str:
    p = (p or "").strip()
    return f"{INSTR_PREFIX}{p}{RESP_PREFIX}"


+# -----------------------------------------------------------------------------
+# convert_sft(ds, out_jsonl)
+# Purpose:
+#   Convert the dataset into an SFT jsonl file, where each line is:
+#     {"prompt": <wrapped_prompt>, "response": <safe_response>}
+#
+# Expected input schema (per row):
+#   - prompt
+#   - response_0
+#   - response_1
+#   - safer_response_id  (0 or 1 indicates which response is considered "safe")
+#
+# Filtering rules:
+#   - Drops rows if prompt/response_0/response_1 are empty, or safer_response_id not in (0, 1)
+#
+# Side effects:
+#   - Creates the output directory if needed
+#   - Writes JSON Lines to out_jsonl
+#   - Prints basic stats (saved rows, dropped rows)
+# -----------------------------------------------------------------------------
 def convert_sft(ds, out_jsonl: str) -> None:
    os.makedirs(os.path.dirname(out_jsonl) or ".", exist_ok=True)

@ -54,6 +88,29 @@ def convert_sft(ds, out_jsonl: str) -> None:
        print("example keys:", rows[0].keys())


+# -----------------------------------------------------------------------------
+# convert_dpo(ds, out_dir)
+# Purpose:
+#   Convert the dataset into a DPO dataset directory (Hugging Face datasets format),
+#   with columns:
+#     - prompt (wrapped)
+#     - chosen
+#     - rejected
+#
+# Expected input schema (per row):
+#   - prompt
+#   - response_0
+#   - response_1
+#   - safer_response_id  (0 or 1 indicates which response is "chosen")
+#
+# Filtering rules:
+#   - Drops rows if prompt/response_0/response_1 are empty, or safer_response_id not in (0, 1)
+#
+# Side effects:
+#   - Creates the output directory if needed
+#   - Saves the dataset to out_dir using save_to_disk()
+#   - Prints basic stats (saved rows, dropped rows, resulting columns)
+# -----------------------------------------------------------------------------
 def convert_dpo(ds, out_dir: str) -> None:
    os.makedirs(os.path.dirname(out_dir) or ".", exist_ok=True)

@ -83,6 +140,19 @@ def convert_dpo(ds, out_dir: str) -> None:
    print("columns:", dpo.column_names)


+# -----------------------------------------------------------------------------
+# ask_choice()
+# Purpose:
+#   Simple CLI menu to select what to generate:
+#     1) SFT only
+#     2) DPO only
+#     3) Both
+#     0) Exit
+#
+# Behavior:
+#   - Loops until a valid choice is entered
+#   - Returns one of: "0", "1", "2", "3"
+# -----------------------------------------------------------------------------
 def ask_choice() -> str:
    print("\nWhat do you want to generate?")
    print("  1) SFT (jsonl: prompt+response)")
@ -96,6 +166,14 @@ def ask_choice() -> str:
        print("Invalid choice. Please enter 1, 2, 3, or 0.")


+# -----------------------------------------------------------------------------
+# main()
+# Purpose:
+#   Entry point:
+#   - Loads the dataset from Hugging Face Hub
+#   - Presents an interactive menu
+#   - Generates SFT and/or DPO outputs under ./data
+# -----------------------------------------------------------------------------
 def main():
    print(f"Loading dataset: {DATASET_ID} split={SPLIT}")
    ds = load_dataset(DATASET_ID, split=SPLIT)
@ -105,7 +183,7 @@ def main():
        print("Exit.")
        return

-    # Всегда создаём ./data как в оригинальных скриптах
+    # Ensure ./data exists (used by the output paths)
    os.makedirs("./data", exist_ok=True)

    if choice == "1":