From 9e3c32ba3844d82db43e373706a79d4dbc1f84c8 Mon Sep 17 00:00:00 2001
From: Tetiana Mohorian
Date: Mon, 12 May 2025 07:37:02 +0000
Subject: [PATCH] Update lm-eval-harness/slovak_toxic_classification/__init__.py

---
 .../slovak_toxic_classification/__init__.py | 145 +++++++++---------
 1 file changed, 72 insertions(+), 73 deletions(-)

diff --git a/lm-eval-harness/slovak_toxic_classification/__init__.py b/lm-eval-harness/slovak_toxic_classification/__init__.py
index 79970b2..50bae80 100644
--- a/lm-eval-harness/slovak_toxic_classification/__init__.py
+++ b/lm-eval-harness/slovak_toxic_classification/__init__.py
@@ -1,73 +1,72 @@
-from lm_eval.api.task import Task
-from datasets import load_dataset
-from sklearn.metrics import precision_recall_fscore_support
-
-class SlovakToxicClassification(Task):
-    VERSION = 1
-    DATASET_PATH = "TUKE-KEMT/hate_speech_slovak"
-    DATASET_NAME = None
-
-    def __init__(self):
-        # Load the dataset
-        self.dataset = load_dataset(self.DATASET_PATH)
-
-    def has_training_docs(self):
-        return False
-
-    def has_validation_docs(self):
-        return True
-
-    def has_test_docs(self):
-        return True
-
-    def validation_docs(self):
-        return list(self.dataset["test"])
-
-    def test_docs(self):
-        return list(self.dataset["test"])
-
-    def doc_to_text(self, doc):
-        return doc["text"]
-
-    def doc_to_target(self, doc):
-        return int(doc["label"])
-
-    def construct_requests(self, doc, ctx):
-
-        return self.loglikelihood(ctx, " 0"), self.loglikelihood(ctx, " 1")
-
-    def process_results(self, doc, results):
-        ll0, ll1 = results
-        pred = int(ll1 > ll0)
-        gold = self.doc_to_target(doc)
-        return {
-            "f1": (gold, pred),
-            "precision": (gold, pred),
-            "recall": (gold, pred),
-        }
-
-    def aggregation(self):
-        return {
-            "f1": self.f1_score,
-            "precision": self.precision_score,
-            "recall": self.recall_score,
-        }
-
-    def higher_is_better(self):
-        return {
-            "f1": True,
-            "precision": True,
-            "recall": True,
-        }
-
-    def f1_score(self, gold_and_pred):
-        golds, preds = zip(*gold_and_pred)
-        return precision_recall_fscore_support(golds, preds, average="binary")[2]
-
-    def precision_score(self, gold_and_pred):
-        golds, preds = zip(*gold_and_pred)
-        return precision_recall_fscore_support(golds, preds, average="binary")[0]
-
-    def recall_score(self, gold_and_pred):
-        golds, preds = zip(*gold_and_pred)
-        return precision_recall_fscore_support(golds, preds, average="binary")[1]
+from lm_eval.api.task import Task
+from datasets import load_dataset
+from sklearn.metrics import precision_recall_fscore_support
+
+class SlovakToxicClassification(Task):
+    VERSION = 1
+    DATASET_PATH = "TUKE-KEMT/hate_speech_slovak"
+    DATASET_NAME = None
+
+    def __init__(self):
+        self.dataset = load_dataset(self.DATASET_PATH)
+
+    def has_training_docs(self):
+        return False
+
+    def has_validation_docs(self):
+        return True
+
+    def has_test_docs(self):
+        return True
+
+    def validation_docs(self):
+        return list(self.dataset["test"])
+
+    def test_docs(self):
+        return list(self.dataset["test"])
+
+    def doc_to_text(self, doc):
+        return doc["text"]
+
+    def doc_to_target(self, doc):
+        return int(doc["label"])
+
+    def construct_requests(self, doc, ctx):
+
+        return self.loglikelihood(ctx, " 0"), self.loglikelihood(ctx, " 1")
+
+    def process_results(self, doc, results):
+        ll0, ll1 = results
+        pred = int(ll1 > ll0)
+        gold = self.doc_to_target(doc)
+        return {
+            "f1": (gold, pred),
+            "precision": (gold, pred),
+            "recall": (gold, pred),
+        }
+
+    def aggregation(self):
+        return {
+            "f1": self.f1_score,
+            "precision": self.precision_score,
+            "recall": self.recall_score,
+        }
+
+    def higher_is_better(self):
+        return {
+            "f1": True,
+            "precision": True,
+            "recall": True,
+        }
+
+    def f1_score(self, gold_and_pred):
+        golds, preds = zip(*gold_and_pred)
+        return precision_recall_fscore_support(golds, preds, average="binary")[2]
+
+    def precision_score(self, gold_and_pred):
+        golds, preds = zip(*gold_and_pred)
+        return precision_recall_fscore_support(golds, preds, average="binary")[0]
+
+    def recall_score(self, gold_and_pred):
+        golds, preds = zip(*gold_and_pred)
+        return precision_recall_fscore_support(golds, preds, average="binary")[1]