From 79a188b193119987bf10034faf534b71d59d78a1 Mon Sep 17 00:00:00 2001
From: Tetiana Mohorian
Date: Thu, 30 Jan 2025 20:59:44 +0000
Subject: [PATCH] Add few_shot/few_shot_eval_slovak_t5_small.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 few_shot/few_shot_eval_slovak_t5_small.py | 180 ++++++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100644 few_shot/few_shot_eval_slovak_t5_small.py

diff --git a/few_shot/few_shot_eval_slovak_t5_small.py b/few_shot/few_shot_eval_slovak_t5_small.py
new file mode 100644
index 0000000..e0add4f
--- /dev/null
+++ b/few_shot/few_shot_eval_slovak_t5_small.py
@@ -0,0 +1,180 @@
+import numpy as np
+import torch
+from datasets import load_dataset, concatenate_datasets
+from sklearn.metrics import precision_recall_fscore_support, precision_recall_curve
+from transformers import (
+    AutoTokenizer,
+    AutoModelForSequenceClassification,
+    Trainer,
+    TrainingArguments,
+    set_seed
+)
+
+# šŸ”¹ 1. Set seed for reproducibility
+set_seed(42)
+
+# šŸ”¹ 2. Load dataset and separate it by label
+ds = load_dataset("TUKE-KEMT/hate_speech_slovak")
+label_0 = ds['train'].filter(lambda example: example['label'] == 0)
+label_1 = ds['train'].filter(lambda example: example['label'] == 1)
+
+# Build a label-balanced (stratified) few-shot split with n_samples examples per class
+def create_stratified_split(label_0, label_1, n_samples, seed=42):
+    few_shot_0 = label_0.shuffle(seed=seed).select(range(n_samples))
+    few_shot_1 = label_1.shuffle(seed=seed).select(range(n_samples))
+    return concatenate_datasets([few_shot_0, few_shot_1]).shuffle(seed=seed)
+
+# Create train, validation, and test splits
+train_dataset = create_stratified_split(label_0, label_1, n_samples=40)
+val_dataset = create_stratified_split(label_0, label_1, n_samples=10, seed=43)
+test_dataset = create_stratified_split(label_0, label_1, n_samples=50, seed=44)
+
+# šŸ”¹ 3. Load tokenizer and model
+model_name = "ApoTro/slovak-t5-small"
+tokenizer = AutoTokenizer.from_pretrained(model_name, force_download=True)
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
+
+# Tokenization function
+def tokenize(batch):
+    return tokenizer(
+        batch["text"],
+        padding="max_length",
+        truncation=True,
+        max_length=128
+    )
+
+# Tokenize a split and rename the label column to the name expected by Trainer
+def prepare_dataset(dataset):
+    dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
+    return dataset.rename_column("label", "labels")
+
+# Apply preparation to datasets
+train_dataset = prepare_dataset(train_dataset)
+val_dataset = prepare_dataset(val_dataset)
+test_dataset = prepare_dataset(test_dataset)
+
+# Set device (GPU if available)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# šŸ”¹ 4. Define training arguments
+training_args = TrainingArguments(
+    output_dir="./hate_speech_model",
+    per_device_train_batch_size=8,
+    per_device_eval_batch_size=16,
+    learning_rate=3e-5,
+    num_train_epochs=7,
+    evaluation_strategy="epoch",
+    save_strategy="epoch",
+    load_best_model_at_end=True,
+    metric_for_best_model="f1",
+    greater_is_better=True,
+    warmup_steps=100,
+    weight_decay=0.01,
+    report_to="none",
+    seed=42,
+    logging_steps=10,
+    gradient_accumulation_steps=2,
+    lr_scheduler_type="cosine",
+    logging_dir='./logs',
+)
+
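+# Note: with gradient_accumulation_steps=2 the effective training batch size is
+# 8 * 2 = 16 per device, and load_best_model_at_end together with
+# metric_for_best_model="f1" restores the checkpoint with the best validation F1
+# once training finishes.
+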
+# šŸ”¹ 5. Define evaluation metrics
+def compute_metrics(pred):
+    logits = pred.predictions
+    # For T5-based classifiers the Trainer may return a tuple of arrays; the class logits come first
+    if isinstance(logits, tuple):
+        logits = logits[0]
+    preds = logits.argmax(-1)
+    labels = pred.label_ids
+    precision, recall, f1, _ = precision_recall_fscore_support(
+        labels, preds, average='binary'
+    )
+    return {
+        'precision': precision,
+        'recall': recall,
+        'f1': f1
+    }
+
+# šŸ”¹ 6. Create Trainer instance
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=train_dataset,
+    eval_dataset=val_dataset,
+    compute_metrics=compute_metrics,
+)
+
+# šŸ”¹ 7. Train the model
+trainer.train()
+
+# šŸ”¹ 8. Save the trained model
+trainer.save_model("./hate_speech_model/best_model")
+
+# šŸ”¹ 9. Reload the saved model before testing
+model = AutoModelForSequenceClassification.from_pretrained("./hate_speech_model/best_model")
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    eval_dataset=val_dataset,
+    compute_metrics=compute_metrics,
+)
+
+# šŸ”¹ 10. Function to find the optimal decision threshold
+def find_optimal_threshold(trainer, dataset):
+    predictions = trainer.predict(dataset)
+    logits = predictions.predictions
+
+    # The class logits come first if the model returns a tuple of outputs
+    if isinstance(logits, tuple):
+        logits = logits[0]
+    logits = torch.tensor(logits)
+
+    # Apply softmax to get the probability of the positive class
+    probs = torch.nn.functional.softmax(logits, dim=-1)
+    positive_probs = probs[:, 1].numpy()
+    true_labels = predictions.label_ids
+
+    # Compute the precision-recall curve and the F1-score at every threshold
+    precisions, recalls, thresholds = precision_recall_curve(true_labels, positive_probs)
+    f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-8)
+
+    # Pick the threshold with the best F1-score (the last PR point has no threshold)
+    optimal_idx = np.argmax(f1_scores[:-1])
+    optimal_threshold = thresholds[optimal_idx]
+
+    return optimal_threshold, precisions[optimal_idx], recalls[optimal_idx], f1_scores[optimal_idx]
+
+# šŸ”¹ 11. Function to evaluate the model using a custom threshold
+def evaluate_with_threshold(trainer, dataset, threshold=0.5):
+    predictions = trainer.predict(dataset)
+    logits = predictions.predictions
+
+    if isinstance(logits, tuple):
+        logits = logits[0]
+    logits = torch.tensor(logits)
+
+    # Apply softmax and binarize the positive-class probability at the given threshold
+    probs = torch.nn.functional.softmax(logits, dim=-1)
+    predicted_labels = (probs[:, 1] > threshold).numpy().astype(int)
+    true_labels = predictions.label_ids
+
+    # Compute precision, recall, and F1-score
+    precision, recall, f1, _ = precision_recall_fscore_support(
+        true_labels, predicted_labels, average='binary', zero_division=0
+    )
+
+    return {
+        'precision': precision,
+        'recall': recall,
+        'f1': f1
+    }
+
+# šŸ”¹ 12. Find the optimal threshold on the validation set
+print("\nšŸ” Finding optimal threshold...")
+optimal_threshold, best_precision, best_recall, best_f1 = find_optimal_threshold(trainer, val_dataset)
+print(f"āœ… Optimal threshold: {optimal_threshold:.4f}")
+
+# šŸ”¹ 13. Final evaluation on the test set using the best threshold
+print("\nšŸ“Š Evaluating on the test set with the optimal threshold:")
+optimized_results = evaluate_with_threshold(trainer, test_dataset, threshold=optimal_threshold)
+print(f"šŸŽÆ Precision: {optimized_results['precision']:.4f}")
+print(f"šŸŽÆ Recall: {optimized_results['recall']:.4f}")
+print(f"šŸŽÆ F1-score: {optimized_results['f1']:.4f}")
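+
+# šŸ”¹ 14. Usage sketch: classify one new sentence with the tuned threshold.
+# The sentence below is only a placeholder example, not part of the dataset.
+model.to(device)
+model.eval()
+example_text = "Toto je testovacia veta."
+inputs = tokenizer(example_text, return_tensors="pt", truncation=True, max_length=128).to(device)
+with torch.no_grad():
+    example_logits = model(**inputs).logits
+prob_positive = torch.nn.functional.softmax(example_logits, dim=-1)[0, 1].item()
+predicted_label = int(prob_positive > optimal_threshold)
+print(f"\nšŸ“ '{example_text}' -> P(label=1) = {prob_positive:.4f}, predicted label: {predicted_label}")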