From 2f2725ca7d9ad962efa143e59b0067f57ebc115d Mon Sep 17 00:00:00 2001
From: Tetiana Mohorian <tetiana.mohorian@student.tuke.sk>
Date: Tue, 20 May 2025 11:18:41 +0000
Subject: [PATCH] Odstranit few_shot/few_shot_eval_mt5_3b.py

---
 few_shot/few_shot_eval_mt5_3b.py | 208 -------------------------------
 1 file changed, 208 deletions(-)
 delete mode 100644 few_shot/few_shot_eval_mt5_3b.py

diff --git a/few_shot/few_shot_eval_mt5_3b.py b/few_shot/few_shot_eval_mt5_3b.py
deleted file mode 100644
index 7587062..0000000
--- a/few_shot/few_shot_eval_mt5_3b.py
+++ /dev/null
@@ -1,208 +0,0 @@
-import sys
-import codecs
-from datasets import load_dataset, concatenate_datasets
-import numpy as np
-import torch
-
-from sklearn.metrics import precision_recall_fscore_support,precision_recall_curve
-from transformers import (
-    AutoTokenizer, 
-    AutoModelForSequenceClassification, 
-    Trainer, 
-    TrainingArguments,
-    AutoModelForSeq2SeqLM,
-    set_seed, 
-    T5Tokenizer
-)
-
-# Set seed for reproducibility
-set_seed(42)
-
-# Load and preprocess data
-ds = load_dataset("TUKE-KEMT/hate_speech_slovak")
-label_0 = ds['train'].filter(lambda example: example['label'] == 0)
-label_1 = ds['train'].filter(lambda example: example['label'] == 1)
-
-# Create stratified few-shot splits
-def create_stratified_split(label_0, label_1, n_samples, seed=42):
-    few_shot_0 = label_0.shuffle(seed=seed).select(range(n_samples))
-    few_shot_1 = label_1.shuffle(seed=seed).select(range(n_samples))
-    return concatenate_datasets([few_shot_0, few_shot_1]).shuffle(seed=seed)
-
-# Create train/val/test splits
-train_dataset = create_stratified_split(label_0, label_1, n_samples=40)
-val_dataset = create_stratified_split(label_0, label_1, n_samples=10, seed=43)
-test_dataset = create_stratified_split(label_0, label_1, n_samples=50, seed=44)
-
-# Initialize tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/mt5-3B-mmarco-en-pt", force_download=True)
-model = AutoModelForSequenceClassification.from_pretrained("unicamp-dl/mt5-3B-mmarco-en-pt", num_labels=2)
-
-
-
-# Tokenization function with padding
-def tokenize(batch):
-    return tokenizer(
-    batch["text"],
-    padding="max_length",
-    truncation=True,
-    max_length=256
-    )
-
-
-# Prepare datasets
-def prepare_dataset(dataset):
-    dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
-    print(dataset[0])
-    return dataset.rename_column("label", "labels")
-
-train_dataset = prepare_dataset(train_dataset)
-val_dataset = prepare_dataset(val_dataset)
-test_dataset = prepare_dataset(test_dataset)
-
-# Set device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-
-# Training arguments with improved settings
-training_args = TrainingArguments(
-    output_dir="./hate_speech_model",
-    per_device_train_batch_size=8,
-    per_device_eval_batch_size=16,
-    learning_rate=3e-5,  # Adjust as needed
-    num_train_epochs=7,  # Increased epochs for better training
-    eval_strategy="epoch",  # Use "epoch" for both strategies
-    save_strategy="epoch",  # Matching the evaluation strategy
-    load_best_model_at_end=True,
-    metric_for_best_model="f1",
-    greater_is_better=True,
-    warmup_steps=100,  # Increased warmup steps
-    weight_decay=0.01,
-    report_to="none",
-    seed=42,
-    logging_steps=10,
-    gradient_accumulation_steps=2,  # For more effective training on larger datasets
-    lr_scheduler_type="cosine",  # Using cosine scheduler for learning rate
-    logging_dir='./logs',
-)
-
-# Custom metrics computation
-def compute_metrics(pred):
-    logits = pred.predictions[0]  # Ensure only the logits are used
-    preds = logits.argmax(-1)
-    labels = pred.label_ids
-    precision, recall, f1, _ = precision_recall_fscore_support(
-        labels, 
-        preds, 
-        average='binary'
-    )
-    return {
-        'precision': precision,
-        'recall': recall,
-        'f1': f1
-    }
-
-# Initialize trainer with validation data and metrics
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=train_dataset,
-    eval_dataset=val_dataset,
-    compute_metrics=compute_metrics,
-)
-
-# Train model
-trainer.train()
-
-# Evaluate on test set
-def find_optimal_threshold(trainer, dataset):
-    # Get predictions
-    predictions = trainer.predict(dataset)
-    
-    # Extract logits (the first element in the predictions tuple)
-    logits = predictions.predictions  # This is likely a tuple (logits, other_info)
-
-    # If logits is a tuple, extract only the logits
-    if isinstance(logits, tuple):
-        logits = logits[0]  # Extract the logits from the tuple
-    
-    # Check the shape of logits to debug the issue
-    print(f"Logits shape: {logits.shape}")
-    
-    # Ensure logits has the shape (batch_size, 2) for binary classification
-    if logits.shape[-1] != 2:
-        logits = logits[:, :2]  # Take only the first two columns (logits for the two classes)
-        print(f"Logits shape after slicing: {logits.shape}")
-
-    # Convert logits to tensor if necessary
-    if not isinstance(logits, torch.Tensor):
-        logits = torch.tensor(logits)  # Convert logits to a tensor if needed
-    
-    # Apply softmax to get probabilities
-    probs = torch.nn.functional.softmax(logits, dim=-1)
-    
-    # Get probabilities for the positive class (label=1)
-    positive_probs = probs[:, 1].numpy()  # The probabilities for the positive class (label=1)
-    
-    # Get true labels from predictions
-    true_labels = predictions.label_ids
-    
-    # Calculate precision-recall curve
-    precisions, recalls, thresholds = precision_recall_curve(true_labels, positive_probs)
-    f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-8)
-    
-    # Find the optimal threshold based on F1-score
-    optimal_idx = np.argmax(f1_scores[:-1])  # Exclude last threshold (it is always 1)
-    optimal_threshold = thresholds[optimal_idx]
-    
-    return optimal_threshold, precisions[optimal_idx], recalls[optimal_idx], f1_scores[optimal_idx]
-
-
-def evaluate_with_threshold(trainer, dataset, threshold=0.5):
-    predictions = trainer.predict(dataset)
-    
-    # Ensure that logits are properly reshaped to (batch_size, 2) before applying softmax
-    logits = predictions.predictions
-    if isinstance(logits, tuple):
-        logits = logits[0]  # Extract logits if it's a tuple
-
-    logits = torch.tensor(logits) if not isinstance(logits, torch.Tensor) else logits
-    
-    # Apply softmax to get probabilities
-    probs = torch.nn.functional.softmax(logits, dim=-1)
-    
-    # Get predicted labels based on the threshold for the positive class (label=1)
-    predicted_labels = (probs[:, 1] > threshold).numpy().astype(int)
-    
-    true_labels = predictions.label_ids
-    
-    # Calculate precision-recall-fscore
-    precision, recall, f1, _ = precision_recall_fscore_support(
-        true_labels, 
-        predicted_labels, 
-        average='binary',
-        zero_division=0
-    )
-    
-    return {
-        'precision': precision,
-        'recall': recall,
-        'f1': f1
-    }
-
-# Example usage:
-# Find the optimal threshold using validation data
-print("\nFinding optimal threshold...")
-optimal_threshold, best_precision, best_recall, best_f1 = find_optimal_threshold(trainer, val_dataset)
-print(f"Optimal threshold: {optimal_threshold:.4f}")
-
-# Evaluate with optimal threshold
-print("\nEvaluating with optimal threshold:")
-optimized_results = evaluate_with_threshold(trainer, test_dataset, threshold=optimal_threshold)
-print(f"Precision: {optimized_results['precision']:.4f}")
-print(f"Recall: {optimized_results['recall']:.4f}")
-print(f"F1: {optimized_results['f1']:.4f}")
-
-
-# Save the model
-#trainer.save_model("./hate_speech_model/best_model")
\ No newline at end of file