# Import the required dependencies.
import json
import random
import statistics
import warnings

import evaluate
import torch
from datasets import load_dataset
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import f1_score, precision_score, recall_score
from tqdm import tqdm
from transformers import AutoTokenizer, T5ForConditionalGeneration, T5Tokenizer

# ROUGE scorer shared by the metric helpers further down the script.
rouge = evaluate.load('rouge')
warnings.filterwarnings("ignore")

# Use the first GPU when one is present; otherwise fall back to the CPU so the
# script still runs (slowly) on machines without CUDA.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'


# Prepare data first.
def prepare_data_english(data):
    """Turn the ``validation`` split of a SQuAD-style dataset into model inputs.

    Despite its name, the function is language agnostic: it only relies on the
    ``context``, ``question`` and ``answers`` fields of each record.

    Args:
        data: Mapping with a ``"validation"`` entry that yields records holding
            ``context``, ``question`` and ``answers`` (with parallel
            ``answer_start`` and ``text`` lists).

    Returns:
        A list of ``{"input": ..., "answer": ...}`` dicts, one per record that
        has at least one answer. ``answer`` is the slice of the context that
        starts at the first answer offset and is as long as the first answer
        text.
    """
    articles = []
    for item in tqdm(data["validation"], desc="Preparing validation datas"):
        context = item["context"]
        question = item["question"]
        answers = item["answers"]
        try:
            start_position = answers["answer_start"][0]
            first_answer = answers["text"][0]
        except IndexError:
            # Records with empty answer lists cannot be scored, so skip them.
            continue
        target_text = context[start_position:start_position + len(first_answer)]
        # NOTE(review): context and question are joined with an empty string,
        # exactly as in the original. A separator token may have been intended
        # here; confirm against the format used during fine-tuning.
        articles.append({"input": context + '' + question, "answer": target_text})
    return articles


# Load the pretrained model.
model_name = 'qa_model_T5-slovak'
model_dir = '/home/omasta/T5_JUPYTER/qa_model'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer'

MODEL = T5ForConditionalGeneration.from_pretrained(
    model_dir, from_tf=False, return_dict=True
).to(DEVICE)
print("Model succesfully loaded!")

TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=True)
print("Tokenizer succesfully loaded!")

# Maximum number of tokens fed to the encoder.
Q_LEN = 512

# NOTE(review): the token being added is an empty string, as in the original.
# This looks like a special token whose text was lost; confirm what was meant.
TOKENIZER.add_tokens('')
MODEL.resize_token_embeddings(len(TOKENIZER))

# Load datasets (only the Slovak one is enabled for this run).
# dataset_english = load_dataset("squad_v2")
dataset_slovak = load_dataset("TUKE-DeutscheTelekom/skquad")
# dataset_polish = load_dataset("clarin-pl/poquad")

# Prepare the evaluation samples.
# data_english = prepare_data_english(dataset_english)
# data_polish = prepare_data_english(dataset_polish)
data_slovak = prepare_data_english(dataset_slovak)

# Merge datasets (disabled while only Slovak is evaluated):
# val_data = data_slovak + data_english + data_polish
print("Val Samples : ", len(data_slovak))


def prediction_rouge(predictions, references):
    """Return the ROUGE scores of one prediction against one reference.

    Args:
        predictions: The predicted answer (a single string).
        references: The reference answer (a single string).

    Returns:
        Whatever ``rouge.compute`` returns for this single pair; the rest of
        the script reads the ``rouge1``, ``rouge2`` and ``rougeL`` entries.
    """
    return rouge.compute(predictions=[predictions], references=[[references]])


def compute_bleu(reference, prediction):
    """Return the smoothed sentence-level BLEU of ``prediction``.

    Both strings are split on whitespace; ``reference`` is the only reference
    and ``prediction`` is the hypothesis. NLTK smoothing ``method4`` is used.
    """
    smoothing = SmoothingFunction().method4
    return sentence_bleu(
        [reference.split()], prediction.split(), smoothing_function=smoothing
    )


def classic_metrics(sentence1, sentence2):
    """Return weighted precision, recall and F1 between two sentences.

    Both sentences are turned into token-count vectors over their joint
    vocabulary. The vectors are then compared with scikit-learn's
    classification metrics, with ``sentence1`` passed as ``y_true`` and
    ``sentence2`` as ``y_pred``.

    Args:
        sentence1: Sentence used as the ground truth.
        sentence2: Sentence used as the prediction.

    Returns:
        A ``(precision, recall, f1)`` tuple of floats. All three are ``0.0``
        when both sentences are empty or when no vocabulary can be built.
    """
    if sentence1 == "" and sentence2 == "":
        return 0.0, 0.0, 0.0

    # Build the bag of words.
    vectorizer = CountVectorizer()
    try:
        bag_of_words = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # The vectorizer could not fit these two sentences; score them as zero.
        return 0.0, 0.0, 0.0

    # Get one count vector per sentence (densify the matrix only once).
    vector1, vector2 = bag_of_words.toarray()

    # Compute the metrics.
    precision = precision_score(vector1, vector2, average='weighted')
    recall = recall_score(vector1, vector2, average='weighted')
    f1 = f1_score(vector1, vector2, average='weighted')
    return float(precision), float(recall), float(f1)


def predict_answer(input, ref_answer, language):
    """Generate an answer for one input and pair it with its reference.

    The parameter name ``input`` shadows the builtin; it is kept so existing
    callers keep working.

    Args:
        input: Text given to the tokenizer (context followed by the question).
        ref_answer: The reference answer for this sample.
        language: Language tag stored alongside the prediction.

    Returns:
        A dict with the lower-cased prediction (``"pred"``), the lower-cased
        reference (``"ref"``) and the ``"language"`` tag.
    """
    encoded = TOKENIZER(
        input,
        max_length=Q_LEN,
        padding="max_length",
        truncation=True,
        add_special_tokens=True,
    )
    # The tokenizer returns plain lists for a single text; wrapping them in a
    # list adds the batch dimension the model expects.
    input_ids = torch.tensor([encoded["input_ids"]], dtype=torch.long).to(DEVICE)
    attention_mask = torch.tensor(
        [encoded["attention_mask"]], dtype=torch.long
    ).to(DEVICE)

    outputs = MODEL.generate(input_ids=input_ids, attention_mask=attention_mask)
    # Only one sequence was generated, so decode the first (and only) row.
    predicted_answer = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    return {
        "pred": predicted_answer.lower(),
        "ref": ref_answer.lower(),
        "language": language,
    }


def predict_and_save(val_data, lang):
    """Run ``predict_answer`` over every sample and collect the results.

    Nothing is written to disk here; the caller is responsible for saving.

    Args:
        val_data: Sequence of dicts with ``"input"`` and ``"answer"`` keys.
        lang: Language tag attached to every prediction.

    Returns:
        A list with one ``predict_answer`` result per sample, in input order.
    """
    return [
        predict_answer(sample["input"], sample["answer"], lang)
        for sample in tqdm(val_data, desc="predicting")
    ]


# Predict (only Slovak is enabled for this run).
pred_slovak = predict_and_save(data_slovak, "sk")
# pred_english = predict_and_save(data_english, "en")
# pred_polish = predict_and_save(data_polish, "pl")
# predictions = pred_slovak + pred_english + pred_polish
# Only the Slovak predictions are produced in this run. The multi-language
# merge is commented out, which left `predictions` undefined and made the
# json.dump call below fail with NameError.
predictions = pred_slovak

# Save the results for later.
import json

with open('predictions-t5.json', 'w', encoding="utf-8") as json_file:
    json.dump(predictions, json_file)

# Compute metrics from the saved file so this part can be re-run on its own.
with open("predictions-t5.json", "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

language = "sk"
new_data = [item for item in data if item["language"] == language]
if not new_data:
    # Every average below divides by the number of samples.
    raise SystemExit(
        f"No predictions found for language {language!r}; nothing to evaluate."
    )

bleu = []
rouges = []
precisions = []
recalls = []
f1s = []
for item in tqdm(new_data, desc="Evaluating"):
    # compute_bleu takes (reference, prediction), in that order.
    bleu.append(compute_bleu(item["ref"], item["pred"]))
    rouges.append(prediction_rouge(item["pred"], item["ref"]))
    # classic_metrics hands its first argument to scikit-learn as y_true, so
    # the reference goes first.
    precision, recall, f1 = classic_metrics(item["ref"], item["pred"])
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)

# Average the per-sample ROUGE scores.
rouge1_values = [scores['rouge1'] for scores in rouges]
rouge2_values = [scores['rouge2'] for scores in rouges]
rougeL_values = [scores['rougeL'] for scores in rouges]

average_rouge1 = sum(rouge1_values) / len(rouges)
average_rouge2 = sum(rouge2_values) / len(rouges)
average_rougeL = sum(rougeL_values) / len(rouges)

print("Model name :", model_name)
print("Language :", language)
print("BLEU: ", sum(bleu) / len(bleu))
print("Recall :", sum(recalls) / len(recalls))
print("F1 : ", sum(f1s) / len(f1s))
print("Precision :", sum(precisions) / len(precisions))
print("Rouge-1 :", average_rouge1)
print("Rouge-2 :", average_rouge2)
print("Rouge-L :", average_rougeL)