## IMPORT NECESSARY LIBRARIES

from transformers import T5ForConditionalGeneration, T5Tokenizer, AutoTokenizer
import torch
import json
import random
import statistics
import warnings
from tqdm import tqdm
from datasets import load_dataset
import evaluate  # ROUGE
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.feature_extraction.text import CountVectorizer

rouge = evaluate.load('rouge')
warnings.filterwarnings("ignore")

# Use the first GPU if available, otherwise fall back to the CPU
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Prepare data first: build (context <sep> question, answer) pairs from the
# validation split of a SQuAD-format dataset. Despite its name, the function
# is language-agnostic and works for any dataset with SQuAD-style fields.
def prepare_data_english(data):
    articles = []
    for item in tqdm(data["validation"], desc="Preparing validation data"):
        context = item["context"]
        question = item["question"]
        try:
            start_position = item['answers']['answer_start'][0]
        except IndexError:
            # Unanswerable question (empty answer list) -- skip it
            continue
        text_length = len(item['answers']['text'][0])
        target_text = context[start_position : start_position + text_length]
        inputs = {"input": context + '<sep>' + question, "answer": target_text}
        articles.append(inputs)
    return articles
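
# For illustration, one prepared sample has the following shape (the values
# here are hypothetical, not taken from the actual dataset):
#   {"input": "Bratislava je hlavné mesto Slovenska.<sep>Čo je Bratislava?",
#    "answer": "hlavné mesto Slovenska"}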

# Load the pretrained model and tokenizer
model_name = 'qa_model_T5-slovak'
model_dir = '/home/omasta/T5_JUPYTER/qa_model'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer'
MODEL = T5ForConditionalGeneration.from_pretrained(model_dir, from_tf=False, return_dict=True).to(DEVICE)
print("Model successfully loaded!")
TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=True)
print("Tokenizer successfully loaded!")

Q_LEN = 512  # maximum tokenized input length
# Register the custom <sep> token and resize the embedding matrix so the
# new token gets its own embedding row
TOKENIZER.add_tokens('<sep>')
MODEL.resize_token_embeddings(len(TOKENIZER))

# Load datasets
#dataset_english = load_dataset("squad_v2")
dataset_slovak = load_dataset("TUKE-DeutscheTelekom/skquad")
#dataset_polish = load_dataset("clarin-pl/poquad")

# Prepare data
#data_english = prepare_data_english(dataset_english)
#data_polish = prepare_data_english(dataset_polish)
data_slovak = prepare_data_english(dataset_slovak)

# Merge datasets (only the Slovak split is evaluated in this run)
#val_data = data_slovak + data_english + data_polish
print("Val Samples : ", len(data_slovak))

def prediction_rouge(predictions, references):
    # ROUGE-1/2/L for a single prediction/reference pair
    return rouge.compute(predictions=[predictions], references=[[references]])


def compute_bleu(reference, prediction):
    # Sentence-level BLEU; smoothing (method4) avoids zero scores on the
    # short answers typical of extractive QA
    smoothie = SmoothingFunction().method4
    return sentence_bleu([reference.split()], prediction.split(), smoothing_function=smoothie)


def classic_metrics(sentence1, sentence2):
    # Token-level precision/recall/F1: both sentences are projected into a
    # shared bag-of-words space and the count vectors are compared
    # position-wise, with sentence1 treated as the reference (y_true)
    if sentence1 == "" and sentence2 == "":
        return 0, 0, 0
    else:
        # Build the bag-of-words representation
        vectorizer = CountVectorizer()
        try:
            bag_of_words = vectorizer.fit_transform([sentence1, sentence2])
        except ValueError:
            # Neither string produced any valid tokens
            return 0, 0, 0
        # Get the count vector of each sentence
        vector1 = bag_of_words.toarray()[0]
        vector2 = bag_of_words.toarray()[1]

        # Compute the metrics
        precision = precision_score(vector1, vector2, average='weighted')
        recall = recall_score(vector1, vector2, average='weighted')
        f1 = f1_score(vector1, vector2, average='weighted')
        return float(precision), float(recall), float(f1)
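
# A quick sanity check of the metric helpers (toy strings, for illustration
# only); an exact match should score 1.0 everywhere:
#   compute_bleu("the capital city of Slovakia", "the capital city of Slovakia")  # -> 1.0
#   prediction_rouge("the capital", "the capital")["rouge1"]                      # -> 1.0
#   classic_metrics("the capital", "the capital")                                 # -> (1.0, 1.0, 1.0)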

def predict_answer(input_text, ref_answer, language):
    # Tokenize the context<sep>question string, generate an answer and
    # lowercase both prediction and reference for comparison
    inputs = TOKENIZER(input_text, max_length=Q_LEN, padding="max_length", truncation=True, add_special_tokens=True)
    input_ids = torch.tensor(inputs["input_ids"], dtype=torch.long).to(DEVICE).unsqueeze(0)
    attention_mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).to(DEVICE).unsqueeze(0)
    outputs = MODEL.generate(input_ids=input_ids, attention_mask=attention_mask)
    predicted_answer = TOKENIZER.decode(outputs.flatten(), skip_special_tokens=True)
    return {"pred": predicted_answer.lower(), "ref": ref_answer.lower(), "language": language}
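
# Note: MODEL.generate is called with its defaults, so answers longer than
# the default generation budget (max_length=20 tokens in older transformers
# releases) may be cut off; passing e.g. max_new_tokens=64 would lift that
# limit.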

def predict_and_save(val_data, lang):
    # Run the model over every prepared sample and collect the results
    predictions = list()
    for i in tqdm(range(len(val_data)), desc="predicting"):
        pred = predict_answer(val_data[i]["input"], val_data[i]["answer"], lang)
        predictions.append(pred)
    return predictions

# Predict
pred_slovak = predict_and_save(data_slovak, "sk")
#pred_english = predict_and_save(data_english, "en")
#pred_polish = predict_and_save(data_polish, "pl")

# With the other datasets enabled the prediction lists would be merged:
#predictions = pred_slovak + pred_english + pred_polish
predictions = pred_slovak

# Save the results for later
with open('predictions-t5.json', 'w') as json_file:
    json.dump(predictions, json_file)

# Compute metrics
with open("predictions-t5.json", "r") as json_file:
    data = json.load(json_file)

# Keep only the predictions for the selected language
new_data = list()
language = "sk"
for item in data:
    if item["language"] == language:
        new_data.append(item)

bleu = list()
rouges = list()
precisions = list()
recalls = list()
f1s = list()

for item in tqdm(new_data, desc="Evaluating"):
    # compute_bleu and classic_metrics take the reference first;
    # prediction_rouge takes the prediction first
    bleu.append(compute_bleu(item["ref"], item["pred"]))
    rouges.append(prediction_rouge(item["pred"], item["ref"]))
    precision, recall, f1 = classic_metrics(item["ref"], item["pred"])
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)

# COMPUTATION OF METRICS
# (the loop variable is named `scores` so it does not shadow the global
# `rouge` scorer)
rouge1_values = [scores['rouge1'] for scores in rouges]
rouge2_values = [scores['rouge2'] for scores in rouges]
rougeL_values = [scores['rougeL'] for scores in rouges]

average_rouge1 = sum(rouge1_values) / len(rouges)
average_rouge2 = sum(rouge2_values) / len(rouges)
average_rougeL = sum(rougeL_values) / len(rouges)

print("Model name :", model_name)
print("Language :", language)
print("BLEU :", sum(bleu) / len(bleu))
print("Precision :", sum(precisions) / len(precisions))
print("Recall :", sum(recalls) / len(recalls))
print("F1 :", sum(f1s) / len(f1s))
print("Rouge-1 :", average_rouge1)
print("Rouge-2 :", average_rouge2)
print("Rouge-L :", average_rougeL)