160 lines
5.5 KiB
Python
160 lines
5.5 KiB
Python
## IMPORT NECESSARY DEPENDENCIES
|
|
from transformers import T5ForConditionalGeneration, T5Tokenizer,AutoTokenizer
|
|
import torch
|
|
import evaluate # Bleu
|
|
import json
|
|
import random
|
|
import statistics
|
|
from sklearn.metrics import precision_score, recall_score, f1_score
|
|
## TURN WARNINGS OFF
|
|
import warnings
|
|
warnings.filterwarnings("ignore")
|
|
##13/03/23 added
|
|
from rouge import Rouge
|
|
from tqdm import tqdm
|
|
from datasets import load_dataset
|
|
import re
|
|
##CUSTOM ROUGE METRIC - NEW TODO:
|
|
|
|
|
|
# Inference device: use the first CUDA GPU when one is available, otherwise
# fall back to the CPU so the script still runs on machines without CUDA.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# T5 model (alternative configuration, currently disabled)
#model_name = 'T5_SK_model'
#model_dir = "/home/omasta/T5_JUPYTER/qa_model"
#tokenizer_dir = "/home/omasta/T5_JUPYTER/qa_tokenizer"

# mT5 small model (active configuration)
model_name = 'qa_model'
model_dir = '/home/omasta/T5_JUPYTER/qa_model_mT5_polish'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer_mT5_polish'

# Load the model from its directory and move it to the inference device.
MODEL = T5ForConditionalGeneration.from_pretrained(
    model_dir, from_tf=False, return_dict=True
).to(DEVICE)
print("Model succesfully loaded!")

TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=True)
print("Tokenizer succesfully loaded!")

# Maximum number of tokens the encoder input is padded/truncated to.
Q_LEN = 512

# '<sep>' joins context and question in the model input; register it with the
# tokenizer and resize the embedding matrix to the resulting vocabulary size.
TOKENIZER.add_tokens('<sep>')
MODEL.resize_token_embeddings(len(TOKENIZER))
|
|
|
|
def nahradit_znaky(retezec):
    """Return *retezec* with every '[' and ']' replaced by a single space.

    Args:
        retezec: The string to clean.

    Returns:
        A new string of the same length in which each square bracket has
        been swapped for a space; all other characters are left untouched.
    """
    # One pass over the string: both bracket characters map to a space.
    brackets_to_spaces = str.maketrans('[]', '  ')
    return retezec.translate(brackets_to_spaces)
|
|
|
|
|
|
def predict_answer(data, ref_answer=None, random=None):
    """Generate an answer for each sample and pair it with its reference.

    Every sample's ``'input'`` text is tokenized (padded/truncated to
    ``Q_LEN`` tokens), passed through ``MODEL.generate`` and decoded with
    special tokens stripped. The sample's ``'answer'`` is lower-cased and
    used as the reference.

    Args:
        data: Iterable of dicts read through the keys ``'input'`` and
            ``'answer'``.
        ref_answer: Not read; the name is only kept in the signature.
        random: Not used by this function.

    Returns:
        A list of ``{'prediction': ..., 'ref_answer': ...}`` dicts, one per
        sample whose lower-cased reference answer is non-empty. Samples with
        an empty reference are still run through the model but left out.
    """

    def _as_batch(values):
        # Wrap the single encoded sequence in a batch dimension of size one.
        return torch.tensor([values], dtype=torch.long, device=DEVICE)

    collected = []
    for sample in tqdm(data, desc="predicting"):
        encoded = TOKENIZER(
            sample['input'],
            max_length=Q_LEN,
            padding="max_length",
            truncation=True,
            add_special_tokens=True,
        )
        generated = MODEL.generate(
            input_ids=_as_batch(encoded["input_ids"]),
            attention_mask=_as_batch(encoded["attention_mask"]),
        )
        decoded = TOKENIZER.decode(generated.flatten(), skip_special_tokens=True)

        # NOTE(review): the reference is lower-cased while the prediction is
        # kept as decoded; confirm this asymmetry is intended for scoring.
        reference = sample['answer'].lower()
        if not reference:
            continue

        collected.append({'prediction': decoded, 'ref_answer': reference})
    return collected
|
|
|
|
def prepare_data(data):
    """Flatten a SQuAD-style nested dict into a flat list of model samples.

    Walks ``data["data"] -> article["paragraphs"] -> paragraph["qas"]`` and
    emits one sample per question.

    Args:
        data: Dict with a ``"data"`` list of articles. Each article has
            ``"paragraphs"``; each paragraph has ``"context"`` and ``"qas"``;
            each qa has ``"question"`` and, optionally, an ``"answers"`` list
            of dicts carrying ``"text"``.

    Returns:
        A list of ``{"input": context + "<sep>" + question, "answer": text}``
        dicts, where ``text`` is the first answer's text, or ``""`` when the
        question has no answers.

    Raises:
        KeyError: If a required key (``"data"``, ``"paragraphs"``, ``"qas"``,
            ``"context"``, ``"question"`` or an answer's ``"text"``) is
            missing.
    """
    articles = []
    for article in data["data"]:
        for paragraph in article["paragraphs"]:
            for qa in paragraph["qas"]:
                question = qa["question"]
                # Questions without answers (empty or missing list) must not
                # crash the whole conversion; they get an empty reference.
                answers = qa.get("answers") or []
                answer = answers[0]["text"] if answers else ""
                articles.append(
                    {
                        "input": paragraph["context"] + "<sep>" + question,
                        "answer": answer,
                    }
                )
    return articles
|
|
|
|
def prepare_polish_data(data):
    """Convert flat question-answering rows into model samples.

    Args:
        data: Iterable of rows (dict-like), each read through the keys
            ``"question"``, ``"context"`` and ``"answers"``; ``"answers"``
            is itself read through the key ``"text"``, which is expected to
            hold a list of answer strings.

    Returns:
        A list of ``{"input": context + "<sep>" + question, "answer": text}``
        dicts. ``text`` is all answer strings joined with ``", "`` and with
        square brackets replaced by spaces (via ``nahradit_znaky``). Rows
        lacking ``"answers"`` or its ``"text"`` entry are skipped.

    Raises:
        KeyError: If a row has no ``"question"``, or a kept row has no
            ``"context"``.
    """
    samples = []
    # Iterate rows directly so each row is fetched once instead of being
    # re-indexed for every field.
    for row in data:
        question = row["question"]
        try:
            answer_texts = row["answers"]["text"]
        except KeyError:
            # No usable answer annotation for this row: leave it out.
            continue
        answer = nahradit_znaky(', '.join(answer_texts))
        context = row["context"]
        samples.append({"input": context + "<sep>" + question, "answer": answer})
    return samples
|
|
|
|
|
|
# Alternative corpus (Polish PoQuAD), kept for switching datasets:
#dataset = load_dataset("clarin-pl/poquad")
dataset = load_dataset("squad_v2")
dev_data = prepare_polish_data(dataset["validation"])

print(f'Number of dev samples {len(dev_data)}')

# Per-sample metric accumulators (bleu_score, rouge_1 and rouge_2 are
# declared but not filled by this loop).
bleu_score = []
precisions = []
f1_scores = []
recall_scores = []
rouge_1 = []
rouge_2 = []

# NOTE: this rebinds the name `evaluate`, shadowing the `evaluate` module
# imported at the top of the file; from here on it is the prediction list.
evaluate = predict_answer(dev_data)

for item in tqdm(evaluate, desc="evaluating"):
    # The metrics treat every character as one label, compared position by
    # position between reference and prediction.
    reference_chars = list(item['ref_answer'])
    predicted_chars = list(item['prediction'])
    try:
        precision = precision_score(reference_chars, predicted_chars, average='macro')
        recall = recall_score(reference_chars, predicted_chars, average='macro')
        f1 = f1_score(reference_chars, predicted_chars, average='macro')
    except ValueError:
        # Raised e.g. when the two character sequences differ in length;
        # such a sample is scored as a complete miss.
        precision = 0
        recall = 0
        f1 = 0
    precisions.append(precision)
    f1_scores.append(f1)
    recall_scores.append(recall)
|
|
|
|
|
|
def rouge_eval(dict_x):
    """Compute ROUGE scores for every usable prediction/reference pair.

    Args:
        dict_x: Iterable of dicts read through the keys ``'prediction'`` and
            ``'ref_answer'``.

    Returns:
        A list with one ``Rouge.get_scores`` result per scored item, in
        input order. Items whose prediction or reference is empty, or that
        ``get_scores`` rejects with ``ValueError``, are skipped.
    """
    rouge = Rouge()
    rouge_scores = []
    for item in dict_x:
        prediction = item['prediction']
        reference = item['ref_answer']
        if not (prediction and reference):
            continue
        try:
            rouge_scores.append(rouge.get_scores(prediction, reference))
        except ValueError:
            # A non-empty string can still be rejected by the scorer (e.g. a
            # prediction that is only "."); skip it instead of aborting the
            # whole evaluation run.
            continue
    return rouge_scores
|
|
|
|
|
|
# ---- Report: aggregate the per-sample metrics and print them ----
print(f'VYHODNOTENIE VYSLEDKOV : ------------------------')

# statistics.mean raises StatisticsError on an empty sequence; report 0 when
# no sample was scored instead of crashing at the very end of the run.
recall_score_total = statistics.mean(recall_scores) if recall_scores else 0
f1_score_total = statistics.mean(f1_scores) if f1_scores else 0
precision_total = statistics.mean(precisions) if precisions else 0

print(f'Recall of model {model_name}: ',recall_score_total)
print(f'F1 of model {model_name} : ', f1_score_total)
print(f'Precision of model {model_name}: :',precision_total)
print(model_dir)

# Score ROUGE once and reuse the result for both the raw dump and the means.
rouge_scores = rouge_eval(evaluate)
print(rouge_scores)
print(f'{model_name} results')

# Each entry is the list returned by get_scores for one pair; element 0
# holds that pair's 'rouge-1' / 'rouge-2' dictionaries.
rouge_values = [score[0]['rouge-1']['f'] for score in rouge_scores]
mean_rouge_score = statistics.mean(rouge_values) if rouge_values else 0
print(f'Rouge mean score:{mean_rouge_score}')

rouge2_values = [score[0]['rouge-2']['f'] for score in rouge_scores]
mean_rouge_score = statistics.mean(rouge2_values) if rouge2_values else 0
print(f'Rouge-2 mean score:{mean_rouge_score}')
|
|
|