160 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			160 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| ## IMPORT NECESSARY DEPENDENCIES
 | |
| from transformers import T5ForConditionalGeneration, T5Tokenizer,AutoTokenizer
 | |
| import torch
 | |
| import evaluate  # Bleu
 | |
| import json
 | |
| import random
 | |
| import statistics
 | |
| from sklearn.metrics import precision_score, recall_score, f1_score
 | |
| ## TURN WARNINGS OFF
 | |
| import warnings
 | |
| warnings.filterwarnings("ignore")
 | |
| ##13/03/23 added 
 | |
| from rouge import Rouge
 | |
| from tqdm import tqdm
 | |
| from datasets import load_dataset
 | |
| import re
 | |
| ##CUSTOM ROUGE METRIC - NEW TODO:
 | |
| 
 | |
| 
 | |
# Inference device: the first CUDA GPU when one is available, otherwise the
# CPU, so the script does not crash on machines without a GPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# T5 model (alternative configuration, kept for reference)
#model_name = 'T5_SK_model'
#model_dir = "/home/omasta/T5_JUPYTER/qa_model"
#tokenizer_dir = "/home/omasta/T5_JUPYTER/qa_tokenizer"

# mT5 small model
model_name = 'qa_model'
model_dir = '/home/omasta/T5_JUPYTER/qa_model_mT5_polish'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer_mT5_polish'

# Load the model from its directory and move it to the inference device.
MODEL = T5ForConditionalGeneration.from_pretrained(model_dir, from_tf=False, return_dict=True).to(DEVICE)
print("Model succesfully loaded!")
TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=True)
print("Tokenizer succesfully loaded!")

# Maximum number of input tokens; longer inputs are truncated to this length
# when they are tokenized for prediction.
Q_LEN = 512

# '<sep>' is the marker the data-preparation functions put between context and
# question; register it with the tokenizer and resize the model's embedding
# matrix to the resulting vocabulary size.
TOKENIZER.add_tokens('<sep>')
MODEL.resize_token_embeddings(len(TOKENIZER))
 | |
| 
 | |
def nahradit_znaky(retezec):
    """Return *retezec* with every '[' and ']' replaced by a single space."""
    bracket_to_space = str.maketrans({'[': ' ', ']': ' '})
    return retezec.translate(bracket_to_space)
 | |
| 
 | |
| 
 | |
def predict_answer(data, ref_answer=None,random=None):
    """Generate an answer for every sample and pair it with its reference.

    Args:
        data: Iterable of dicts, each with an ``'input'`` string (the text fed
            to the model) and an ``'answer'`` string (the reference answer).
        ref_answer: Unused. The reference is always read from each sample's
            ``'answer'`` field; the parameter is kept so existing calls keep
            working.
        random: Unused; kept for backward compatibility.

    Returns:
        A list of ``{'prediction': str, 'ref_answer': str}`` dicts, one per
        sample whose reference answer is non-empty. The reference is
        lower-cased; the prediction is returned exactly as decoded.
    """
    predictions = []
    for sample in tqdm(data, desc="predicting"):
        reference = sample['answer'].lower()
        if not reference:
            # Samples without a reference are never reported, so skip them
            # before paying for tokenization and generation.
            continue

        inputs = TOKENIZER(sample['input'], max_length=Q_LEN, padding="max_length", truncation=True, add_special_tokens=True)
        # unsqueeze(0) adds the batch dimension: the model is run on one sample at a time.
        input_ids = torch.tensor(inputs["input_ids"], dtype=torch.long).to(DEVICE).unsqueeze(0)
        attention_mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).to(DEVICE).unsqueeze(0)
        outputs = MODEL.generate(input_ids=input_ids, attention_mask=attention_mask)
        # The batch holds a single sequence, so flattening yields just its token ids.
        predicted_answer = TOKENIZER.decode(outputs.flatten(), skip_special_tokens=True)

        predictions.append({'prediction': predicted_answer, 'ref_answer': reference})
    return predictions
 | |
| 
 | |
def prepare_data(data):
    """Flatten a SQuAD-style nested dict into a list of model samples.

    Args:
        data: Dict with a ``"data"`` list of articles; each article has a
            ``"paragraphs"`` list, and each paragraph has a ``"context"``
            string and a ``"qas"`` list of questions.

    Returns:
        A list of ``{"input": context + "<sep>" + question, "answer": text}``
        dicts, one per question. ``answer`` is the text of the first listed
        answer, or ``""`` when the question has no answers (previously this
        case raised ``IndexError``).
    """
    articles = []
    for article in data["data"]:
        for paragraph in article["paragraphs"]:
            for qa in paragraph["qas"]:
                # A missing or empty answer list yields an empty reference
                # instead of crashing on answers[0].
                answers = qa.get("answers") or []
                answer = answers[0]["text"] if answers else ""
                articles.append({
                    "input": paragraph["context"] + "<sep>" + qa["question"],
                    "answer": answer,
                })
    return articles
 | |
| 
 | |
def prepare_polish_data(data):
    """Convert dataset rows into a list of model samples.

    Args:
        data: Iterable of rows, each a mapping with ``"question"``,
            ``"context"`` and ``"answers"`` (itself a mapping with a
            ``"text"`` list of answer strings).

    Returns:
        A list of ``{"input": context + "<sep>" + question, "answer": str}``
        dicts. ``answer`` is all answer texts joined with ``", "`` and with
        square brackets replaced by spaces. Rows lacking ``"answers"`` or its
        ``"text"`` entry are skipped.
    """
    arcs = []
    # Iterate rows directly so each row is fetched once rather than being
    # looked up by index for every field.
    for row in data:
        question = row["question"]
        try:
            answer = nahradit_znaky(', '.join(row["answers"]["text"]))
        except KeyError:
            # No answer information for this row: nothing to evaluate against.
            continue
        arcs.append({"input": row["context"] + "<sep>" + question, "answer": answer})
    return arcs
 | |
| 
 | |
| 
 | |
def _token_overlap_scores(prediction, reference):
    """Return ``(precision, recall, f1)`` of the token overlap of two strings.

    Both strings are lower-cased and split on whitespace; the overlap counts
    each shared token at most as often as it occurs in both strings. All three
    scores are 0.0 when either string has no tokens or nothing is shared.
    """
    from collections import Counter  # local import: only this helper needs it

    pred_tokens = prediction.lower().split()
    ref_tokens = reference.lower().split()
    if not pred_tokens or not ref_tokens:
        return 0.0, 0.0, 0.0
    shared = sum((Counter(pred_tokens) & Counter(ref_tokens)).values())
    if not shared:
        return 0.0, 0.0, 0.0
    precision = shared / len(pred_tokens)
    recall = shared / len(ref_tokens)
    return precision, recall, 2 * precision * recall / (precision + recall)


#dataset = load_dataset("clarin-pl/poquad")
dataset = load_dataset("squad_v2")
dev_data = prepare_polish_data(dataset["validation"])

print(f'Number of dev samples {len(dev_data)}')
bleu_score = []
precisions = []
f1_scores = []
recall_scores = []
rouge_1 = []
rouge_2 = []

# NOTE: this rebinding shadows the imported `evaluate` module; the name is kept
# because the reporting code further down reads the predictions from it.
evaluate = predict_answer(dev_data)
rouge = Rouge()

# Per-sample answer quality. The scores are token-overlap based: comparing the
# strings character by character at matching positions only works when both
# have the same length, so nearly every sample would score 0.
for item in tqdm(evaluate, desc="evaluating"):
    precision, recall, f1 = _token_overlap_scores(item['prediction'], item['ref_answer'])
    precisions.append(precision)
    f1_scores.append(f1)
    recall_scores.append(recall)
 | |
| 
 | |
| 
 | |
def rouge_eval(dict_x):
    """Compute ROUGE scores for every prediction/reference pair.

    Args:
        dict_x: Iterable of dicts with ``'prediction'`` and ``'ref_answer'``
            strings.

    Returns:
        A list with one entry per scored pair, each entry being the value
        returned by ``Rouge.get_scores`` for that pair. Pairs with an empty
        prediction or reference, and pairs the scorer rejects with
        ``ValueError``, are left out. The list is empty when nothing could be
        scored.
    """
    rouge = Rouge()
    rouge_scores = []
    for item in dict_x:
        if not (item['prediction'] and item['ref_answer']):
            continue
        try:
            rouge_scores.append(rouge.get_scores(item['prediction'], item['ref_answer']))
        except ValueError:
            # NOTE(review): presumably raised for text the scorer reduces to
            # nothing (e.g. punctuation only) - confirm; such pairs are skipped.
            continue
    # Return only after the whole input has been processed, not after the
    # first scored pair.
    return rouge_scores
 | |
| 
 | |
| 
 | |
def _mean_or_nan(values):
    """Return the arithmetic mean of *values*, or NaN when there are none."""
    return statistics.mean(values) if values else float('nan')


# ---- Result summary ("VYHODNOTENIE VYSLEDKOV" = evaluation of results) ----
print('VYHODNOTENIE  VYSLEDKOV : ------------------------')
recall_score_total = _mean_or_nan(recall_scores)
f1_score_total = _mean_or_nan(f1_scores)
precision_total = _mean_or_nan(precisions)
print(f'Recall of model {model_name}: ',recall_score_total)
print(f'F1 of model {model_name} : ', f1_score_total)
print(f'Precision of model {model_name}: :',precision_total)
print(model_dir)

# Score the predictions once and reuse the result for both the raw dump and
# the averages; `or []` tolerates a scorer that returns nothing.
rouge_scores = rouge_eval(evaluate) or []
print(rouge_scores)
print(f'{model_name} results')

# Each entry is indexed as entry[0][metric]['f'] to read that metric's F-score.
rouge_values = [score[0]['rouge-1']['f'] for score in rouge_scores]
mean_rouge_score = _mean_or_nan(rouge_values)
print(f'Rouge mean score:{mean_rouge_score}')

rouge2_values = [score[0]['rouge-2']['f'] for score in rouge_scores]
mean_rouge_score = _mean_or_nan(rouge2_values)
print(f'Rouge-2 mean score:{mean_rouge_score}')
 | |
| 
 |