Compare commits

..

No commits in common. "UPDATE_2024" and "master" have entirely different histories.

12 changed files with 40 additions and 648 deletions

5
.env
View File

@ -1,5 +0,0 @@
# Endpoint the Streamlit front end POSTs to; it is read there with os.getenv("URL").
URL="http://backend_inference:8000/predict"
# Read by the backend's api.py. They are only handed to uvicorn.run() when
# api.py is executed directly as a script.
PORT="8090"
HOST="localhost"
# Locations passed to from_pretrained() for the QA model and its tokenizer.
QA_MODEL="qa_model"
QA_TOKENIZER="qa_tokenizer"

View File

@ -1,55 +0,0 @@
FROM python:3.10-slim-bullseye AS base
WORKDIR /app
# LANG                    - UTF-8 locale for Python and other tools
# PYTHONDONTWRITEBYTECODE - do not write .pyc files
# PYTHONUNBUFFERED        - unbuffered stdout/stderr, so output reaches the container log immediately
# PIP_NO_CACHE_DIR        - disable pip's cache directory
# VENV                    - location of the virtual environment shared by the stages
# APPUSER                 - name of the unprivileged account created in the runner stage
ENV LANG=C.UTF-8 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    VENV="/opt/venv" \
    APPUSER=appuser
# PATH gets its own ENV instruction on purpose: inside a single ENV instruction
# every variable is substituted with the value it had *before* that instruction,
# so ${VENV} would still be empty if PATH were set together with VENV above.
ENV PATH="${VENV}/bin:${PATH}"
FROM base AS builder
# OS build tools are installed before requirements.txt is copied, so editing the
# Python dependencies does not invalidate this layer. The apt package lists are
# removed in the same layer that downloaded them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
# torch is installed from the CPU-only wheel index; it is not listed in
# requirements.txt.
RUN python -m venv ${VENV} \
    && . ${VENV}/bin/activate \
    && pip install --upgrade pip \
    && pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu \
    && pip install -r requirements.txt
FROM base AS runner
COPY api.py .
# Only the finished virtual environment is taken from the builder stage.
COPY --from=builder ${VENV} ${VENV}
ENV PATH="${VENV}/bin:$PATH"
# Create the unprivileged account, make /app its home and hand it the files.
RUN chgrp -R 0 /app \
    && chmod -R g=u /app \
    && groupadd -r ${APPUSER} \
    && useradd -r -g ${APPUSER} ${APPUSER} \
    && chown -R ${APPUSER}:${APPUSER} /app \
    && usermod -d /app ${APPUSER}
# Actually drop root: without a USER instruction the container still runs as root
# even though the account above exists.
USER ${APPUSER}
# uvicorn is started without --port, so it listens on its default port.
EXPOSE 8000
CMD ["/opt/venv/bin/uvicorn", "api:app", "--host", "0.0.0.0"]

View File

@ -1,57 +0,0 @@
import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import MT5Tokenizer,AutoTokenizer, AutoModel ,T5ForConditionalGeneration
import warnings
import json
import random
import torch.nn.functional as F
import os
from dotenv import load_dotenv
#from ece import compute_ECE
from torch.utils.data import DataLoader
from functools import reduce
warnings.filterwarnings("ignore")
DEVICE = 'cpu'

# Configuration comes from the environment (a .env file is loaded if present).
load_dotenv()
host = os.getenv("HOST")
port = os.getenv("PORT")
model_dir = os.getenv("QA_MODEL")
tokenizer_dir = os.getenv("QA_TOKENIZER")

# Fail early with a readable message instead of passing None to from_pretrained().
if not model_dir or not tokenizer_dir:
    raise RuntimeError("QA_MODEL and QA_TOKENIZER must be set (for example in .env)")

MODEL = T5ForConditionalGeneration.from_pretrained(model_dir, from_tf=False, return_dict=True).to(DEVICE)
print("Model succesfully loaded!")
TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=True)
print("Tokenizer succesfully loaded!")

# Maximum number of input tokens.
Q_LEN = 512

# Make sure the '<sep>' separator token is part of the vocabulary.
TOKENIZER.add_tokens('<sep>')
# Every token id needs an embedding row. Only grow the matrix, so a checkpoint
# that already covers the vocabulary is left untouched.
if len(TOKENIZER) > MODEL.get_input_embeddings().num_embeddings:
    MODEL.resize_token_embeddings(len(TOKENIZER))
print('model loaded')

app = FastAPI()
# Request schema
class InputData(BaseModel):
    """JSON body accepted by the /predict endpoint: a passage and a question about it."""

    context: str
    question: str
@app.post("/predict")
async def predict(input_data: InputData):
    """Generate an answer for ``input_data.question`` from ``input_data.context``.

    Returns:
        dict: ``{'prediction': <decoded answer text>}``.
    """
    # The training and evaluation scripts in this repository build the model
    # input as context + '<sep>' + question, so the same layout is used here
    # instead of tokenising (question, context) as a sentence pair.
    # NOTE(review): confirm the deployed checkpoint was produced by those scripts.
    prompt = input_data.context + '<sep>' + input_data.question
    encoded = TOKENIZER(
        prompt,
        max_length=Q_LEN,
        padding="max_length",
        truncation=True,
        add_special_tokens=True,
        return_tensors="pt",  # already shaped (1, Q_LEN); no manual unsqueeze needed
    )
    # Per-step scores are not requested: nothing here consumes them.
    generated = MODEL.generate(
        input_ids=encoded["input_ids"].to(DEVICE),
        attention_mask=encoded["attention_mask"].to(DEVICE),
        max_length=512,
    )
    predicted_text = TOKENIZER.decode(generated[0], skip_special_tokens=True)
    return {'prediction': predicted_text}
if __name__ == "__main__":
    # os.getenv() yields strings (or None when unset), while uvicorn expects an
    # integer port. Fall back to uvicorn's own defaults when a value is missing.
    uvicorn.run(app, host=host or "127.0.0.1", port=int(port) if port else 8000)

View File

@ -1,5 +0,0 @@
# Presumably the backend's requirements.txt (it follows api.py in this listing).
# torch is not listed: the backend Dockerfile installs the CPU build separately
# from the PyTorch wheel index.
uvicorn==0.23.2
fastapi==0.103.2
transformers==4.34.0
# NOTE(review): rank_bm25 is not imported by the api.py shown alongside this file.
rank_bm25==0.2.2
# NOTE(review): python-dotenv is the only requirement without a pinned version.
python-dotenv

View File

@ -1,51 +0,0 @@
FROM python:3.10-slim-bullseye AS base
WORKDIR /app
# LANG                    - UTF-8 locale for Python and other tools
# PYTHONDONTWRITEBYTECODE - do not write .pyc files
# PYTHONUNBUFFERED        - unbuffered stdout/stderr, so output reaches the container log immediately
# PIP_NO_CACHE_DIR        - disable pip's cache directory
# VENV                    - location of the virtual environment shared by the stages
# APPUSER                 - name of the unprivileged account created in the runner stage
ENV LANG=C.UTF-8 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    VENV="/opt/venv" \
    APPUSER=appuser
# PATH gets its own ENV instruction on purpose: inside a single ENV instruction
# every variable is substituted with the value it had *before* that instruction,
# so ${VENV} would still be empty if PATH were set together with VENV above.
ENV PATH="${VENV}/bin:${PATH}"
FROM base AS builder
COPY requirements.txt .
# No OS packages are installed in this stage, so the former bare
# `apt-get update` (which only left package lists behind) is dropped.
RUN python -m venv ${VENV} \
    && . ${VENV}/bin/activate \
    && pip install --upgrade pip \
    && pip install -r requirements.txt
FROM base AS runner
COPY aplication.py .
# Only the finished virtual environment is taken from the builder stage.
COPY --from=builder ${VENV} ${VENV}
ENV PATH="${VENV}/bin:$PATH"
# Create the unprivileged account, make /app its home and hand it the files.
RUN chgrp -R 0 /app \
    && chmod -R g=u /app \
    && groupadd -r ${APPUSER} \
    && useradd -r -g ${APPUSER} ${APPUSER} \
    && chown -R ${APPUSER}:${APPUSER} /app \
    && usermod -d /app ${APPUSER}
# Actually drop root: without a USER instruction the container still runs as root
# even though the account above exists.
USER ${APPUSER}
# Port published for this service by docker-compose.
EXPOSE 8501
# Health check left disabled: this Dockerfile never installs curl. To enable it
# once curl is available:
#   HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
CMD ["streamlit", "run", "aplication.py", "--server.address=0.0.0.0"]

View File

@ -1,36 +0,0 @@
import requests
import json
import streamlit as st
import os
from dotenv import load_dotenv
load_dotenv()
def predict(context, question, timeout=300):
    """Send ``context`` and ``question`` to the inference backend and return its JSON reply.

    Args:
        context: Passage the answer should be taken from.
        question: Question about the passage.
        timeout: Seconds to wait for the backend before giving up. Without a
            timeout a hung backend would block the page indefinitely.

    Returns:
        The decoded JSON body of the backend response.

    Raises:
        RuntimeError: If the ``URL`` environment variable is not set.
        requests.HTTPError: If the backend answers with an error status.
    """
    url = os.getenv("URL")
    if not url:
        raise RuntimeError("URL environment variable is not set")
    payload = {'context': context, 'question': question}
    # json= serialises the payload and sets the JSON Content-Type header.
    response = requests.post(url, json=payload, timeout=timeout)
    # Surface backend errors instead of presenting an error body as a prediction.
    response.raise_for_status()
    return response.json()
def main():
    """Render the page: two text inputs and a button that queries the backend."""
    st.title("T5 model inference")
    # Input fields for the values sent to the model
    context = st.text_input("context:")
    question = st.text_input("question:")
    # Streamlit re-runs this script on every interaction, so the backend is
    # queried only once the button is pressed. Calling it unconditionally would
    # fire a request on page load and on each input edit.
    if st.button("Execute"):
        prediction = predict(context, question)
        st.json({
            'context': context,
            'question': question,
            'prediciton': prediction
        })


if __name__ == "__main__":
    main()

View File

@ -1,3 +0,0 @@
# Presumably the front end's requirements.txt (it follows the Streamlit app in this listing).
# HTTP client used by predict() to POST to the backend
requests
# UI framework the page is built with
streamlit
# Provides load_dotenv() for reading the .env file
python-dotenv

View File

@ -1,34 +0,0 @@
version: '3.3'
services:
  backend:
    #build: ./backend
    image: backend:test
    # The front end reaches the backend by this name (see URL in .env).
    container_name: backend_inference
    ports:
      # host:container. The backend image starts uvicorn without --port, so it
      # listens on uvicorn's default 8000 inside the container (the same port
      # the front end's URL uses); host port 8090 must forward there.
      - "8090:8000"
    networks:
      - semantic  # replace with your own network name
    volumes:
      - ./.env:/app/.env
      - ./qa_model:/app/qa_model
      - ./qa_tokenizer:/app/qa_tokenizer
    restart: always
  frontend:
    #build: ./frontend
    image: streamlit:dev
    container_name: streamlit
    ports:
      - "8501:8501"
    # Both services share the "semantic" network, so the legacy `links` entry
    # is not needed for name resolution and has been dropped.
    depends_on:
      - backend
    networks:
      - semantic
    restart: always
    volumes:
      - ./.env:/app/.env
networks:
  semantic:

View File

@ -1,163 +0,0 @@
import torch
import json
from tqdm import tqdm
import torch.nn as nn
from torch.optim import Adam
import nltk
import string
from torch.utils.data import Dataset, DataLoader, RandomSampler
import pandas as pd
import numpy as np
import transformers
#from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, T5TokenizerFast
from transformers import AutoTokenizer, T5ForConditionalGeneration
import warnings
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")
print("Imports succesfully done")
# Use the first GPU when available, otherwise fall back to the CPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Tokenizer and weights must come from the same checkpoint: the ids produced by
# the umT5 tokenizer do not correspond to the rows of mT5's embedding matrix.
# mT5 is kept because the model is loaded (here and by its consumers) with
# T5ForConditionalGeneration.
# NOTE(review): if umT5 was the intended base model, switch BOTH lines and the model class.
CHECKPOINT = 'google/mt5-small'
TOKENIZER = AutoTokenizer.from_pretrained(CHECKPOINT)
# Separator placed between context and question in every training input.
TOKENIZER.add_tokens('<sep>')
MODEL = T5ForConditionalGeneration.from_pretrained(CHECKPOINT).to(DEVICE)
# Resize the embeddings so the added token has a row.
MODEL.resize_token_embeddings(len(TOKENIZER))
# Learning rate 1e-5
OPTIMIZER = Adam(MODEL.parameters(), lr=0.00001)
Q_LEN = 256   # Question Length
T_LEN = 32    # Target Length
BATCH_SIZE = 4  # Batch size
print("Model succesfully loaded")

from datasets import load_dataset
dataset_english = load_dataset("squad_v2")
dataset_slovak = load_dataset("TUKE-DeutscheTelekom/skquad")
dataset_polish = load_dataset("clarin-pl/poquad")
def prepare_data_english(data, split="train"):
    """Flatten one split of a SQuAD-style dataset into input/answer records.

    Despite its name the function is applied to the English, Polish and Slovak
    datasets alike.

    Args:
        data: Mapping from split name to an iterable of items, each with
            ``context``, ``question`` and ``answers`` (holding ``answer_start``
            and ``text`` lists).
        split: Name of the split to read. Defaults to ``"train"``, the split
            this function previously hard-coded.

    Returns:
        list[dict]: One ``{"input": context + '<sep>' + question, "answer": span}``
        record per item that has at least one answer.
    """
    articles = []
    for item in tqdm(data[split], desc="Preparing training datas"):
        context = item["context"]
        question = item["question"]
        try:
            start_position = item['answers']['answer_start'][0]
        except IndexError:
            # No answer start recorded for this item: nothing to learn from.
            continue
        # The target is cut out of the context using the first answer's offset and length.
        text_length = len(item['answers']['text'][0])
        target_text = context[start_position : start_position + text_length]
        articles.append({"input": context + '<sep>' + question, "answer": target_text})
    return articles
# Flatten the "train" split of each dataset into input/answer records.
data_english = prepare_data_english(dataset_english)
data_polish = prepare_data_english(dataset_polish)
data_slovak = prepare_data_english(dataset_slovak)
# Pool the three languages into a single list of training records.
train_data = data_slovak + data_english + data_polish
print("Training Samples : ",len(train_data))
# One row per record, with "input" and "answer" columns.
data = pd.DataFrame(train_data)
class QA_Dataset(Dataset):
    """Dataset that tokenises "input"/"answer" rows of a DataFrame on access.

    Args:
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask``; its ``pad_token_id`` marks padding positions.
        dataframe: Frame with an ``input`` and an ``answer`` column.
        q_len: Length every tokenised input is padded/truncated to.
        t_len: Length every tokenised answer is padded/truncated to.
    """

    def __init__(self, tokenizer, dataframe, q_len, t_len):
        self.tokenizer = tokenizer
        self.q_len = q_len
        self.t_len = t_len
        self.data = dataframe
        self.input = self.data['input']
        self.answer = self.data['answer']

    def __len__(self):
        # Number of rows. The former code read the nonexistent ``self.questions``.
        return len(self.input)

    def __getitem__(self, idx):
        """Return the tensors for the row whose index label is ``idx``."""
        text = self.input[idx]
        answer = self.answer[idx]

        # padding="max_length" already pads to the fixed size; the deprecated
        # pad_to_max_length flag is not needed on top of it.
        input_tokenized = self.tokenizer(text, max_length=self.q_len, padding="max_length",
                                         truncation=True, add_special_tokens=True)
        answer_tokenized = self.tokenizer(answer, max_length=self.t_len, padding="max_length",
                                          truncation=True, add_special_tokens=True)

        labels = torch.tensor(answer_tokenized["input_ids"], dtype=torch.long)
        # Padding positions are set to -100 so the loss ignores them.
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": torch.tensor(input_tokenized["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(input_tokenized["attention_mask"], dtype=torch.long),
            "labels": labels,
            "decoder_attention_mask": torch.tensor(answer_tokenized["attention_mask"], dtype=torch.long)
        }
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# RandomSampler(x) only looks at len(x) and yields the positions 0..len(x)-1,
# so passing the split's index made both loaders draw from the first rows of
# `data` and the validation rows overlapped the training rows.
# SubsetRandomSampler draws from exactly the index labels it is given.
train_sampler = torch.utils.data.SubsetRandomSampler(train_data.index.tolist())
val_sampler = torch.utils.data.SubsetRandomSampler(val_data.index.tolist())

# One dataset over the full frame; the samplers decide which rows each loader sees.
qa_dataset = QA_Dataset(TOKENIZER, data, Q_LEN, T_LEN)

train_loader = DataLoader(qa_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
val_loader = DataLoader(qa_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)
print("Loaders working fine")
### TRAINING (46MINS ACCORDING THE V1_DATA)
# Loss sums and batch counts are kept across epochs, so the figures printed
# below are running means over everything seen so far.
train_loss = 0
val_loss = 0
train_batch_count = 0
val_batch_count = 0

#TODO
# Choose a better number of epochs
# Evaluate results and find out how to calculate a real rouge metric
EPOCHS = 2


def _batch_loss(batch):
    """Run MODEL on one batch moved to DEVICE and return its loss tensor."""
    outputs = MODEL(
        input_ids=batch["input_ids"].to(DEVICE),
        attention_mask=batch["attention_mask"].to(DEVICE),
        labels=batch["labels"].to(DEVICE),
        decoder_attention_mask=batch["decoder_attention_mask"].to(DEVICE)
    )
    return outputs.loss


for epoch in range(EPOCHS):
    MODEL.train()
    for batch in tqdm(train_loader, desc="Training batches"):
        loss = _batch_loss(batch)

        OPTIMIZER.zero_grad()
        loss.backward()
        OPTIMIZER.step()
        train_loss += loss.item()
        train_batch_count += 1

    # Evaluation: measure the loss only. The previous loop also ran backward()
    # and OPTIMIZER.step() here, which trained the model on the validation data.
    MODEL.eval()
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validation batches"):
            loss = _batch_loss(batch)
            val_loss += loss.item()
            val_batch_count += 1

    print(f"{epoch+1}/{EPOCHS} -> Train loss: {train_loss / train_batch_count}\tValidation loss: {val_loss/val_batch_count}")

print("Training done succesfully")

## SAVE FINE_TUNED MODEL
MODEL.save_pretrained("qa_model_umT5_small_3LANG")
TOKENIZER.save_pretrained('qa_tokenizer_umT5_small_3LANG')

View File

@ -1,164 +0,0 @@
## IMPORT NECESSARY MODULES
from transformers import T5ForConditionalGeneration, T5Tokenizer,AutoTokenizer
import torch
#import evaluate # Bleu
import json
import random
import statistics
from sklearn.metrics import precision_score, recall_score, f1_score
import warnings
from tqdm import tqdm
from datasets import load_dataset
import evaluate
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.feature_extraction.text import CountVectorizer
# ROUGE metric object used by prediction_rouge().
rouge = evaluate.load('rouge')
warnings.filterwarnings("ignore")
# Use the first GPU when available, otherwise fall back to the CPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
#Prepare data first
def prepare_data_english(data, split="validation"):
    """Flatten one split of a SQuAD-style dataset into input/answer records.

    Args:
        data: Mapping from split name to an iterable of items, each with
            ``context``, ``question`` and ``answers`` (holding ``answer_start``
            and ``text`` lists).
        split: Name of the split to read. Defaults to ``"validation"``, the
            split this function previously hard-coded.

    Returns:
        list[dict]: One ``{"input": context + '<sep>' + question, "answer": span}``
        record per item that has at least one answer.
    """
    articles = []
    for item in tqdm(data[split], desc="Preparing validation datas"):
        context = item["context"]
        question = item["question"]
        try:
            start_position = item['answers']['answer_start'][0]
        except IndexError:
            # No answer start recorded for this item: nothing to score against.
            continue
        # The reference is cut out of the context using the first answer's offset and length.
        text_length = len(item['answers']['text'][0])
        target_text = context[start_position : start_position + text_length]
        articles.append({"input": context + '<sep>' + question, "answer": target_text})
    return articles
#Load the pretrained model
# model_name is only a label printed with the final report.
model_name = 'qa_model_T5-slovak'
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
model_dir = '/home/omasta/T5_JUPYTER/qa_model'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer'
MODEL = T5ForConditionalGeneration.from_pretrained(model_dir, from_tf=False, return_dict=True).to(DEVICE)
print("Model succesfully loaded!")
TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=True)
print("Tokenizer succesfully loaded!")
# Not referenced by predict_answer(), which passes max_length=512 directly.
Q_LEN = 512
# Register the separator used inside every input and size the embeddings to the vocabulary.
TOKENIZER.add_tokens('<sep>')
MODEL.resize_token_embeddings(len(TOKENIZER))
#Load datasets
# Only the Slovak dataset is active; the English and Polish lines are kept commented out.
#dataset_english = load_dataset("squad_v2")
dataset_slovak = load_dataset("TUKE-DeutscheTelekom/skquad")
#dataset_polish = load_dataset("clarin-pl/poquad")
#Prepare datas
#data_english = prepare_data_english(dataset_english)
#data_polish = prepare_data_english(dataset_polish)
data_slovak = prepare_data_english(dataset_slovak)
#Merge datasets
#val_data = data_slovak + data_english + data_polish
print("Val Samples : ",len(data_slovak))
def prediction_rouge(predictions, references):
    """Score a single prediction against a single reference with the module-level ``rouge`` metric.

    Both arguments are wrapped before being passed on: the prediction in a
    one-element list, the reference in a one-element list of lists.
    """
    predicted_batch = [predictions]
    reference_batch = [[references]]
    return rouge.compute(predictions=predicted_batch, references=reference_batch)
def compute_bleu(reference, prediction):
    """Return the sentence-level BLEU of ``prediction`` against ``reference``.

    Both strings are split on whitespace; smoothing method 4 is applied.
    """
    smoothing = SmoothingFunction().method4
    reference_tokens = reference.split()
    hypothesis_tokens = prediction.split()
    return sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=smoothing,
    )
def classic_metrics(sentence1, sentence2):
    """Return (precision, recall, f1) computed from bag-of-words count vectors.

    The count vector of ``sentence1`` is passed to scikit-learn as ``y_true``
    and that of ``sentence2`` as ``y_pred``, with ``average='weighted'``.
    ``0, 0, 0`` is returned when both sentences are empty or when the
    vectoriser raises ``ValueError``.

    NOTE(review): the per-word counts are treated as class labels here —
    confirm this is the intended metric.
    """
    # Guard: nothing to compare.
    if sentence1 == "" and sentence2 == "":
        return 0, 0, 0

    # Build the bag-of-words representation of both sentences.
    vectorizer = CountVectorizer()
    try:
        bag_of_words = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        return 0, 0, 0

    # Dense count vector for each sentence.
    dense_counts = bag_of_words.toarray()
    y_true, y_pred = dense_counts[0], dense_counts[1]

    # Compute the three scores.
    scores = tuple(
        float(metric(y_true, y_pred, average='weighted'))
        for metric in (precision_score, recall_score, f1_score)
    )
    return scores
def predict_answer(input, ref_answer, language):
    """Generate an answer for ``input`` and pair it with its lower-cased reference.

    Returns:
        dict: ``{"pred": ..., "ref": ..., "language": ...}`` with both texts lower-cased.
    """
    encoded = TOKENIZER(input, max_length=512, padding="max_length", truncation=True, add_special_tokens=True)

    def as_batch(key):
        # Turn one tokenizer field into a (1, length) tensor on DEVICE.
        return torch.tensor(encoded[key], dtype=torch.long).to(DEVICE).unsqueeze(0)

    batch_ids = as_batch("input_ids")
    batch_mask = as_batch("attention_mask")
    generated = MODEL.generate(input_ids=batch_ids, attention_mask=batch_mask)
    decoded = TOKENIZER.decode(generated.flatten(), skip_special_tokens=True)

    reference = ref_answer.lower()
    return {"pred": decoded.lower(), "ref": reference, "language": language}
def predict_and_save(val_data, lang):
    """Run ``predict_answer`` on every record of ``val_data`` and return the results as a list.

    Each record supplies its ``"input"`` and ``"answer"``; ``lang`` is passed
    through as the language tag. Nothing is written to disk here.
    """
    return [
        predict_answer(val_data[position]["input"], val_data[position]["answer"], lang)
        for position in tqdm(range(len(val_data)), desc="predicting")
    ]
#Predict
pred_slovak = predict_and_save(data_slovak,"sk")
#pred_english = predict_and_save(data_english,"en")
#pred_polish = predict_and_save(data_polish,"pl")
#predictions = pred_slovak + pred_english + pred_polish
# Only the Slovak predictions are produced at the moment. `predictions` must
# still be defined, otherwise the dump below fails with a NameError.
predictions = pred_slovak

#Save the results for later
import json
with open('predictions-t5.json', 'w') as json_file:
    json.dump(predictions, json_file)

#Compute metrics
# Read the predictions back from disk, so the metric step below works on the stored file.
with open("predictions-t5.json","r") as json_file:
    data = json.load(json_file)
# Keep only the predictions of the language being reported.
language="sk"
new_data = [item for item in data if item["language"] == language]
if not new_data:
    # Every average below divides by the number of items.
    raise ValueError(f"No predictions found for language {language!r}")

bleu = list()
rouges = list()
precisions=list()
recalls=list()
f1s=list()

for item in tqdm(new_data,desc="Evaluating"):
    # compute_bleu(reference, prediction) and classic_metrics(), whose first
    # sentence becomes scikit-learn's y_true, both expect the reference first.
    # They used to be called with the prediction first.
    bleu.append(compute_bleu(item["ref"],item["pred"]))
    rouges.append(prediction_rouge(item["pred"],item["ref"]))
    precision, recall, f1 = classic_metrics(item["ref"],item["pred"])
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)

#COMPUTATION OF METRICS
# The loop variable is not called `rouge`, to avoid confusion with the metric object.
rouge1_values = [scores['rouge1'] for scores in rouges]
rouge2_values = [scores['rouge2'] for scores in rouges]
rougeL_values = [scores['rougeL'] for scores in rouges]

average_rouge1 = sum(rouge1_values) / len(rouges)
average_rouge2 = sum(rouge2_values) / len(rouges)
average_rougeL = sum(rougeL_values) / len(rouges)

print("Model name :",model_name)
print("Language :",language)
print("BLEU: ",sum(bleu)/len(bleu))
print("Recall :",sum(recalls)/len(recalls))
print("F1 : ",sum(f1s)/len(f1s))
print("Precision :",sum(precisions)/len(precisions))
print("Rouge-1 :",average_rouge1)
print("Rouge-2 :",average_rouge2)
print("Rouge-L :",average_rougeL)

View File

@ -37,20 +37,13 @@ Q_LEN = 256 # Question Length
T_LEN = 32 # Target Length
BATCH_SIZE = 4 #dávka dát
print("Model succesfully loaded")
from datasets import load_dataset
dataset = load_dataset("squad_v2")
print(dataset["train"][0])
#path_train = '/home/omasta/T5_JUPYTER/skquad-221017/train-v1.json'
path_train = "poquad-train.json"
path_train = '/home/omasta/T5_JUPYTER/skquad-221017/train-v1.json'
with open(path_train) as f:
data = json.load(f)
def nahradit_znaky(retezec):
    """Return ``retezec`` with every '[' and ']' replaced by a single space."""
    bracket_to_space = str.maketrans({'[': ' ', ']': ' '})
    return retezec.translate(bracket_to_space)
def prepare_data(data):
articles = []
for article in data["data"]:
@ -67,28 +60,15 @@ def prepare_data(data):
articles.append(inputs)
return articles
def prep_data(data):
    """Turn indexable question records into ``{"input", "answer"}`` dictionaries.

    For each record the answer texts are joined with ", " and cleaned with
    ``nahradit_znaky``; records for which that step raises ``KeyError`` are
    skipped. The input is ``context + "<sep>" + question``.
    """
    arcs = []
    for position in range(len(data)):
        record = data[position]
        question_text = record["question"]
        try:
            joined_answers = ', '.join(record["answers"]["text"])
            answer = nahradit_znaky(joined_answers)
        except KeyError:
            continue
        arcs.append({"input": record["context"] + "<sep>" + question_text, "answer": answer})
    return arcs
#print(dataset["train"][0]["answers"]["text"])
prepared_data=prep_data(dataset["train"])
#prepared_data = prepare_data(data)
prepared_data = prepare_data(data)
print(prepared_data[0])
#Dataframe
data = pd.DataFrame(prepared_data)
class QA_Dataset(Dataset):
def __init__(self, tokenizer, dataframe, q_len, t_len):
self.tokenizer = tokenizer
@ -133,13 +113,18 @@ train_loader = DataLoader(qa_dataset, batch_size=BATCH_SIZE, sampler=train_sampl
val_loader = DataLoader(qa_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)
print("Loaders working fine")
### TRAINING (46MINS ACCORDING THE V1_DATA)
train_loss = 0
val_loss = 0
train_batch_count = 0
val_batch_count = 0
for epoch in range(2):
for epoch in range(4):
MODEL.train()
for batch in tqdm(train_loader, desc="Training batches"):
input_ids = batch["input_ids"].to(DEVICE)
@ -186,5 +171,5 @@ for epoch in range(2):
print("Training done succesfully")
## SAVE FINE_TUNED MODEL
MODEL.save_pretrained("qa_model_mT5_english")
TOKENIZER.save_pretrained('qa_tokenizer_mT5_english')
MODEL.save_pretrained("qa_model_mT5_small")
TOKENIZER.save_pretrained('qa_tokenizer_mT5_small')

View File

@ -11,11 +11,6 @@ import warnings
warnings.filterwarnings("ignore")
##13/03/23 added
from rouge import Rouge
from tqdm import tqdm
from datasets import load_dataset
import re
##CUSTOM ROUGE METRIC - NEW TODO:
# Názov modelu
DEVICE ='cuda:0'
@ -27,9 +22,9 @@ DEVICE ='cuda:0'
#tokenizer_dir = "/home/omasta/T5_JUPYTER/qa_tokenizer"
#mT5 SMALL MODEL
model_name = 'qa_model'
model_dir = '/home/omasta/T5_JUPYTER/qa_model_mT5_polish'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer_mT5_polish'
model_name = 'mT5_SMALL'
model_dir = '/home/omasta/T5_JUPYTER/qa_model_mT5_small'
tokenizer_dir = '/home/omasta/T5_JUPYTER/qa_tokenizer_mT5_small'
#Načítanie modelu z adresára
MODEL = T5ForConditionalGeneration.from_pretrained(model_dir, from_tf=False, return_dict=True).to(DEVICE)
@ -40,14 +35,9 @@ Q_LEN = 512
TOKENIZER.add_tokens('<sep>')
MODEL.resize_token_embeddings(len(TOKENIZER))
def nahradit_znaky(retezec):
    """Return ``retezec`` with every '[' and ']' replaced by a single space."""
    bracket_to_space = str.maketrans({'[': ' ', ']': ' '})
    return retezec.translate(bracket_to_space)
def predict_answer(data, ref_answer=None,random=None):
predictions=[]
for i in tqdm(data,desc="predicting"):
for i in data:
inputs = TOKENIZER(i['input'], max_length=Q_LEN, padding="max_length", truncation=True, add_special_tokens=True)
input_ids = torch.tensor(inputs["input_ids"], dtype=torch.long).to(DEVICE).unsqueeze(0)
attention_mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).to(DEVICE).unsqueeze(0)
@ -57,14 +47,14 @@ def predict_answer(data, ref_answer=None,random=None):
#print(ref_answer)
if ref_answer:
# Load the Bleu metric
#bleu = evaluate.load("google_bleu")
bleu = evaluate.load("google_bleu")
#print('debug')
#precision = list(precision_score(ref_answer, predicted_answer))
#recall = list(recall_score(ref_answer, predicted_answer))
#f1 = list(f1_score(ref_answer, predicted_answer))
#score = bleu.compute(predictions=[predicted_answer],
# references=[ref_answer])
predictions.append({'prediction':predicted_answer,'ref_answer':ref_answer})
score = bleu.compute(predictions=[predicted_answer],
references=[ref_answer])
predictions.append({'prediction':predicted_answer,'ref_answer':ref_answer,'score':score['google_bleu']})
return predictions
def prepare_data(data):
@ -76,29 +66,19 @@ def prepare_data(data):
answer = qa["answers"][0]["text"]
inputs = {"input": paragraph["context"]+ "<sep>" + question, "answer": answer}
articles.append(inputs)
return articles
def prepare_polish_data(data):
    """Turn indexable question records into ``{"input", "answer"}`` dictionaries.

    For each record the answer texts are joined with ", " and cleaned with
    ``nahradit_znaky``; records for which that step raises ``KeyError`` are
    skipped. The input is ``context + "<sep>" + question``.
    """
    arcs = []
    for position in range(len(data)):
        record = data[position]
        question_text = record["question"]
        try:
            joined_answers = ', '.join(record["answers"]["text"])
            answer = nahradit_znaky(joined_answers)
        except KeyError:
            continue
        arcs.append({"input": record["context"] + "<sep>" + question_text, "answer": answer})
    return arcs
dev_data_path = '/home/omasta/T5_JUPYTER/skquad-221017/dev-v1.json'
with open(dev_data_path,'r') as f:
data=json.load(f)
#print('data imported')
#dataset = load_dataset("clarin-pl/poquad")
dataset = load_dataset("squad_v2")
dev_data = prepare_polish_data(dataset["validation"])
dev_data = prepare_data(data)
#print('data prepared')
print(f'Number of dev samples {len(dev_data)}')
#print(dev_data[0])
print(dev_data[0])
bleu_score = []
precisions=[]
f1_scores=[]
@ -108,9 +88,10 @@ rouge_2 = []
#X = 150
evaluate = predict_answer(dev_data)
rouge = Rouge()
for item in tqdm(evaluate,desc="evaluating"):
for item in evaluate:
bleu_score.append(item['score'])
try:
scores = rouge.get_scores(item['prediction'], item['ref_answer'])
#scores = rouge.get_scores(item['prediction'], item['ref_answer'], avg=True)
precision=precision_score(list(item['ref_answer']), list(item['prediction']),average='macro')
recall=recall_score(list(item['ref_answer']), list(item['prediction']),average='macro')
f1=f1_score(list(item['ref_answer']), list(item['prediction']),average='macro')
@ -138,22 +119,21 @@ def rouge_eval(dict_x):
print(f'VYHODNOTENIE VYSLEDKOV : ------------------------')
#print(evaluate)
#bleu_score_total = statistics.mean(bleu_score)
recall_score_total= statistics.mean(recall_scores)
f1_score_total = statistics.mean(f1_scores)
precision_total = statistics.mean(precisions)
#recall_score_total= statistics.mean(recall_scores)
#f1_score_total = statistics.mean(f1_scores)
#precision_total = statistics.mean(precisions)
#print(f'Bleu_score of model {model_name} : ',bleu_score_total)
print(f'Recall of model {model_name}: ',recall_score_total)
print(f'F1 of model {model_name} : ', f1_score_total)
print(f'Precision of model {model_name}: :',precision_total)
print(model_dir)
print(rouge_eval(evaluate))
#print(f'Recall of model {model_name}: ',recall_score_total)
#print(f'F1 of model {model_name} : ', f1_score_total)
#print(f'Precision of model {model_name}: :',precision_total)
#print(rouge_eval(evaluate))
print(f'{model_name} results')
rouge_scores = rouge_eval(evaluate)
rouge_values = [score[0]['rouge-1']['f'] for score in rouge_scores]
mean_rouge_score = statistics.mean(rouge_values)
print(f'Rouge mean score:{mean_rouge_score}')
print(f'Rouge:{mean_rouge_score}')
rouge2_values = [score[0]['rouge-2']['f'] for score in rouge_scores]
mean_rouge_score =statistics.mean(rouge2_values)
print(f'Rouge-2 mean score:{mean_rouge_score}')
print(f'Rouge-2:{mean_rouge_score}')