update results svm_hand

This commit is contained in:
em474re 2021-09-09 11:41:11 +02:00
parent e754341abe
commit dbf7cd462e
3 changed files with 41 additions and 21 deletions

View File

@ -1,21 +1,25 @@
# covid-19 speech diagnosis # Automated COVID-19 diagnosis
This repo contains the code created by Elien Martens during IAESTE internship summer 2021, Technical University Kosice (Slowakia). This repo contains the code created by Elien Martens during IAESTE internship summer 2021, Technical University of Kosice (Slovakia).
## Dataset ## Data
The COVID-19 datasets can be obtained through The University of Cambridge. (see also compare.openaudio.eu, Interspeech Computational Paralinguistics Challenges 2021) The [Interspeech Computational Paralinguistics ChallengE (ComParE) 2021](http://www.compare.openaudio.eu/now/) proposes two challenges related to COVID-19 detection based on audio samples. Such samples represent speech and cough audio from both healthy and infected speakers. The COVID-19 Speech Sub-Challenge (CSS) offers 3.24 hours of audio recordings containing speech samples, while the COVID-19 Cough Sub-Challenge (CCS) provides 1.63 hours of cough samples.
The COVID-19 datasets can be obtained through The University of Cambridge.
## How it works ## How it works
- clone repo - clone repository
- add data (see Dataset section above), so that the structuring is the following: - add data (see [Data](#data) section), so that the structure is the following:
CovidSpeechChallenge ```
-> raw_files CovidSpeechChallenge
-> train |-- dist/
-> test |-- lab/
-> devel |-- wav/
-> labels |-- features/
-> vgg_features |-- results/
-> hand_features |-- src/
-> vggish |-- vggish/
... |-- run_experiments.sh
- run .\run_experiments.sh ```
- run .\run_experiments.sh
## Acknowledgements

View File

@ -197,6 +197,7 @@ if __name__ == "__main__":
x_data = [] x_data = []
y_label = [] y_label = []
names = []
# extract features for all audio samples from correct subset # extract features for all audio samples from correct subset
for file in sorted([f for f in os.listdir(path) if dataset in f]): for file in sorted([f for f in os.listdir(path) if dataset in f]):
@ -220,4 +221,5 @@ if __name__ == "__main__":
# save features and labels # save features and labels
df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',') df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',')
np.save(os.path.join('./features/hand_features',"x_" + dataset + "_data.npy"), np.array(x_data)) np.save(os.path.join('./features/hand_features',"x_" + dataset + "_data.npy"), np.array(x_data))
np.save(os.path.join('./features/hand_features',"y_" + dataset + "_label.npy"), df.label) np.save(os.path.join('./features/hand_features',"y_" + dataset + "_label.npy"), df.label)
np.save(os.path.join('./features/hand_features',dataset + "_names.npy"), df.filename)

View File

@ -4,10 +4,10 @@ from sklearn.pipeline import Pipeline
from sklearn.model_selection import PredefinedSplit, GridSearchCV from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
# import pandas as pd import pandas as pd
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import os
RANDOM_SEED = 42 RANDOM_SEED = 42
@ -50,6 +50,14 @@ if __name__=='__main__':
"./features/hand_features/y_train_label.npy", allow_pickle=True "./features/hand_features/y_train_label.npy", allow_pickle=True
) )
devel_names = np.load(
"./features/hand_features/devel_names.npy", allow_pickle=True
)
test_names = np.load(
"./features/hand_features/test_names.npy", allow_pickle=True
)
num_train = train_X_hand.shape[0] num_train = train_X_hand.shape[0]
num_devel = devel_X_hand.shape[0] num_devel = devel_X_hand.shape[0]
split_indices = np.repeat([-1, 0], [num_train, num_devel]) split_indices = np.repeat([-1, 0], [num_train, num_devel])
@ -83,6 +91,9 @@ if __name__=='__main__':
# optional write grid_search to csv file # optional write grid_search to csv file
# pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False) # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
df_predictions = pd.DataFrame({'filename': devel_names.tolist(), 'prediction': preds.tolist()})
df_predictions.to_csv(os.path.join('./results/svm_hand/', 'devel.predictions.csv'), index=False)
# test results # test results
print('TEST') print('TEST')
preds = best_estimator.predict(test_X_hand) preds = best_estimator.predict(test_X_hand)
@ -90,8 +101,11 @@ if __name__=='__main__':
cm = confusion_matrix(test_y, preds) cm = confusion_matrix(test_y, preds)
print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}') print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
df_predictions = pd.DataFrame({'filename': test_names.tolist(), 'prediction': preds.tolist()})
df_predictions.to_csv(os.path.join('./results/svm_hand/', 'test.predictions.csv'), index=False)
fig = plt.figure() fig = plt.figure()
plot_confusion_matrix(best_estimator,X= test_X_hand, y_true=test_y,cmap=plt.cm.Blues,display_labels=['Negative','Positive'],normalize='true') plot_confusion_matrix(best_estimator,X= test_X_hand, y_true=test_y,cmap=plt.cm.Blues,display_labels=['Negative','Positive'],normalize='true')
plt.ylabel('True Label') plt.ylabel('True Label')
plt.xlabel('Predicated Label') plt.xlabel('Predicated Label')
plt.savefig('cm_svm_hand.jpg') plt.savefig('./results/svm_hand/cm_svm_hand.jpg')