From dbf7cd462e97a4e0ab61c8c90f426c0c99b76d78 Mon Sep 17 00:00:00 2001 From: em474re Date: Thu, 9 Sep 2021 11:41:11 +0200 Subject: [PATCH] update results svm_hand --- README.md | 38 ++++++++++++++++------------- src/extract_handcrafted_features.py | 4 ++- src/svm_hand.py | 20 ++++++++++++--- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3488658..349f43b 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,25 @@ -# covid-19 speech diagnosis -This repo contains the code created by Elien Martens during IAESTE internship summer 2021, Technical University Kosice (Slowakia). +# Automated COVID-19 diagnosis +This repo contains the code created by Elien Martens during IAESTE internship summer 2021, Technical University of Kosice (Slovakia). -## Dataset +## Data -The COVID-19 datasets can be obtained through The University of Cambridge. (see also compare.openaudio.eu, Interspeech Computational Paralinguistics Challenges 2021) +The [Interspeech Computational Paralinguistics ChallengE (ComParE) 2021](http://www.compare.openaudio.eu/now/) proposes two challenges related to COVID-19 detection based on audio samples. Such samples represent speech and cough audio from both healthy and infected speakers. The COVID-19 Speech Sub-Challenge (CSS) offers 3.24 hours of audio recordings containing speech samples, while the COVID-19 Cough Sub-Challenge (CCS) provides 1.63 hours of cough samples. +The COVID-19 datasets can be obtained through The University of Cambridge. ## How it works -- clone repo -- add data (see Dataset section above), so that the structuring is the following: - CovidSpeechChallenge - -> raw_files - -> train - -> test - -> devel - -> labels - -> vgg_features - -> hand_features - -> vggish - ... 
-- run .\run_experiments.sh \ No newline at end of file +- clone repository +- add data (see [Data](#data) section), so that the structure is the following: +``` +CovidSpeechChallenge + |-- dist/ + |-- lab/ + |-- wav/ + |-- features/ + |-- results/ + |-- src/ + |-- vggish/ + |-- run_experiments.sh +``` +- run .\run_experiments.sh + +## Acknowledgements diff --git a/src/extract_handcrafted_features.py b/src/extract_handcrafted_features.py index ccc20ed..f85d49c 100644 --- a/src/extract_handcrafted_features.py +++ b/src/extract_handcrafted_features.py @@ -197,6 +197,7 @@ if __name__ == "__main__": x_data = [] y_label = [] + names = [] # extract features for all audio samples from correct subset for file in sorted([f for f in os.listdir(path) if dataset in f]): @@ -220,4 +221,5 @@ if __name__ == "__main__": # save features and labels df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',') np.save(os.path.join('./features/hand_features',"x_" + dataset + "_data.npy"), np.array(x_data)) - np.save(os.path.join('./features/hand_features',"y_" + dataset + "_label.npy"), df.label) \ No newline at end of file + np.save(os.path.join('./features/hand_features',"y_" + dataset + "_label.npy"), df.label) + np.save(os.path.join('./features/hand_features',dataset + "_names.npy"), df.filename) diff --git a/src/svm_hand.py b/src/svm_hand.py index b94abef..584ef1a 100644 --- a/src/svm_hand.py +++ b/src/svm_hand.py @@ -4,10 +4,10 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import PredefinedSplit, GridSearchCV from sklearn.preprocessing import StandardScaler from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix -# import pandas as pd +import pandas as pd import numpy as np import matplotlib.pyplot as plt - +import os RANDOM_SEED = 42 @@ -50,6 +50,14 @@ if __name__=='__main__': "./features/hand_features/y_train_label.npy", allow_pickle=True ) + devel_names = np.load( + 
"./features/hand_features/devel_names.npy", allow_pickle=True + ) + + test_names = np.load( + "./features/hand_features/test_names.npy", allow_pickle=True + ) + num_train = train_X_hand.shape[0] num_devel = devel_X_hand.shape[0] split_indices = np.repeat([-1, 0], [num_train, num_devel]) @@ -83,6 +91,9 @@ if __name__=='__main__': # optional write grid_search to csv file # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False) + df_predictions = pd.DataFrame({'filename': devel_names.tolist(), 'prediction': preds.tolist()}) + df_predictions.to_csv(os.path.join('./results/svm_hand/', 'devel.predictions.csv'), index=False) + # test results print('TEST') preds = best_estimator.predict(test_X_hand) @@ -90,8 +101,11 @@ if __name__=='__main__': cm = confusion_matrix(test_y, preds) print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}') + df_predictions = pd.DataFrame({'filename': test_names.tolist(), 'prediction': preds.tolist()}) + df_predictions.to_csv(os.path.join('./results/svm_hand/', 'test.predictions.csv'), index=False) + fig = plt.figure() plot_confusion_matrix(best_estimator,X= test_X_hand, y_true=test_y,cmap=plt.cm.Blues,display_labels=['Negative','Positive'],normalize='true') plt.ylabel('True Label') plt.xlabel('Predicated Label') - plt.savefig('cm_svm_hand.jpg') \ No newline at end of file + plt.savefig('./results/svm_hand/cm_svm_hand.jpg') \ No newline at end of file