From 21cf2b224c8cece80bdb93a1e2fc7732389d09b4 Mon Sep 17 00:00:00 2001
From: em474re
Date: Tue, 7 Sep 2021 14:42:41 +0200
Subject: [PATCH] vggish svm

---
Note: the blob ids on the "index" lines below are those of the original
commit and do not describe the revised file contents.

 src/svm_hand_vgg.py | 144 ++++++++++++++++++++++++++++++++++++++++++++
 src/svm_vgg.py      | 125 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 269 insertions(+)
 create mode 100644 src/svm_hand_vgg.py
 create mode 100644 src/svm_vgg.py

diff --git a/src/svm_hand_vgg.py b/src/svm_hand_vgg.py
new file mode 100644
index 0000000..d2661c7
--- /dev/null
+++ b/src/svm_hand_vgg.py
@@ -0,0 +1,144 @@
+"""Grid-search a linear SVM on handcrafted plus VGGish features.
+
+Loads the pre-extracted train/devel/test arrays, concatenates handcrafted and
+VGGish features per sample, tunes a LinearSVC pipeline on the predefined
+train/devel split and prints UAR, classification report and confusion matrix
+for devel and test. The row-normalised test confusion matrix is saved to
+``cm_svm_hand_vgg.jpg``.
+"""
+from sklearn.svm import LinearSVC
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import PredefinedSplit, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import (
+    ConfusionMatrixDisplay,
+    classification_report,
+    confusion_matrix,
+    make_scorer,
+    recall_score,
+)
+# import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+RANDOM_SEED = 42
+
+GRID = [
+    {'scaler': [StandardScaler(), None],
+     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
+     'estimator__loss': ['squared_hinge'],
+     'estimator__C': np.logspace(-1, -5, num=5),
+     'estimator__class_weight': ['balanced', None],
+     'estimator__max_iter': [1000]
+     }
+]
+
+PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC(dual=True))])
+
+if __name__ == '__main__':
+
+    # load handcrafted and vggish features and labels
+    # NOTE: allow_pickle=True can execute code from a crafted .npy file; only
+    # load feature files from a trusted source.
+    devel_X_vgg = np.load(
+        "./features/vgg_features/x_devel_data_vgg.npy", allow_pickle=True
+    )
+
+    test_X_vgg = np.load(
+        "./features/vgg_features/x_test_data_vgg.npy", allow_pickle=True
+    )
+
+    train_X_vgg = np.load(
+        "./features/vgg_features/x_train_data_vgg.npy", allow_pickle=True
+    )
+
+    devel_X_hand = np.load(
+        "./features/hand_features/x_devel_data.npy", allow_pickle=True
+    )
+
+    test_X_hand = np.load(
+        "./features/hand_features/x_test_data.npy", allow_pickle=True
+    )
+
+    train_X_hand = np.load(
+        "./features/hand_features/x_train_data.npy", allow_pickle=True
+    )
+
+    devel_y = np.load(
+        "./features/vgg_features/y_devel_label_vgg.npy", allow_pickle=True
+    )
+
+    test_y = np.load(
+        "./features/vgg_features/y_test_label_vgg.npy", allow_pickle=True
+    )
+
+    train_y = np.load(
+        "./features/vgg_features/y_train_label_vgg.npy", allow_pickle=True
+    )
+
+    # fold index -1 keeps a sample out of every validation fold, 0 puts it in
+    # the single validation fold: candidates are fit on train, scored on devel
+    num_train = train_X_vgg.shape[0]
+    num_devel = devel_X_vgg.shape[0]
+    split_indices = np.repeat([-1, 0], [num_train, num_devel])
+    split = PredefinedSplit(split_indices)
+
+    # drop singleton axes before concatenating with the handcrafted features
+    # (presumably leaves samples x features -- verify against the extractor)
+    train_X_vgg = np.squeeze(train_X_vgg)
+    devel_X_vgg = np.squeeze(devel_X_vgg)
+    test_X_vgg = np.squeeze(test_X_vgg)
+
+    devel_X = np.concatenate((devel_X_hand, devel_X_vgg), axis=1)
+    test_X = np.concatenate((test_X_hand, test_X_vgg), axis=1)
+    train_X = np.concatenate((train_X_hand, train_X_vgg), axis=1)
+
+    X = np.append(train_X, devel_X, axis=0)
+    y = np.append(train_y, devel_y, axis=0)
+
+    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
+                               scoring=make_scorer(
+                                   recall_score, average='macro'),
+                               n_jobs=-1, cv=split, refit=True, verbose=1,
+                               return_train_score=False)
+
+    # find best estimator with grid search; refit=True refits it on all of X
+    # (train + devel); that refit model makes the test predictions below
+    grid_search.fit(X, y)
+    best_estimator = grid_search.best_estimator_
+
+    # fit clone of best estimator on train again for devel predictions
+    estimator = clone(best_estimator, safe=False)
+    estimator.fit(train_X, train_y)
+    preds = estimator.predict(devel_X)
+
+    # devel results
+    print('DEVEL')
+    uar = recall_score(devel_y, preds, average='macro')
+    cm = confusion_matrix(devel_y, preds)
+    print(
+        f'UAR: {uar}\n{classification_report(devel_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # optional write grid_search to csv file
+    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
+
+    # test results
+    print('TEST')
+    preds = best_estimator.predict(test_X)
+    uar = recall_score(test_y, preds, average='macro')
+    cm = confusion_matrix(test_y, preds)
+    print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # plot the row-normalised test confusion matrix from the predictions
+    # above; plot_confusion_matrix no longer exists in current scikit-learn
+    fig, ax = plt.subplots()
+    disp = ConfusionMatrixDisplay(
+        confusion_matrix=confusion_matrix(test_y, preds, normalize='true'),
+        display_labels=['Negative', 'Positive'],
+    )
+    disp.plot(cmap=plt.cm.Blues, ax=ax)
+    ax.set_ylabel('True Label')
+    ax.set_xlabel('Predicted Label')
+    fig.savefig('cm_svm_hand_vgg.jpg')
diff --git a/src/svm_vgg.py b/src/svm_vgg.py
new file mode 100644
index 0000000..dd5f464
--- /dev/null
+++ b/src/svm_vgg.py
@@ -0,0 +1,125 @@
+"""Grid-search a linear SVM on VGGish features only.
+
+Loads the pre-extracted train/devel/test arrays, tunes a LinearSVC pipeline on
+the predefined train/devel split and prints UAR, classification report and
+confusion matrix for devel and test. The row-normalised test confusion matrix
+is saved to ``cm_svm_vgg.jpg``.
+"""
+from sklearn.svm import LinearSVC
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import PredefinedSplit, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import (
+    ConfusionMatrixDisplay,
+    classification_report,
+    confusion_matrix,
+    make_scorer,
+    recall_score,
+)
+# import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+RANDOM_SEED = 42
+
+GRID = [
+    {'scaler': [StandardScaler(), None],
+     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
+     'estimator__loss': ['squared_hinge'],
+     'estimator__C': np.logspace(-1, -5, num=5),
+     'estimator__class_weight': ['balanced', None],
+     'estimator__max_iter': [1000]
+     }
+]
+
+PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC(dual=True))])
+
+if __name__ == '__main__':
+
+    # load vggish features and labels
+    # NOTE: allow_pickle=True can execute code from a crafted .npy file; only
+    # load feature files from a trusted source.
+    devel_X_vgg = np.load(
+        "./features/vgg_features/x_devel_data_vgg.npy", allow_pickle=True
+    )
+
+    test_X_vgg = np.load(
+        "./features/vgg_features/x_test_data_vgg.npy", allow_pickle=True
+    )
+
+    train_X_vgg = np.load(
+        "./features/vgg_features/x_train_data_vgg.npy", allow_pickle=True
+    )
+
+    devel_y = np.load(
+        "./features/vgg_features/y_devel_label_vgg.npy", allow_pickle=True
+    )
+
+    test_y = np.load(
+        "./features/vgg_features/y_test_label_vgg.npy", allow_pickle=True
+    )
+
+    train_y = np.load(
+        "./features/vgg_features/y_train_label_vgg.npy", allow_pickle=True
+    )
+
+    # fold index -1 keeps a sample out of every validation fold, 0 puts it in
+    # the single validation fold: candidates are fit on train, scored on devel
+    num_train = train_X_vgg.shape[0]
+    num_devel = devel_X_vgg.shape[0]
+    split_indices = np.repeat([-1, 0], [num_train, num_devel])
+    split = PredefinedSplit(split_indices)
+
+    # drop singleton axes from the loaded VGGish arrays
+    # (presumably leaves samples x features -- verify against the extractor)
+    train_X_vgg = np.squeeze(train_X_vgg)
+    devel_X_vgg = np.squeeze(devel_X_vgg)
+    test_X_vgg = np.squeeze(test_X_vgg)
+
+    X = np.append(train_X_vgg, devel_X_vgg, axis=0)
+    y = np.append(train_y, devel_y, axis=0)
+
+    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
+                               scoring=make_scorer(recall_score, average='macro'),
+                               n_jobs=-1, cv=split, refit=True, verbose=1,
+                               return_train_score=False)
+
+    # find best estimator with grid search; refit=True refits it on all of X
+    # (train + devel); that refit model makes the test predictions below
+    grid_search.fit(X, y)
+    best_estimator = grid_search.best_estimator_
+
+    # fit clone of best estimator on train again for devel predictions
+    estimator = clone(best_estimator, safe=False)
+    estimator.fit(train_X_vgg, train_y)
+    preds = estimator.predict(devel_X_vgg)
+
+    # devel results
+    print('DEVEL')
+    uar = recall_score(devel_y, preds, average='macro')
+    cm = confusion_matrix(devel_y, preds)
+    print(f'UAR: {uar}\n{classification_report(devel_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # optional write grid_search to csv file
+    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
+
+    # test results
+    print('TEST')
+    preds = best_estimator.predict(test_X_vgg)
+    uar = recall_score(test_y, preds, average='macro')
+    cm = confusion_matrix(test_y, preds)
+    print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # plot the row-normalised test confusion matrix from the predictions
+    # above; plot_confusion_matrix no longer exists in current scikit-learn
+    fig, ax = plt.subplots()
+    disp = ConfusionMatrixDisplay(
+        confusion_matrix=confusion_matrix(test_y, preds, normalize='true'),
+        display_labels=['Negative', 'Positive'],
+    )
+    disp.plot(cmap=plt.cm.Blues, ax=ax)
+    ax.set_ylabel('True Label')
+    ax.set_xlabel('Predicted Label')
+    fig.savefig('cm_svm_vgg.jpg')