vggish svm
This commit is contained in:
parent
f7cfe4b93b
commit
21cf2b224c
139
src/svm_hand_vgg.py
Normal file
139
src/svm_hand_vgg.py
Normal file
@ -0,0 +1,139 @@
|
||||
from sklearn.svm import LinearSVC
|
||||
from sklearn.base import clone
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.model_selection import PredefinedSplit, GridSearchCV
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
|
||||
# import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Seed handed to the LinearSVC candidate in GRID so repeated runs are reproducible.
RANDOM_SEED = 42

# Hyper-parameter grid explored by GridSearchCV.
# - 'scaler':    with and without feature standardisation
# - 'estimator': the seeded LinearSVC that stands in for the pipeline's
#                placeholder 'estimator' step during the search
# - 'estimator__C': five log-spaced values, 1e-1 down to 1e-5
GRID = [
    dict(
        scaler=[StandardScaler(), None],
        estimator=[LinearSVC(random_state=RANDOM_SEED)],
        estimator__loss=['squared_hinge'],
        estimator__C=np.logspace(-1, -5, num=5),
        estimator__class_weight=['balanced', None],
        estimator__max_iter=[1000],
    ),
]

# Two-step pipeline skeleton; both steps are filled in from GRID by the search.
PIPELINE = Pipeline(
    steps=[
        ('scaler', None),
        ('estimator', LinearSVC(dual=True)),
    ]
)
|
||||
|
||||
def _load_array(path):
    """Load a single ``.npy`` array from *path*.

    NOTE(review): ``allow_pickle=True`` makes ``np.load`` unpickle arbitrary
    Python objects, which is unsafe for files from untrusted sources. It is
    kept to preserve the original loading behaviour; drop it if the feature
    files turn out to be plain numeric arrays.
    """
    return np.load(path, allow_pickle=True)


def _print_report(title, y_true, y_pred):
    """Print the UAR (macro recall), classification report and confusion matrix."""
    print(title)
    uar = recall_score(y_true, y_pred, average='macro')
    cm = confusion_matrix(y_true, y_pred)
    print(f'UAR: {uar}\n{classification_report(y_true, y_pred)}\n\nConfusion Matrix:\n\n{cm}')


if __name__ == '__main__':

    # load handcrafted and vggish features and labels
    devel_X_vgg = _load_array("./features/vgg_features/x_devel_data_vgg.npy")
    test_X_vgg = _load_array("./features/vgg_features/x_test_data_vgg.npy")
    train_X_vgg = _load_array("./features/vgg_features/x_train_data_vgg.npy")

    devel_X_hand = _load_array("./features/hand_features/x_devel_data.npy")
    test_X_hand = _load_array("./features/hand_features/x_test_data.npy")
    train_X_hand = _load_array("./features/hand_features/x_train_data.npy")

    devel_y = _load_array("./features/vgg_features/y_devel_label_vgg.npy")
    test_y = _load_array("./features/vgg_features/y_test_label_vgg.npy")
    train_y = _load_array("./features/vgg_features/y_train_label_vgg.npy")

    # Single predefined fold: rows marked -1 (train) are never used for
    # validation, rows marked 0 (devel) form the only validation fold.
    num_train = train_X_vgg.shape[0]
    num_devel = devel_X_vgg.shape[0]
    split_indices = np.repeat([-1, 0], [num_train, num_devel])
    split = PredefinedSplit(split_indices)

    # drop singleton axes from the vggish features before stacking
    train_X_vgg = np.squeeze(train_X_vgg)
    devel_X_vgg = np.squeeze(devel_X_vgg)
    test_X_vgg = np.squeeze(test_X_vgg)

    # fuse handcrafted and vggish features column-wise, per partition
    devel_X = np.concatenate((devel_X_hand, devel_X_vgg), axis=1)
    test_X = np.concatenate((test_X_hand, test_X_vgg), axis=1)
    train_X = np.concatenate((train_X_hand, train_X_vgg), axis=1)

    # train rows first, devel rows second -- must match split_indices above
    X = np.concatenate((train_X, devel_X), axis=0)
    y = np.concatenate((train_y, devel_y), axis=0)

    grid_search = GridSearchCV(
        estimator=PIPELINE,
        param_grid=GRID,
        scoring=make_scorer(recall_score, average='macro'),
        n_jobs=-1,
        cv=split,
        refit=True,
        verbose=1,
        return_train_score=False,
    )

    # find best estimator with grid search; refit=True retrains the winner on
    # all of X (train + devel), and that model is the one evaluated on test
    grid_search.fit(X, y)
    best_estimator = grid_search.best_estimator_

    # fit clone of best estimator on train again for devel predictions, so the
    # devel score is not computed by a model that has already seen devel
    estimator = clone(best_estimator, safe=False)
    estimator.fit(train_X, train_y)
    _print_report('DEVEL', devel_y, estimator.predict(devel_X))

    # optional write grid_search to csv file
    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)

    # test results
    _print_report('TEST', test_y, best_estimator.predict(test_X))

    # Draw on an explicit axes: without ``ax=`` plot_confusion_matrix opens its
    # own figure and a separately created one would stay empty and open.
    # NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0
    # and removed in 1.2 (replacement: ConfusionMatrixDisplay.from_estimator);
    # it is kept here because it is what this file imports.
    fig, ax = plt.subplots()
    plot_confusion_matrix(
        best_estimator,
        X=test_X,
        y_true=test_y,
        cmap=plt.cm.Blues,
        display_labels=['Negative', 'Positive'],
        normalize='true',
        ax=ax,
    )
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.savefig('cm_svm_hand_vgg.jpg')
    plt.close(fig)
|
101
src/svm_vgg.py
Normal file
101
src/svm_vgg.py
Normal file
@ -0,0 +1,101 @@
|
||||
from sklearn.svm import LinearSVC
|
||||
from sklearn.base import clone
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.model_selection import PredefinedSplit, GridSearchCV
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
|
||||
# import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Seed handed to the LinearSVC candidate in GRID so repeated runs are reproducible.
RANDOM_SEED = 42

# Hyper-parameter grid explored by GridSearchCV.
# - 'scaler':    with and without feature standardisation
# - 'estimator': the seeded LinearSVC that stands in for the pipeline's
#                placeholder 'estimator' step during the search
# - 'estimator__C': five log-spaced values, 1e-1 down to 1e-5
GRID = [
    dict(
        scaler=[StandardScaler(), None],
        estimator=[LinearSVC(random_state=RANDOM_SEED)],
        estimator__loss=['squared_hinge'],
        estimator__C=np.logspace(-1, -5, num=5),
        estimator__class_weight=['balanced', None],
        estimator__max_iter=[1000],
    ),
]

# Two-step pipeline skeleton; both steps are filled in from GRID by the search.
PIPELINE = Pipeline(
    steps=[
        ('scaler', None),
        ('estimator', LinearSVC(dual=True)),
    ]
)
|
||||
|
||||
def _load_array(path):
    """Load a single ``.npy`` array from *path*.

    NOTE(review): ``allow_pickle=True`` makes ``np.load`` unpickle arbitrary
    Python objects, which is unsafe for files from untrusted sources. It is
    kept to preserve the original loading behaviour; drop it if the feature
    files turn out to be plain numeric arrays.
    """
    return np.load(path, allow_pickle=True)


def _print_report(title, y_true, y_pred):
    """Print the UAR (macro recall), classification report and confusion matrix."""
    print(title)
    uar = recall_score(y_true, y_pred, average='macro')
    cm = confusion_matrix(y_true, y_pred)
    print(f'UAR: {uar}\n{classification_report(y_true, y_pred)}\n\nConfusion Matrix:\n\n{cm}')


if __name__ == '__main__':

    # load vggish features and labels
    devel_X_vgg = _load_array("./features/vgg_features/x_devel_data_vgg.npy")
    test_X_vgg = _load_array("./features/vgg_features/x_test_data_vgg.npy")
    train_X_vgg = _load_array("./features/vgg_features/x_train_data_vgg.npy")

    devel_y = _load_array("./features/vgg_features/y_devel_label_vgg.npy")
    test_y = _load_array("./features/vgg_features/y_test_label_vgg.npy")
    train_y = _load_array("./features/vgg_features/y_train_label_vgg.npy")

    # Single predefined fold: rows marked -1 (train) are never used for
    # validation, rows marked 0 (devel) form the only validation fold.
    num_train = train_X_vgg.shape[0]
    num_devel = devel_X_vgg.shape[0]
    split_indices = np.repeat([-1, 0], [num_train, num_devel])
    split = PredefinedSplit(split_indices)

    # drop singleton axes from the vggish features
    train_X_vgg = np.squeeze(train_X_vgg)
    devel_X_vgg = np.squeeze(devel_X_vgg)
    test_X_vgg = np.squeeze(test_X_vgg)

    # train rows first, devel rows second -- must match split_indices above
    X = np.concatenate((train_X_vgg, devel_X_vgg), axis=0)
    y = np.concatenate((train_y, devel_y), axis=0)

    grid_search = GridSearchCV(
        estimator=PIPELINE,
        param_grid=GRID,
        scoring=make_scorer(recall_score, average='macro'),
        n_jobs=-1,
        cv=split,
        refit=True,
        verbose=1,
        return_train_score=False,
    )

    # find best estimator with grid search; refit=True retrains the winner on
    # all of X (train + devel), and that model is the one evaluated on test
    grid_search.fit(X, y)
    best_estimator = grid_search.best_estimator_

    # fit clone of best estimator on train again for devel predictions, so the
    # devel score is not computed by a model that has already seen devel
    estimator = clone(best_estimator, safe=False)
    estimator.fit(train_X_vgg, train_y)
    _print_report('DEVEL', devel_y, estimator.predict(devel_X_vgg))

    # optional write grid_search to csv file
    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)

    # test results
    _print_report('TEST', test_y, best_estimator.predict(test_X_vgg))

    # Draw on an explicit axes: without ``ax=`` plot_confusion_matrix opens its
    # own figure and a separately created one would stay empty and open.
    # NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0
    # and removed in 1.2 (replacement: ConfusionMatrixDisplay.from_estimator);
    # it is kept here because it is what this file imports.
    fig, ax = plt.subplots()
    plot_confusion_matrix(
        best_estimator,
        X=test_X_vgg,
        y_true=test_y,
        cmap=plt.cm.Blues,
        display_labels=['Negative', 'Positive'],
        normalize='true',
        ax=ax,
    )
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.savefig('cm_svm_vgg.jpg')
    plt.close(fig)
|
Loading…
Reference in New Issue
Block a user