From 02feb535b3dc85481bbee40369633b0aa9d8a6a2 Mon Sep 17 00:00:00 2001
From: em474re
Date: Tue, 7 Sep 2021 14:44:25 +0200
Subject: [PATCH] openl3 svm

---
 src/svm_openl3.py      | 143 ++++++++++++++++++++++++++++++
 src/svm_openl3_all.py  | 193 +++++++++++++++++++++++++++++++++++++++++
 src/svm_openl3_hand.py | 174 +++++++++++++++++++++++++++++++++++++
 src/svm_openl3_vgg.py  | 178 +++++++++++++++++++++++++++++++++++++
 4 files changed, 688 insertions(+)
 create mode 100644 src/svm_openl3.py
 create mode 100644 src/svm_openl3_all.py
 create mode 100644 src/svm_openl3_hand.py
 create mode 100644 src/svm_openl3_vgg.py

diff --git a/src/svm_openl3.py b/src/svm_openl3.py
new file mode 100644
index 0000000..26ebbad
--- /dev/null
+++ b/src/svm_openl3.py
@@ -0,0 +1,143 @@
+from sklearn.svm import LinearSVC
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import PredefinedSplit, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import pandas as pd
+
+RANDOM_SEED = 42
+
+# hyper-parameter grid: with/without feature scaling, and a log-spaced
+# range of regularisation strengths for the linear SVM
+GRID = [
+    {'scaler': [StandardScaler(), None],
+     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
+     'estimator__loss': ['squared_hinge'],
+     'estimator__C': np.logspace(-1, -5, num=5),
+     'estimator__class_weight': ['balanced', None],
+     'estimator__max_iter': [1000]
+     }
+]
+
+PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC(dual=True))])
+
+
+def sta_fun_2(npdata):
+    """Extract various statistical features from the numpy array provided as input.
+
+    :param npdata: the numpy array to extract the features from
+    :type npdata: numpy.ndarray
+    :return: the extracted features as a vector
+    :rtype: numpy.ndarray
+    """
+
+    # perform a sanity check
+    if npdata is None:
+        raise ValueError("Input array cannot be None")
+
+    # per-dimension mean and standard deviation over all frames
+    mean = np.mean(npdata, axis=0)
+    std = np.std(npdata, axis=0)
+
+    # return the features in a concatenated array (as a row vector)
+    return np.concatenate((mean, std), axis=0).reshape(1, -1)
+
+
+if __name__ == '__main__':
+
+    # load openL3 features and labels
+    # note: this assumes the listing order of the feature files matches the
+    # row order of the label CSVs
+    files = os.listdir('./features/openl3/train/')
+    filenames = ['./features/openl3/train/' + f for f in files]
+
+    X_train = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_train.extend(sta_fun_2(emb))
+    X_train = np.array(X_train, dtype=object)
+
+    files = os.listdir('./features/openl3/test/')
+    filenames = ['./features/openl3/test/' + f for f in files]
+
+    X_test = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_test.extend(sta_fun_2(emb))
+    X_test = np.array(X_test, dtype=object)
+
+    files = os.listdir('./features/openl3/devel/')
+    filenames = ['./features/openl3/devel/' + f for f in files]
+
+    X_devel = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_devel.extend(sta_fun_2(emb))
+    X_devel = np.array(X_devel, dtype=object)
+
+    df = pd.read_csv('./dist/lab/train.csv', sep=',')
+    y_train = df.label
+
+    df = pd.read_csv('./dist/lab/test.csv', sep=',')
+    y_test = df.label
+
+    df = pd.read_csv('./dist/lab/devel.csv', sep=',')
+    y_devel = df.label
+
+    num_train = X_train.shape[0]
+    num_devel = X_devel.shape[0]
+    split_indices = np.repeat([-1, 0], [num_train, num_devel])
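+    # PredefinedSplit: -1 marks samples that are never used for validation
+    # (train), 0 marks the single validation fold (devel), e.g.
+    #   np.repeat([-1, 0], [3, 2]) -> array([-1, -1, -1,  0,  0])
+    # so GridSearchCV scores every parameter combination on devel only.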
+    split = PredefinedSplit(split_indices)
+
+    train_X = np.squeeze(X_train)
+    devel_X = np.squeeze(X_devel)
+    test_X = np.squeeze(X_test)
+
+    X = np.append(train_X, devel_X, axis=0)
+    y = np.append(y_train, y_devel, axis=0)
+
+    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
+                               scoring=make_scorer(recall_score, average='macro'),
+                               n_jobs=-1, cv=split, refit=True, verbose=1,
+                               return_train_score=False)
+
+    # find best estimator with grid search
+    grid_search.fit(np.asarray(X), y)
+    best_estimator = grid_search.best_estimator_
+
+    # fit clone of best estimator on train again for devel predictions
+    estimator = clone(best_estimator, safe=False)
+    estimator.fit(train_X, y_train)
+    preds = estimator.predict(devel_X)
+
+    # devel results
+    print('DEVEL')
+    uar = recall_score(y_devel, preds, average='macro')
+    cm = confusion_matrix(y_devel, preds)
+    print(f'UAR: {uar}\n{classification_report(y_devel, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # optionally write grid_search results to a csv file
+    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
+
+    # test results
+    print('TEST')
+    preds = best_estimator.predict(test_X)
+    uar = recall_score(y_test, preds, average='macro')
+    cm = confusion_matrix(y_test, preds)
+    print(f'UAR: {uar}\n{classification_report(y_test, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    plot_confusion_matrix(best_estimator, X=test_X, y_true=y_test,
+                          cmap=plt.cm.Blues,
+                          display_labels=['Negative', 'Positive'],
+                          normalize='true')
+    plt.ylabel('True Label')
+    plt.xlabel('Predicted Label')
+    plt.savefig('cm_svm_openL3.jpg')
diff --git a/src/svm_openl3_all.py b/src/svm_openl3_all.py
new file mode 100644
index 0000000..ac41144
--- /dev/null
+++ b/src/svm_openl3_all.py
@@ -0,0 +1,193 @@
+from sklearn.svm import LinearSVC
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import PredefinedSplit, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import pandas as pd
+
+RANDOM_SEED = 42
+
+GRID = [
+    {'scaler': [StandardScaler(), None],
+     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
+     'estimator__loss': ['squared_hinge'],
+     'estimator__C': np.logspace(-1, -5, num=5),
+     'estimator__class_weight': ['balanced', None],
+     'estimator__max_iter': [1000]
+     }
+]
+
+PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC(dual=True))])
+
+
+def sta_fun_2(npdata):
+    """Extract various statistical features from the numpy array provided as input.
+
+    :param npdata: the numpy array to extract the features from
+    :type npdata: numpy.ndarray
+    :return: the extracted features as a vector
+    :rtype: numpy.ndarray
+    """
+
+    # perform a sanity check
+    if npdata is None:
+        raise ValueError("Input array cannot be None")
+
+    # per-dimension mean and standard deviation over all frames
+    mean = np.mean(npdata, axis=0)
+    std = np.std(npdata, axis=0)
+
+    # return the features in a concatenated array (as a row vector)
+    return np.concatenate((mean, std), axis=0).reshape(1, -1)
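+# sta_fun_2 pools a (num_frames, dims) embedding matrix into one clip-level
+# row vector of length 2*dims (per-dimension mean, then std); e.g. for
+# 512-dimensional openL3 frames each clip yields 1024 features.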
+
+
+if __name__ == '__main__':
+
+    # load openL3 features and labels
+    files = os.listdir('./features/openl3/train/')
+    filenames = ['./features/openl3/train/' + f for f in files]
+
+    X_train_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_train_openl3.extend(sta_fun_2(emb))
+    X_train_openl3 = np.array(X_train_openl3, dtype=object)
+
+    files = os.listdir('./features/openl3/test/')
+    filenames = ['./features/openl3/test/' + f for f in files]
+
+    X_test_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_test_openl3.extend(sta_fun_2(emb))
+    X_test_openl3 = np.array(X_test_openl3, dtype=object)
+
+    files = os.listdir('./features/openl3/devel/')
+    filenames = ['./features/openl3/devel/' + f for f in files]
+
+    X_devel_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_devel_openl3.extend(sta_fun_2(emb))
+    X_devel_openl3 = np.array(X_devel_openl3, dtype=object)
+
+    df = pd.read_csv('./dist/lab/train.csv', sep=',')
+    y_train = df.label
+
+    df = pd.read_csv('./dist/lab/test.csv', sep=',')
+    y_test = df.label
+
+    df = pd.read_csv('./dist/lab/devel.csv', sep=',')
+    y_devel = df.label
+
+    # precomputed VGG feature matrices (saved as object arrays,
+    # hence allow_pickle=True)
+    devel_X_vgg = np.load(
+        "./features/vgg_features/x_devel_data_vgg.npy", allow_pickle=True
+    )
+    test_X_vgg = np.load(
+        "./features/vgg_features/x_test_data_vgg.npy", allow_pickle=True
+    )
+    train_X_vgg = np.load(
+        "./features/vgg_features/x_train_data_vgg.npy", allow_pickle=True
+    )
+
+    # precomputed handcrafted feature matrices
+    devel_X_hand = np.load(
+        "./features/hand_features/x_devel_data.npy", allow_pickle=True
+    )
+    test_X_hand = np.load(
+        "./features/hand_features/x_test_data.npy", allow_pickle=True
+    )
+    train_X_hand = np.load(
+        "./features/hand_features/x_train_data.npy", allow_pickle=True
+    )
+
+    num_train = train_X_vgg.shape[0]
+    num_devel = devel_X_vgg.shape[0]
+    split_indices = np.repeat([-1, 0], [num_train, num_devel])
+    split = PredefinedSplit(split_indices)
+
+    train_X_openl3 = np.squeeze(X_train_openl3)
+    devel_X_openl3 = np.squeeze(X_devel_openl3)
+    test_X_openl3 = np.squeeze(X_test_openl3)
+
+    train_X_vgg = np.squeeze(train_X_vgg)
+    devel_X_vgg = np.squeeze(devel_X_vgg)
+    test_X_vgg = np.squeeze(test_X_vgg)
+
+    # fuse all three feature sets by horizontal concatenation
+    devel_X = np.concatenate(
+        (devel_X_hand, devel_X_vgg, devel_X_openl3), axis=1)
+    test_X = np.concatenate(
+        (test_X_hand, test_X_vgg, test_X_openl3), axis=1)
+    train_X = np.concatenate(
+        (train_X_hand, train_X_vgg, train_X_openl3), axis=1)
+
+    X = np.append(train_X, devel_X, axis=0)
+    y = np.append(y_train, y_devel, axis=0)
+
+    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
+                               scoring=make_scorer(recall_score, average='macro'),
+                               n_jobs=-1, cv=split, refit=True, verbose=1,
+                               return_train_score=False)
+
+    # find best estimator with grid search
+    grid_search.fit(X, y)
+    best_estimator = grid_search.best_estimator_
+
+    # fit clone of best estimator on train again for devel predictions
+    estimator = clone(best_estimator, safe=False)
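+    # the grid-search refit used train+devel, so retrain on train only to
+    # keep the devel predictions below out-of-sample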
+    estimator.fit(train_X, y_train)
+    preds = estimator.predict(devel_X)
+
+    # devel results
+    print('DEVEL')
+    uar = recall_score(y_devel, preds, average='macro')
+    cm = confusion_matrix(y_devel, preds)
+    print(f'UAR: {uar}\n{classification_report(y_devel, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # optionally write grid_search results to a csv file
+    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
+
+    # test results
+    print('TEST')
+    preds = best_estimator.predict(test_X)
+    uar = recall_score(y_test, preds, average='macro')
+    cm = confusion_matrix(y_test, preds)
+    print(f'UAR: {uar}\n{classification_report(y_test, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    plot_confusion_matrix(best_estimator, X=test_X, y_true=y_test,
+                          cmap=plt.cm.Blues,
+                          display_labels=['Negative', 'Positive'],
+                          normalize='true')
+    plt.ylabel('True Label')
+    plt.xlabel('Predicted Label')
+    plt.savefig('cm_svm_all.jpg')
\ No newline at end of file
diff --git a/src/svm_openl3_hand.py b/src/svm_openl3_hand.py
new file mode 100644
index 0000000..e84ec5d
--- /dev/null
+++ b/src/svm_openl3_hand.py
@@ -0,0 +1,174 @@
+from sklearn.svm import LinearSVC
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import PredefinedSplit, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import pandas as pd
+
+RANDOM_SEED = 42
+
+GRID = [
+    {'scaler': [StandardScaler(), None],
+     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
+     'estimator__loss': ['squared_hinge'],
+     'estimator__C': np.logspace(-1, -5, num=5),
+     'estimator__class_weight': ['balanced', None],
+     'estimator__max_iter': [1000]
+     }
+]
+
+PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC(dual=True))])
+
+
+def sta_fun_2(npdata):
+    """Extract various statistical features from the numpy array provided as input.
+
+    :param npdata: the numpy array to extract the features from
+    :type npdata: numpy.ndarray
+    :return: the extracted features as a vector
+    :rtype: numpy.ndarray
+    """
+
+    # perform a sanity check
+    if npdata is None:
+        raise ValueError("Input array cannot be None")
+
+    # per-dimension mean and standard deviation over all frames
+    mean = np.mean(npdata, axis=0)
+    std = np.std(npdata, axis=0)
+
+    # return the features in a concatenated array (as a row vector)
+    return np.concatenate((mean, std), axis=0).reshape(1, -1)
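+# this variant fuses the handcrafted features with the openL3 statistics by
+# horizontal concatenation (np.concatenate(..., axis=1)); all feature sets
+# must therefore list the same clips in the same row order.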
+
+
+if __name__ == '__main__':
+
+    # load handcrafted and openL3 features and labels
+    files = os.listdir('./features/openl3/train/')
+    filenames = ['./features/openl3/train/' + f for f in files]
+
+    X_train_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_train_openl3.extend(sta_fun_2(emb))
+    X_train_openl3 = np.array(X_train_openl3, dtype=object)
+
+    files = os.listdir('./features/openl3/test/')
+    filenames = ['./features/openl3/test/' + f for f in files]
+
+    X_test_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_test_openl3.extend(sta_fun_2(emb))
+    X_test_openl3 = np.array(X_test_openl3, dtype=object)
+
+    files = os.listdir('./features/openl3/devel/')
+    filenames = ['./features/openl3/devel/' + f for f in files]
+
+    X_devel_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_devel_openl3.extend(sta_fun_2(emb))
+    X_devel_openl3 = np.array(X_devel_openl3, dtype=object)
+
+    df = pd.read_csv('./dist/lab/train.csv', sep=',')
+    y_train = df.label
+
+    df = pd.read_csv('./dist/lab/test.csv', sep=',')
+    y_test = df.label
+
+    df = pd.read_csv('./dist/lab/devel.csv', sep=',')
+    y_devel = df.label
+
+    # precomputed handcrafted feature matrices
+    devel_X_hand = np.load(
+        "./features/hand_features/x_devel_data.npy", allow_pickle=True
+    )
+    test_X_hand = np.load(
+        "./features/hand_features/x_test_data.npy", allow_pickle=True
+    )
+    train_X_hand = np.load(
+        "./features/hand_features/x_train_data.npy", allow_pickle=True
+    )
+
+    num_train = train_X_hand.shape[0]
+    num_devel = devel_X_hand.shape[0]
+    split_indices = np.repeat([-1, 0], [num_train, num_devel])
+    split = PredefinedSplit(split_indices)
+
+    train_X_openl3 = np.squeeze(X_train_openl3)
+    devel_X_openl3 = np.squeeze(X_devel_openl3)
+    test_X_openl3 = np.squeeze(X_test_openl3)
+
+    # fuse handcrafted and openL3 features by horizontal concatenation
+    devel_X = np.concatenate((devel_X_hand, devel_X_openl3), axis=1)
+    test_X = np.concatenate((test_X_hand, test_X_openl3), axis=1)
+    train_X = np.concatenate((train_X_hand, train_X_openl3), axis=1)
+
+    X = np.append(train_X, devel_X, axis=0)
+    y = np.append(y_train, y_devel, axis=0)
+
+    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
+                               scoring=make_scorer(recall_score, average='macro'),
+                               n_jobs=-1, cv=split, refit=True, verbose=1,
+                               return_train_score=False)
+
+    # find best estimator with grid search
+    grid_search.fit(X, y)
+    best_estimator = grid_search.best_estimator_
+
+    # fit clone of best estimator on train again for devel predictions
+    estimator = clone(best_estimator, safe=False)
+    estimator.fit(train_X, y_train)
+    preds = estimator.predict(devel_X)
+
+    # devel results
+    print('DEVEL')
+    uar = recall_score(y_devel, preds, average='macro')
+    cm = confusion_matrix(y_devel, preds)
+    print(f'UAR: {uar}\n{classification_report(y_devel, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    # optionally write grid_search results to a csv file
+    # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
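+    # UAR (unweighted average recall), i.e. macro-averaged recall, weights
+    # every class equally regardless of how many samples it has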
+
+    # test results
+    print('TEST')
+    preds = best_estimator.predict(test_X)
+    uar = recall_score(y_test, preds, average='macro')
+    cm = confusion_matrix(y_test, preds)
+    print(f'UAR: {uar}\n{classification_report(y_test, preds)}\n\nConfusion Matrix:\n\n{cm}')
+
+    plot_confusion_matrix(best_estimator, X=test_X, y_true=y_test,
+                          cmap=plt.cm.Blues,
+                          display_labels=['Negative', 'Positive'],
+                          normalize='true')
+    plt.ylabel('True Label')
+    plt.xlabel('Predicted Label')
+    plt.savefig('cm_svm_openL3_hand.jpg')
\ No newline at end of file
diff --git a/src/svm_openl3_vgg.py b/src/svm_openl3_vgg.py
new file mode 100644
index 0000000..df691f6
--- /dev/null
+++ b/src/svm_openl3_vgg.py
@@ -0,0 +1,178 @@
+from sklearn.svm import LinearSVC
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import PredefinedSplit, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer, plot_confusion_matrix
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import pandas as pd
+
+RANDOM_SEED = 42
+
+GRID = [
+    {'scaler': [StandardScaler(), None],
+     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
+     'estimator__loss': ['squared_hinge'],
+     'estimator__C': np.logspace(-1, -5, num=5),
+     'estimator__class_weight': ['balanced', None],
+     'estimator__max_iter': [1000]
+     }
+]
+
+PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC(dual=True))])
+
+
+def sta_fun_2(npdata):
+    """Extract various statistical features from the numpy array provided as input.
+
+    :param npdata: the numpy array to extract the features from
+    :type npdata: numpy.ndarray
+    :return: the extracted features as a vector
+    :rtype: numpy.ndarray
+    """
+
+    # perform a sanity check
+    if npdata is None:
+        raise ValueError("Input array cannot be None")
+
+    # per-dimension mean and standard deviation over all frames
+    mean = np.mean(npdata, axis=0)
+    std = np.std(npdata, axis=0)
+
+    # return the features in a concatenated array (as a row vector)
+    return np.concatenate((mean, std), axis=0).reshape(1, -1)
+
+
+if __name__ == '__main__':
+
+    # load VGG and openL3 features and labels
+    files = os.listdir('./features/openl3/train/')
+    filenames = ['./features/openl3/train/' + f for f in files]
+
+    X_train_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_train_openl3.extend(sta_fun_2(emb))
+    X_train_openl3 = np.array(X_train_openl3, dtype=object)
+
+    files = os.listdir('./features/openl3/test/')
+    filenames = ['./features/openl3/test/' + f for f in files]
+
+    X_test_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_test_openl3.extend(sta_fun_2(emb))
+    X_test_openl3 = np.array(X_test_openl3, dtype=object)
+
+    files = os.listdir('./features/openl3/devel/')
+    filenames = ['./features/openl3/devel/' + f for f in files]
+
+    X_devel_openl3 = []
+    for fname in filenames:
+        emb = np.load(fname)['embedding']
+        X_devel_openl3.extend(sta_fun_2(emb))
+    X_devel_openl3 = np.array(X_devel_openl3, dtype=object)
+
+    df = pd.read_csv('./dist/lab/train.csv', sep=',')
+    y_train = df.label
+
+    df = pd.read_csv('./dist/lab/test.csv', sep=',')
+    y_test = df.label
+
+    df = pd.read_csv('./dist/lab/devel.csv', sep=',')
+    y_devel = df.label
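+    # precomputed VGG feature matrices (saved as object arrays,
+    # hence allow_pickle=True)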
"./features/vgg_features/x_devel_data_vgg.npy", allow_pickle=True + ) + + test_X_vgg = np.load( + "./features/vgg_features/x_test_data_vgg.npy", allow_pickle=True + ) + + train_X_vgg = np.load( + "./features/vgg_features/x_train_data_vgg.npy", allow_pickle=True + ) + + num_train = train_X_vgg.shape[0] + num_devel = devel_X_vgg.shape[0] + split_indices = np.repeat([-1, 0], [num_train, num_devel]) + split = PredefinedSplit(split_indices) + + train_X_openl3 = np.squeeze(X_train_openl3) + devel_X_openl3 = np.squeeze(X_devel_openl3) + test_X_openl3 = np.squeeze(X_test_openl3) + + train_X_vgg = np.squeeze(train_X_vgg) + devel_X_vgg = np.squeeze(devel_X_vgg) + test_X_vgg = np.squeeze(test_X_vgg) + + devel_X = np.concatenate( + ( + devel_X_vgg, + devel_X_openl3 + ), + axis=1, + ) + + test_X = np.concatenate( + ( + test_X_vgg, + test_X_openl3 + ), + axis=1, + ) + + train_X = np.concatenate( + ( + train_X_vgg, + train_X_openl3 + ), + axis=1, + ) + + X = np.append(train_X, devel_X, axis=0) + y = np.append(y_train, y_devel, axis=0) + + grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID, + scoring=make_scorer(recall_score, average='macro'), + n_jobs=-1, cv=split, refit=True, verbose=1, + return_train_score=False) + + # find best estimator with grid search + grid_search.fit(X,y) + best_estimator = grid_search.best_estimator_ + + # fit clone of best estimator on train again for devel predictions + estimator = clone(best_estimator, safe=False) + estimator.fit(train_X, y_train) + preds = estimator.predict(devel_X) + + metrics = {'dev': {}, 'test': {}} + + # devel results + print('DEVEL') + uar = recall_score(y_devel, preds, average='macro') + cm = confusion_matrix(y_devel, preds) + print(f'UAR: {uar}\n{classification_report(y_devel, preds)}\n\nConfusion Matrix:\n\n{cm}') + + # optional write grid_search to csv file + # pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False) + + # test results + print('TEST') + preds = best_estimator.predict(test_X) + uar = recall_score(y_test, preds, average='macro') + cm = confusion_matrix(y_test, preds) + print(f'UAR: {uar}\n{classification_report(y_test, preds)}\n\nConfusion Matrix:\n\n{cm}') + + fig = plt.figure() + plot_confusion_matrix(best_estimator, X=test_X, y_true=y_test, cmap=plt.cm.Blues, display_labels=[ + 'Negative', 'Positive'], normalize='true') + plt.ylabel('True Label') + plt.xlabel('Predicated Label') + plt.savefig('cm_svm_openL3_vgg.jpg') \ No newline at end of file