"""Grid-search a linear SVM on concatenated hand-crafted and VGG features.

The script loads pre-computed ``.npy`` feature/label arrays for the train,
devel and test partitions, tunes a ``LinearSVC`` pipeline with the devel
partition as the single validation fold, and prints macro-averaged recall
(labelled "UAR"), a classification report and a confusion matrix for the
devel and test partitions. Grid-search results are written to
``grid_search.csv`` in the current working directory.
"""
import json
import os
import sys
from glob import glob

import arff
import numpy as np
import pandas as pd
import scipy
import yaml
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer
from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.utils import resample
from tqdm import tqdm

RANDOM_SEED = 42

# Hyper-parameter grid explored by GridSearchCV. Keys address pipeline steps
# ('scaler', 'estimator') and, via the double underscore, estimator parameters.
GRID = [
    {
        'scaler': [StandardScaler(), None],
        'estimator': [LinearSVC(random_state=RANDOM_SEED)],
        'estimator__loss': ['squared_hinge'],
        'estimator__C': np.logspace(-1, -5, num=5),
        'estimator__class_weight': ['balanced', None],
        'estimator__max_iter': [100000],
    }
]

# Placeholder pipeline; both steps are overridden by the entries in GRID.
PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC())])

# Input directories, resolved relative to the current working directory.
VGG_DIR = "vgg_features"
HAND_DIR = "hand_features"

# Partition names as they appear inside the feature/label file names.
PARTITIONS = ("devel", "test", "train")


def _load_array(directory, filename):
    """Load one ``.npy`` file from *directory* and return the array.

    The path is built with ``os.path.join`` so the script works on both
    Windows and POSIX systems (the separator is no longer hard-coded).
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only run this on feature files from a trusted source. If the arrays are
    # plain numeric arrays the flag can probably be dropped -- TODO confirm.
    return np.load(os.path.join(directory, filename), allow_pickle=True)


def _print_report(title, y_true, y_pred):
    """Print *title*, then macro recall, classification report and confusion matrix."""
    print(title)
    uar = recall_score(y_true, y_pred, average='macro')
    cm = confusion_matrix(y_true, y_pred)
    print(f'UAR: {uar}\n{classification_report(y_true, y_pred)}\n\nConfusion Matrix:\n\n{cm}')


def main():
    """Run the grid search and report devel and test metrics."""
    # Load features and labels (same file order as before: VGG features,
    # hand-crafted features, then labels).
    vgg_X = {
        part: _load_array(VGG_DIR, f"x_{part}_data_vgg.npy") for part in PARTITIONS
    }
    hand_X = {
        part: _load_array(HAND_DIR, f"x_{part}_data.npy") for part in PARTITIONS
    }
    labels = {
        part: _load_array(VGG_DIR, f"y_{part}_label_vgg.npy") for part in PARTITIONS
    }

    # Fold index -1 keeps a row out of every validation fold; index 0 puts it
    # in the single validation fold. Train rows come first, devel rows second,
    # matching the stacking order of X and y below.
    num_train = vgg_X["train"].shape[0]
    num_devel = vgg_X["devel"].shape[0]
    split = PredefinedSplit(np.repeat([-1, 0], [num_train, num_devel]))

    # Hand-crafted features first, VGG features second, joined column-wise.
    # NOTE(review): np.squeeze presumably drops singleton axes so the VGG
    # features become 2-D; it would also drop the sample axis of a
    # single-sample partition -- TODO confirm stored shapes.
    features = {
        part: np.concatenate((hand_X[part], np.squeeze(vgg_X[part])), axis=1)
        for part in PARTITIONS
    }

    X = np.concatenate((features["train"], features["devel"]), axis=0)
    y = np.concatenate((labels["train"], labels["devel"]), axis=0)

    grid_search = GridSearchCV(
        estimator=PIPELINE,
        param_grid=GRID,
        scoring=make_scorer(recall_score, average='macro'),
        n_jobs=-1,
        cv=split,
        refit=True,
        verbose=1,
        return_train_score=False,
    )

    # The search fits each candidate on train and scores it on devel; with
    # refit=True the best candidate is then refitted on all of X (train+devel).
    grid_search.fit(X, y)
    best_estimator = grid_search.best_estimator_

    # Devel predictions must come from a model that has not seen devel, so
    # refit an unfitted clone of the best configuration on train only.
    train_only_estimator = clone(best_estimator, safe=False)
    train_only_estimator.fit(features["train"], labels["train"])
    devel_preds = train_only_estimator.predict(features["devel"])
    _print_report('DEVEL', labels["devel"], devel_preds)

    pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)

    # Test predictions use the estimator refitted on train+devel.
    test_preds = best_estimator.predict(features["test"])
    _print_report('TEST', labels["test"], test_preds)


if __name__ == '__main__':
    main()