feat: make func for cross validation based on session split
parent c009b0cdb2
commit e0e88abf41
@@ -4,9 +4,11 @@ import numpy as np
 from sklearn.model_selection import train_test_split
 import tensorflow as tf
 import tensorflow.keras as keras
+from keras import backend as K
 from pathlib import Path
 import pandas as pd
 import matplotlib.pyplot as plt
+import statistics
 
 # Path to json file that stores MFCCs and subject labels for each processed sample
 DATA_PATH_MFCC = str(Path.cwd()) + "/mfcc_data.json"
@@ -14,7 +16,7 @@ DATA_PATH_MFCC = str(Path.cwd()) + "/mfcc_data.json"
 # Loads data from the json file and reshapes X_data(samples, 1, 208) and y_data(samples, 1)
 # Input: JSON path
 # Output: X(mfcc data), y(labels), session_lengths
-def load_data_from_json(data_path):
+def load_data_from_json(data_path, nr_classes):
 
     with open(data_path, "r") as fp:
         data = json.load(fp)
@@ -23,12 +25,12 @@ def load_data_from_json(data_path):
     X = np.array(data['mfcc'])
     #print(X.shape)
     X = X.reshape(X.shape[0], 1, X.shape[1])
-    #print(X.shape)
+    print(X.shape)
     
     y = np.array(data["labels"])
     #print(y.shape)
-    y = y.reshape(y.shape[0], 1)
-    #print(y.shape)
+    y = keras.utils.to_categorical(y, nr_classes)
+    print(y.shape)
 
     session_lengths = np.array(data['session_lengths'])
     #print(session_lengths.shape)
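Context for the label change above: the labels go from integer class indices to one-hot vectors, which is what the switch to categorical_crossentropy in the next hunk relies on. A minimal sketch of the effect, not part of the commit (the 5-class count matches the script's five subjects):

import numpy as np
import tensorflow.keras as keras

y = np.array([0, 2, 1, 4, 3])                # integer subject labels
y_onehot = keras.utils.to_categorical(y, 5)  # one-hot, shape (5, 5)
print(y_onehot[0])                           # [1. 0. 0. 0. 0.]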
@@ -188,18 +190,26 @@ def RNN_LSTM(input_shape, nr_classes=5):
 # Trains the model
 # Input: Keras.model, batch_size, nr epochs, training, and validation data
 # Output: History
-def train(model, X_train, X_validation, y_train, y_validation, batch_size=64, epochs=30):
+def train(model, X_train, y_train, verbose, batch_size=64, epochs=30, X_validation=None, y_validation=None):
 
     optimiser = keras.optimizers.Adam(learning_rate=0.0001)
     model.compile(optimizer=optimiser,
-                  loss='sparse_categorical_crossentropy',
+                  loss='categorical_crossentropy',
                   metrics=['accuracy'])
 
-    history = model.fit(X_train,
-                        y_train,
-                        validation_data=(X_validation, y_validation),
-                        batch_size=batch_size,
-                        epochs=epochs)
+    if X_validation != None:
+        history = model.fit(X_train,
+                            y_train,
+                            validation_data=(X_validation, y_validation),
+                            batch_size=batch_size,
+                            epochs=epochs,
+                            verbose=verbose)
+    else:
+        history = model.fit(X_train,
+                            y_train,
+                            batch_size=batch_size,
+                            epochs=epochs,
+                            verbose=verbose)
     return history
 
 
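A hedged usage sketch of the reworked train() signature (model, X_train, X_val and friends are placeholder names, not defined by this hunk): validation data is now optional and verbosity is forwarded to model.fit.

# Training plus validation, as before:
history = train(model, X_train, y_train, verbose=1, batch_size=64, epochs=30,
                X_validation=X_val, y_validation=y_val)
# Training only, the way the cross-validation helper below calls it:
history = train(model, X_train, y_train, verbose=0, batch_size=64, epochs=30)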
@@ -224,32 +234,94 @@ def print_session_train_data(X_train, X_test, y_train, y_test, session_lengths,
     print('Datapoints in session ' + str(session_nr) + ':', get_nr_in_session(session_lengths, session_nr))
     print('Should be remaining:', 2806 - get_nr_in_session(session_lengths, session_nr))
 
+# Reshapes training and test data into batches
+# Input: training, test data (and validation), batch_size
+# Output: training, test data (and validation)
+def batch_formatting(X_train, X_test, y_train, y_test, batch_size=64, nr_classes=5, X_validation=None, y_validation=None):
+    
+    train_splits = X_train.shape[0] // batch_size
+    train_rest = X_train.shape[0] % batch_size
+    test_splits = X_test.shape[0] // batch_size
+    test_rest = X_test.shape[0] % batch_size
+    
+    X_train = X_train[:-train_rest]
+    y_train = y_train[:-train_rest]
+    X_test = X_test[:-test_rest]
+    y_test = y_test[:-test_rest]
+    
+    X_train_batch = np.reshape(X_train, (batch_size, train_splits, 208))
+    y_train_batch = np.reshape(y_train, (batch_size, train_splits, nr_classes))
+    X_test_batch = np.reshape(X_test, (batch_size, test_splits, 208))
+    y_test_batch = np.reshape(y_test, (batch_size, test_splits, nr_classes))
+
+    if X_validation != None:
+        val_splits = X_validation.shape[0] // batch_size
+        val_rest = X_validation.shape[0] % batch_size
+        X_validation = X_validation[:-val_rest]
+        y_validation = y_validation[:-val_rest]
+        X_val_batch = np.reshape(X_validation, (batch_size, val_splits, 208))
+        y_val_batch = np.reshape(y_validation, (batch_size, val_splits))
+        return X_train_batch, X_test_batch, y_train_batch, y_test_batch, X_val_batch, y_val_batch
+
+    return X_train_batch, X_test_batch, y_train_batch, y_test_batch
+
+def session_cross_validation(X, y, session_lengths, nr_sessions, batch_size=64, epochs=30):
+    session_training_results = []
+    for i in range(nr_sessions):
+        model = RNN_LSTM(input_shape=(1, 208))
+        X_train_session, X_test_session, y_train_session, y_test_session = prepare_datasets_sessions(X, y, session_lengths, i)
+        train(model, X_train_session, y_train_session, verbose=0, batch_size=batch_size, epochs=epochs)
+        test_loss, test_acc = model.evaluate(X_test_session, y_test_session, verbose=2)
+        session_training_results.append(test_acc)
+        del model
+        K.clear_session()
+        print('Session', i, 'as test data gives accuracy:', test_acc)
+    average_result = statistics.mean((session_training_results))
+    return average_result, session_training_results
+
 
 if __name__ == "__main__":
 
-    # Load data
-    X, y, session_lengths = load_data_from_json(DATA_PATH_MFCC)
-    
-    # Get prepared data: train, validation, and test
-    session_nr = 4
-    X_train, X_test, y_train, y_test = prepare_datasets_sessions(X, y, session_lengths, session_nr)
-    #print_session_train_data(X_train, X_test, y_train, y_test, session_lengths, session_nr)
-
-    
-    # Make model
-    model = RNN_LSTM(input_shape=(1, 208))
-    model.summary()
-    
-    # Train network
-    history = train(model, X_train, X_validation, y_train, y_validation, batch_size=64, epochs=30)
-    
-    # plot accuracy/error for training and validation
-    plot_history(history)
-
-    # evaluate model on test set
-    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
-    print('\nTest accuracy:', test_acc)
+    # ----- Load data ------
+    # X.shape = (2806, 1, 208)
+    # y.shape = (2806, 5)
+    # session_lengths.shape = (5, 4)
+    X, y, session_lengths = load_data_from_json(DATA_PATH_MFCC, nr_classes=5)
+
+    # Parameters:
+    NR_SUBJECTS = 5
+    NR_SESSIONS = 4
+    BATCH_SIZE = 64
+    EPOCHS = 30
+    
+    # ----- Get prepared data: train, validation, and test ------
+    '''
+    X_train, X_test, y_train, y_test = prepare_datasets_sessions(X, y, session_lengths, session_nr)
+    print(X_train.shape)
+    print(y_train.shape)
+    print(X_test.shape)
+    print(y_test.shape)
+    #print_session_train_data(X_train, X_test, y_train, y_test, session_lengths, session_nr)
+    '''
+
+    #'''
+    # ----- Make model ------
+    #model = RNN_LSTM(input_shape=(1, 208)) # (timestep, coefficients)
+    #model.summary()
+    
+    # ----- Train network ------
+    #history = train(model, X_train, y_train, batch_size=batch_size, epochs=30)
+    average = session_cross_validation(X, y, session_lengths, NR_SESSIONS, BATCH_SIZE, EPOCHS)
+    print('\nCrossvalidated:', average)
+    
+    # plot accuracy/error for training and validation
+    #plot_history(history)
+
+    # ----- Evaluate model on test set ------
+    #test_loss, test_acc = model.evaluate(X_test, y_test, verbose=1)
+    #print('\nTest accuracy:', test_acc)
+    #'''
 
 
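Note that session_cross_validation() returns a (mean accuracy, per-session accuracies) tuple, so the print in __main__ shows the whole tuple. A small sketch, assuming the same variables as in the script, that unpacks the result instead:

mean_acc, per_session_acc = session_cross_validation(X, y, session_lengths,
                                                     NR_SESSIONS, BATCH_SIZE, EPOCHS)
print('\nCross-validated mean accuracy:', mean_acc)
print('Per-session accuracies:', per_session_acc)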