chore: improve code quality

Skudalen 2021-07-06 16:33:35 +02:00
parent 155816235e
commit 5c256143b1
7 changed files with 27262 additions and 36668 deletions

.vscode/launch.json (vendored, new file, +15 lines)
View File

@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}

View File

@@ -1,3 +1,4 @@
 {
-    "python.pythonPath": "/Users/Markus/opt/Anaconda3/python"
+    "python.pythonPath": "/Users/Markus/opt/anaconda3/envs/Slovakia 2021/bin/python",
+    //"terminal.integrated.inheritEnv": false
 }

View File

@@ -501,7 +501,7 @@ class DL_data_handler:
     def __init__(self, csv_handler:CSV_handler) -> None:
         self.csv_handler = csv_handler
-        # Should med 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
+        # Should med 4 sessions * split nr of samples per person. Each sample is structured like this: [sample_df, samplerate]
         self.samples_per_subject = {1: [],
                                     2: [],
                                     3: [],
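Note: the comment above describes the container only loosely; concretely, each subject key is meant to map to a list of [sample_df, samplerate] pairs. A minimal illustration (the values are hypothetical, not taken from the repo):

import pandas as pd

# Hypothetical contents: one stored sample for subject 1.
sample_df = pd.DataFrame({'timestamp': [0.000, 0.005], 'emg': [0.12, -0.08]})
samplerate = 200  # assumed Hz, for illustration only
samples_per_subject = {1: [[sample_df, samplerate]],
                       2: [], 3: [], 4: [], 5: []}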
@@ -573,6 +573,8 @@ class DL_data_handler:
         self.samples_per_subject[subject_nr+1] = subj_samples

     def reshape_session_df_to_signal(self, df:DataFrame):
         main_df = df[['timestamp', 1]].rename(columns={1: 'emg'})
         for i in range(2, 17):
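Note: only the first lines of reshape_session_df_to_signal are visible in this hunk. The pattern it starts, stacking each numbered EMG channel column into one long ('timestamp', 'emg') frame, looks roughly like the sketch below; the pd.concat continuation is an assumption, not the committed code:

import pandas as pd
from pandas import DataFrame

def reshape_session_df_to_signal(df: DataFrame) -> DataFrame:
    # Channel 1 seeds the result, renamed to a generic 'emg' column.
    main_df = df[['timestamp', 1]].rename(columns={1: 'emg'})
    # Channels 2..16 are assumed to be stacked underneath in the same way.
    for i in range(2, 17):
        channel_df = df[['timestamp', i]].rename(columns={i: 'emg'})
        main_df = pd.concat([main_df, channel_df], ignore_index=True)
    return main_df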
@@ -593,30 +595,42 @@ class DL_data_handler:
         raw_data_dict = self.get_samples_dict()

         # loop through all subjects to get samples
+        mfcc_list = []
+        mfcc_frame_list = []
         for key, value in raw_data_dict.items():

-            # save genre label (i.e., sub-folder name) in the mapping
+            # save subject label in the mapping
             subject_label = 'Subject ' + str(key)
             data["mapping"].append(subject_label)
             print("\nProcessing: {}".format(subject_label))

-            # process all audio files in genre sub-dir
+            # process all samples per subject
             for i, (sample) in enumerate(value):

-                # load audio file
+                # load signal from sample
                 signal, sample_rate = sample[0], sample[1]
                 signal = signal['emg'].to_numpy()
                 test_df_for_bugs(signal, key, i)
+                #print(sample_rate)

                 # extract mfcc
                 mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
-                data["mfcc"].append(mfcc.tolist())
+                mfcc = mfcc.T
+                mfcc_list.append(mfcc.tolist())
+                mfcc_frame_list.append(mfcc.shape[0])
+                #data["mfcc"].append(mfcc.tolist())
                 data["labels"].append(key)
-                print("sample:{}".format(i+1))
+                print("sample:{} is done".format(i+1))

+        minimum = min(mfcc_frame_list)
+        for mfcc_data in mfcc_list:
+            data["mfcc"].append(mfcc_data[:minimum])
+            print(np.array(mfcc_data[:minimum]).shape)

         # save MFCCs to json file
         with open(json_path, "w") as fp:
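Note: the new mfcc_list/mfcc_frame_list bookkeeping is what lets save_mfcc truncate every MFCC matrix to the shortest sample before writing JSON, so the stored lists are rectangular and np.array can later build a single (samples, frames, coefficients) tensor. A standalone sketch of that idea, with made-up shapes:

import numpy as np

# Hypothetical MFCC matrices with unequal frame counts (frames x 13 coefficients).
mfcc_list = [np.random.rand(310, 13), np.random.rand(300, 13), np.random.rand(305, 13)]
mfcc_frame_list = [m.shape[0] for m in mfcc_list]

# Truncate everything to the shortest sample so the stack is rectangular.
minimum = min(mfcc_frame_list)
X = np.array([m[:minimum] for m in mfcc_list])
print(X.shape)  # (3, 300, 13): samples x frames x coefficients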

View File

@@ -1,8 +1,9 @@
 import json
+from python_speech_features.python_speech_features.base import mfcc
 import numpy as np
 from sklearn.model_selection import train_test_split
-import tensorflow.keras as keras
 import tensorflow as tf
+import tensorflow.keras as keras
 from pathlib import Path
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -15,12 +16,16 @@ def load_data(data_path):
     with open(data_path, "r") as fp:
         data = json.load(fp)

-    # convert lists to numpy arrays
-    X = np.array(data["mfcc"])
+    # convert lists to numpy arraysls
+    #print('\n', data['mfcc'], '\n')
+    X = np.array(data['mfcc'])
+    print(X.shape)
+    #print((len(X), len(X[0]), len(X[0][0])))
+    #print((len(X), len(X[5]), len(X[5][0])))

     y = np.array(data["labels"])
-    #X = np.asarray(X).astype('float32')
-    #y = np.asarray(y).astype('float32')
-    #y = tf.expand_dims(y, axis=1)
+    print(y.shape)

     print("Data succesfully loaded!")
@@ -102,17 +107,9 @@ if __name__ == "__main__":
     # get train, validation, test splits
     X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)
+    print(X_train.shape[1], X_train.shape[2])

     # create network
-    input_shape = (X_train.shape[1], X_train.shape[2]) # 18, 13
-    print(X_train.shape)
-    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
-    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
-    #X_validation = np.reshape(X_validation, (X_test.shape[0], 1, X_test.shape[1]))
-    print(X_train.shape)
-    print(X_train.shape[0])
-    print(X_train.shape[1])
+    input_shape = (X_train.shape[1], X_train.shape[2]) # 300, 13
     model = build_model(input_shape)

     # compile model
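Note: because X_train now already arrives as (samples, frames, coefficients), the old np.reshape calls could be dropped; Keras takes the per-sample shape directly. build_model's layers are not part of this diff, so the stack below is illustrative only:

import tensorflow.keras as keras

def build_model(input_shape):
    # Illustrative stack; the committed build_model is not shown in this diff.
    return keras.Sequential([
        keras.layers.Flatten(input_shape=input_shape),  # (frames, coefficients) in
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(5, activation='softmax'),    # assuming 5 subject classes
    ])

# The per-sample shape excludes the batch dimension:
# input_shape = (X_train.shape[1], X_train.shape[2])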
@@ -124,7 +121,7 @@ if __name__ == "__main__":
     model.summary()

     # train model
-    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=16, epochs=30)
+    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=100, epochs=30)

     # plot accuracy/error for training and validation
     plot_history(history)
@@ -133,3 +130,4 @@ if __name__ == "__main__":
     test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
     print('\nTest accuracy:', test_acc)

View File

@@ -221,11 +221,18 @@ def main():
     csv_handler = CSV_handler()
     csv_handler.load_data('soft')
     dl_data_handler = DL_data_handler(csv_handler)
-    dl_data_handler.store_samples(15)
-    #dict = dl_data_handler.samples_per_subject
-    #print(dict.get(1)[10][0], dict.get(1)[10][1])
-    #print(dict.get(1)[12][0], dict.get(1)[12][1])
+    dl_data_handler.store_samples(5)
+    dict = dl_data_handler.samples_per_subject
+    #print(dict.get(1)[2][0], dict.get(1)[10][1])
+    #print(dict.get(1)[15][0], dict.get(1)[10][1])
+    #print(dict.get(1))
+    '''
+    print(len(dict.get(1)))
+    print(len(dict.get(2)))
+    print(len(dict.get(3)))
+    print(len(dict.get(4)))
+    print(len(dict.get(5)))
+    '''
     dl_data_handler.save_mfcc()

File diff suppressed because it is too large