fix: try to implement the NN correctly

Skudalen 2021-07-05 12:43:45 +02:00
parent 91bd13ee1d
commit 155816235e
12 changed files with 1310 additions and 1190 deletions

.idea/.gitignore vendored Normal file (+8)

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

.idea/Slovakia 2021.iml Normal file (+8)

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

.idea/encodings.xml Normal file (+4)

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
</project>

.idea/inspectionProfiles/profiles_settings.xml Normal file (+6)

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

.idea/misc.xml Normal file (+4)

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (Slovakia 2021)" project-jdk-type="Python SDK" />
</project>

.idea/modules.xml Normal file (+8)

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Slovakia 2021.iml" filepath="$PROJECT_DIR$/.idea/Slovakia 2021.iml" />
</modules>
</component>
</project>

.idea/other.xml Normal file (+6)

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>

.idea/vcs.xml Normal file (+7)

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/python_speech_features" vcs="Git" />
</component>
</project>


@@ -610,15 +610,10 @@ class DL_data_handler:
             test_df_for_bugs(signal, key, i)

             # extract mfcc
-            #n_fft = MFCC_WINDOWSIZE * sample_rate
-            #hop_length = MFCC_STEPSIZE * sample_rate
-            #mfcc = librosa.feature.mfcc(signal, sample_rate, n_mfcc=NR_COEFFICIENTS, n_fft=n_fft, hop_length=hop_length)
             mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
             mfcc = mfcc.T
-            #print(len(mfcc))

             # store only mfcc feature with expected number of vectors
-            #if len(mfcc) == num_mfcc_vectors_per_segment:
             data["mfcc"].append(mfcc.tolist())
             data["labels"].append(key)
             print("sample:{}".format(i+1))


@@ -1,12 +1,14 @@
 import json
 import numpy as np
 from sklearn.model_selection import train_test_split
-import tensorflow.keras as keras
+import tensorflow as tf
+from tensorflow import keras
 from pathlib import Path
 import pandas as pd
 import matplotlib.pyplot as plt

-# path to json file that stores MFCCs and genre labels for each processed segment
-DATA_PATH = str(Path.cwd()) + "mfcc_data.json"
+# path to json file that stores MFCCs and subject labels for each processed sample
+DATA_PATH = str(Path.cwd()) + "/mfcc_data.json"

 def load_data(data_path):
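Aside: the missing path separator fixed in this hunk is the kind of bug that pathlib's join operator avoids; an equivalent form would be:

    # joins with the correct separator on any OS
    DATA_PATH = str(Path.cwd() / "mfcc_data.json")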
@@ -16,38 +18,102 @@ def load_data(data_path):
     # convert lists to numpy arrays
     X = np.array(data["mfcc"])
     y = np.array(data["labels"])
+    #X = np.asarray(X).astype('float32')
+    #y = np.asarray(y).astype('float32')
+    #y = tf.expand_dims(y, axis=1)

     print("Data successfully loaded!")
     return X, y
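The top of load_data falls outside this hunk; presumably it just deserializes the JSON written by DL_data_handler above. A minimal sketch, assuming that file layout:

    import json
    import numpy as np

    def load_data(data_path):
        # read the {"mfcc": [...], "labels": [...]} dictionary from disk
        with open(data_path, "r") as fp:
            data = json.load(fp)
        X = np.array(data["mfcc"])
        y = np.array(data["labels"])
        return X, y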

+def plot_history(history):
+    """Plots accuracy/loss for training/validation set as a function of the epochs
+    :param history: Training history of model
+    :return:
+    """
-if __name__ == "__main__":
+    fig, axs = plt.subplots(2)

+    # create accuracy subplot
+    axs[0].plot(history.history["accuracy"], label="train accuracy")
+    axs[0].plot(history.history["val_accuracy"], label="test accuracy")
+    axs[0].set_ylabel("Accuracy")
+    axs[0].legend(loc="lower right")
+    axs[0].set_title("Accuracy eval")

+    # create error subplot
+    axs[1].plot(history.history["loss"], label="train error")
+    axs[1].plot(history.history["val_loss"], label="test error")
+    axs[1].set_ylabel("Error")
+    axs[1].set_xlabel("Epoch")
+    axs[1].legend(loc="upper right")
+    axs[1].set_title("Error eval")

+    plt.show()

+def prepare_datasets(test_size=0.25, validation_size=0.2):
+    """Loads data and splits it into train, validation and test sets.
+    :param test_size (float): Value in [0, 1] indicating percentage of data set to allocate to test split
+    :param validation_size (float): Value in [0, 1] indicating percentage of train set to allocate to validation split
+    :return X_train (ndarray): Input training set
+    :return X_validation (ndarray): Input validation set
+    :return X_test (ndarray): Input test set
+    :return y_train (ndarray): Target training set
+    :return y_validation (ndarray): Target validation set
+    :return y_test (ndarray): Target test set
+    """
     # load data
     X, y = load_data(DATA_PATH)

-    # create train/test split
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
+    # create train, validation and test split
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
+    X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=validation_size)

+    return X_train, X_validation, X_test, y_train, y_validation, y_test

+def build_model(input_shape, nr_classes=5):
+    """Generates RNN-LSTM model
+    :param input_shape (tuple): Shape of input set
+    :return model: RNN-LSTM model
+    """
     # build network topology
-    model = keras.Sequential([
+    model = keras.Sequential()

-        # input layer
-        keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])),
+    # 2 LSTM layers
+    model.add(keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True))
+    model.add(keras.layers.LSTM(64))

-        # 1st dense layer
-        keras.layers.Dense(512, activation='relu'),
-        # 2nd dense layer
-        keras.layers.Dense(256, activation='relu'),
-        # 3rd dense layer
-        keras.layers.Dense(64, activation='relu'),
+    # dense layer
+    model.add(keras.layers.Dense(64, activation='relu'))
+    model.add(keras.layers.Dropout(0.3))

     # output layer
-        keras.layers.Dense(10, activation='softmax')
-    ])
+    model.add(keras.layers.Dense(nr_classes, activation='softmax'))

+    return model
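Note that keras.layers.LSTM consumes input of shape (batch, timesteps, features), so input_shape here must be (timesteps, features), e.g. (300, 13) as the comment in the main block below suggests. A quick shape check using the build_model defined above, with illustrative numbers:

    import numpy as np

    # illustrative shapes only: 2 samples, 300 frames, 13 coefficients
    model = build_model(input_shape=(300, 13), nr_classes=5)
    dummy = np.zeros((2, 300, 13), dtype="float32")
    print(model.predict(dummy).shape)  # (2, 5): one softmax row per sample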

+if __name__ == "__main__":
+    # get train, validation, test splits
+    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

     # create network
+    print(X_train.shape)
+    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
+    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
+    #X_validation = np.reshape(X_validation, (X_test.shape[0], 1, X_test.shape[1]))
+    print(X_train.shape)
+    print(X_train.shape[0])
+    print(X_train.shape[1])
+    input_shape = (X_train.shape[1], X_train.shape[2])  # 300, 13
+    model = build_model(input_shape)

     # compile model
     optimiser = keras.optimizers.Adam(learning_rate=0.0001)
@@ -58,4 +124,12 @@
     model.summary()

     # train model
-    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=50)
+    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=16, epochs=30)

+    # plot accuracy/error for training and validation
+    plot_history(history)

+    # evaluate model on test set
+    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
+    print('\nTest accuracy:', test_acc)
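One caveat with the main block: if the per-sample MFCC matrices have different frame counts, np.array(data["mfcc"]) yields a ragged object array that neither the np.reshape calls above nor the LSTM will accept (and X_validation is never reshaped before being passed to fit). A hypothetical fix, not part of this commit, is to pad every sample to a fixed frame count first:

    import numpy as np
    import tensorflow as tf

    def pad_mfcc(mfcc_list, max_len=300):
        # mfcc_list: list of (frames, n_coeff) arrays of varying length;
        # returns a regular (samples, max_len, n_coeff) float32 array
        return tf.keras.preprocessing.sequence.pad_sequences(
            mfcc_list, maxlen=max_len, dtype="float32",
            padding="post", truncating="post")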

File diff suppressed because it is too large.