fix: try to implement the NN correctly
This commit is contained in:
parent
91bd13ee1d
commit
155816235e
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
8
.idea/Slovakia 2021.iml
Normal file
8
.idea/Slovakia 2021.iml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
4
.idea/encodings.xml
Normal file
4
.idea/encodings.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
|
||||||
|
</project>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (Slovakia 2021)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/Slovakia 2021.iml" filepath="$PROJECT_DIR$/.idea/Slovakia 2021.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/other.xml
Normal file
6
.idea/other.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="PySciProjectComponent">
|
||||||
|
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
|
||||||
|
</component>
|
||||||
|
</project>
|
7
.idea/vcs.xml
Normal file
7
.idea/vcs.xml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
<mapping directory="$PROJECT_DIR$/python_speech_features" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
@ -610,15 +610,10 @@ class DL_data_handler:
|
|||||||
test_df_for_bugs(signal, key, i)
|
test_df_for_bugs(signal, key, i)
|
||||||
|
|
||||||
# extract mfcc
|
# extract mfcc
|
||||||
#n_fft = MFCC_WINDOWSIZE * sample_rate
|
|
||||||
#hop_length = MFCC_STEPSIZE * sample_rate
|
|
||||||
#mfcc = librosa.feature.mfcc(signal, sample_rate, n_mfcc=NR_COEFFICIENTS, n_fft=n_fft, hop_length=hop_length)
|
|
||||||
mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
|
mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
|
||||||
mfcc = mfcc.T
|
mfcc = mfcc.T
|
||||||
#print(len(mfcc))
|
|
||||||
|
|
||||||
# store only mfcc feature with expected number of vectors
|
|
||||||
#if len(mfcc) == num_mfcc_vectors_per_segment:
|
|
||||||
data["mfcc"].append(mfcc.tolist())
|
data["mfcc"].append(mfcc.tolist())
|
||||||
data["labels"].append(key)
|
data["labels"].append(key)
|
||||||
print("sample:{}".format(i+1))
|
print("sample:{}".format(i+1))
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
import json
|
import json
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
import tensorflow.keras as keras
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import tf.keras as keras
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
# path to json file that stores MFCCs and genre labels for each processed segment
|
# path to json file that stores MFCCs and subject labels for each processed sample
|
||||||
DATA_PATH = str(Path.cwd()) + "mfcc_data.json"
|
DATA_PATH = str(Path.cwd()) + "/mfcc_data.json"
|
||||||
|
|
||||||
def load_data(data_path):
|
def load_data(data_path):
|
||||||
|
|
||||||
@ -16,38 +18,102 @@ def load_data(data_path):
|
|||||||
# convert lists to numpy arrays
|
# convert lists to numpy arrays
|
||||||
X = np.array(data["mfcc"])
|
X = np.array(data["mfcc"])
|
||||||
y = np.array(data["labels"])
|
y = np.array(data["labels"])
|
||||||
|
#X = np.asarray(X).astype('float32')
|
||||||
|
#y = np.asarray(y).astype('float32')
|
||||||
|
#y = tf.expand_dims(y, axis=1)
|
||||||
|
|
||||||
print("Data succesfully loaded!")
|
print("Data succesfully loaded!")
|
||||||
|
|
||||||
return X, y
|
return X, y
|
||||||
|
|
||||||
|
def plot_history(history):
|
||||||
|
"""Plots accuracy/loss for training/validation set as a function of the epochs
|
||||||
|
:param history: Training history of model
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
|
||||||
if __name__ == "__main__":
|
fig, axs = plt.subplots(2)
|
||||||
|
|
||||||
|
# create accuracy subplot
|
||||||
|
axs[0].plot(history.history["accuracy"], label="train accuracy")
|
||||||
|
axs[0].plot(history.history["val_accuracy"], label="test accuracy")
|
||||||
|
axs[0].set_ylabel("Accuracy")
|
||||||
|
axs[0].legend(loc="lower right")
|
||||||
|
axs[0].set_title("Accuracy eval")
|
||||||
|
|
||||||
|
# create error subplot
|
||||||
|
axs[1].plot(history.history["loss"], label="train error")
|
||||||
|
axs[1].plot(history.history["val_loss"], label="test error")
|
||||||
|
axs[1].set_ylabel("Error")
|
||||||
|
axs[1].set_xlabel("Epoch")
|
||||||
|
axs[1].legend(loc="upper right")
|
||||||
|
axs[1].set_title("Error eval")
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_datasets(test_size=0.25, validation_size=0.2):
|
||||||
|
"""Loads data and splits it into train, validation and test sets.
|
||||||
|
:param test_size (float): Value in [0, 1] indicating percentage of data set to allocate to test split
|
||||||
|
:param validation_size (float): Value in [0, 1] indicating percentage of train set to allocate to validation split
|
||||||
|
:return X_train (ndarray): Input training set
|
||||||
|
:return X_validation (ndarray): Input validation set
|
||||||
|
:return X_test (ndarray): Input test set
|
||||||
|
:return y_train (ndarray): Target training set
|
||||||
|
:return y_validation (ndarray): Target validation set
|
||||||
|
:return y_test (ndarray): Target test set
|
||||||
|
"""
|
||||||
|
|
||||||
# load data
|
# load data
|
||||||
X, y = load_data(DATA_PATH)
|
X, y = load_data(DATA_PATH)
|
||||||
|
|
||||||
# create train/test split
|
# create train, validation and test split
|
||||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
|
||||||
|
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=validation_size)
|
||||||
|
|
||||||
|
return X_train, X_validation, X_test, y_train, y_validation, y_test
|
||||||
|
|
||||||
|
|
||||||
|
def build_model(input_shape, nr_classes=5):
|
||||||
|
"""Generates RNN-LSTM model
|
||||||
|
:param input_shape (tuple): Shape of input set
|
||||||
|
:return model: RNN-LSTM model
|
||||||
|
"""
|
||||||
|
|
||||||
# build network topology
|
# build network topology
|
||||||
model = keras.Sequential([
|
model = keras.Sequential()
|
||||||
|
|
||||||
# input layer
|
# 2 LSTM layers
|
||||||
keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])),
|
model.add(keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True))
|
||||||
|
model.add(keras.layers.LSTM(64))
|
||||||
|
|
||||||
# 1st dense layer
|
# dense layer
|
||||||
keras.layers.Dense(512, activation='relu'),
|
model.add(keras.layers.Dense(64, activation='relu'))
|
||||||
|
model.add(keras.layers.Dropout(0.3))
|
||||||
# 2nd dense layer
|
|
||||||
keras.layers.Dense(256, activation='relu'),
|
|
||||||
|
|
||||||
# 3rd dense layer
|
|
||||||
keras.layers.Dense(64, activation='relu'),
|
|
||||||
|
|
||||||
# output layer
|
# output layer
|
||||||
keras.layers.Dense(10, activation='softmax')
|
model.add(keras.layers.Dense(nr_classes, activation='softmax'))
|
||||||
])
|
|
||||||
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
# get train, validation, test splits
|
||||||
|
X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)
|
||||||
|
|
||||||
|
# create network
|
||||||
|
|
||||||
|
print(X_train.shape)
|
||||||
|
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
|
||||||
|
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
|
||||||
|
#X_validation = np.reshape(X_validation, (X_test.shape[0], 1, X_test.shape[1]))
|
||||||
|
print(X_train.shape)
|
||||||
|
print(X_train.shape[0])
|
||||||
|
print(X_train.shape[1])
|
||||||
|
|
||||||
|
input_shape = (X_train.shape[1], X_train.shape[2]) # 300, 13
|
||||||
|
model = build_model(input_shape)
|
||||||
|
|
||||||
# compile model
|
# compile model
|
||||||
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
|
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
|
||||||
@ -58,4 +124,12 @@ if __name__ == "__main__":
|
|||||||
model.summary()
|
model.summary()
|
||||||
|
|
||||||
# train model
|
# train model
|
||||||
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=50)
|
history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=16, epochs=30)
|
||||||
|
|
||||||
|
# plot accuracy/error for training and validation
|
||||||
|
plot_history(history)
|
||||||
|
|
||||||
|
# evaluate model on test set
|
||||||
|
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
|
||||||
|
print('\nTest accuracy:', test_acc)
|
||||||
|
|
||||||
|
Binary file not shown.
2328
mfcc_data.json
2328
mfcc_data.json
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user