chore: move mfcc func and add

skeleton for saving mfcc data
This commit is contained in:
Skudalen 2021-07-02 10:13:27 +02:00
parent 76aeb10ea7
commit c36ddf1609
5 changed files with 109 additions and 48 deletions

View File

@ -5,8 +5,18 @@ from pathlib import Path
import numpy as np import numpy as np
from pandas.core.frame import DataFrame from pandas.core.frame import DataFrame
from math import floor from math import floor
import sys
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
from python_speech_features.python_speech_features import *
import json
#from Present_data import get_data #from Present_data import get_data
# Global variables for MFCC
# These are the values the (currently disabled) save_mfcc draft hands to
# mfcc_custom, which forwards them positionally to base.mfcc.
MFCC_STEPSIZE = 0.5 # Seconds; passed as the step size
MFCC_WINDOWSIZE = 2 # Seconds; passed as the window size
NR_COEFFICIENTS = 13 # Number of coefficients
NR_MEL_BINS = 40 # Number of mel-filter-bins
class Data_container: class Data_container:
def __init__(self, subject_nr:int, subject_name:str): def __init__(self, subject_nr:int, subject_name:str):
@ -488,6 +498,12 @@ class CSV_handler:
class DL_data_handler: class DL_data_handler:
JSON_PATH = "mfcc_data.json"
SAMPLE_RATE = None
TRACK_DURATION = None # measured in seconds
#SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
def __init__(self, csv_handler:CSV_handler) -> None: def __init__(self, csv_handler:CSV_handler) -> None:
self.csv_handler = csv_handler self.csv_handler = csv_handler
# Should med 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate] # Should med 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
@ -497,6 +513,7 @@ class DL_data_handler:
4: [], 4: [],
5: [] 5: []
} }
def get_samples_dict(self): def get_samples_dict(self):
return self.samples_per_subject return self.samples_per_subject
@ -568,8 +585,49 @@ class DL_data_handler:
main_df = pd.concat([main_df, adding_df], ignore_index=True) main_df = pd.concat([main_df, adding_df], ignore_index=True)
samplerate = get_samplerate(main_df) samplerate = get_samplerate(main_df)
return main_df, samplerate return main_df, samplerate
'''
def save_mfcc(raw_data_dict, json_path, samples_per_subject):
# dictionary to store mapping, labels, and MFCCs
data = {
"mapping": [],
"labels": [],
"mfcc": []
}
#hop_length = MFCC_STEPSIZE * sample_rate
#num_mfcc_vectors_per_segment = math.ceil(samples_per_subject / hop_length)
# loop through all subjects to get samples
for key, value in raw_data_dict.items():
# save genre label (i.e., sub-folder name) in the mapping
subject_label = 'Subject ' + key
data["mapping"].append(subject_label)
print("\nProcessing: {}".format(subject_label))
# process all audio files in genre sub-dir
for sample in value:
# load audio file
signal, sample_rate = sample[0], sample[1]
# extract mfcc
mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
mfcc = mfcc.T
print(len(mfcc))
# store only mfcc feature with expected number of vectors
#if len(mfcc) == num_mfcc_vectors_per_segment:
data["mfcc"].append(mfcc.tolist())
data["labels"].append(key)
print("sample:{}".format(value.index(sample)))
# save MFCCs to json file
with open(json_path, "w") as fp:
json.dump(data, fp, indent=4)
'''
# HELP FUNCTIONS: ------------------------------------------------------------------------: # HELP FUNCTIONS: ------------------------------------------------------------------------:
# Help: gets the str from emg nr # Help: gets the str from emg nr
@ -600,3 +658,18 @@ def get_samplerate(df:DataFrame):
samples = len(df.index) samples = len(df.index)
samplerate = samples / seconds samplerate = samples / seconds
return int(samplerate) return int(samplerate)
# Takes in a df and outputs a np array holding either its x or its y values
def get_xory_from_df(x_or_y, df:DataFrame):
    """Return one column of *df* as a NumPy array.

    Args:
        x_or_y: 'x' selects the first column (position 0), 'y' selects the
            second column (position 1).
        df: DataFrame to read the column from.

    Returns:
        The selected column converted with ``to_numpy()``, or ``0`` when
        *x_or_y* is neither 'x' nor 'y' (fallback kept so existing callers
        see the same value as before).
    """
    column_positions = {'x': 0, 'y': 1}
    position = column_positions.get(x_or_y)
    if position is None:
        return 0
    # Only the requested column is touched: asking for 'x' works on a frame
    # without a second column, and no unused array is built.
    return df.iloc[:, position].to_numpy()
# Slightly modified mfcc with inputs like below.
# Returns N (x_values from original df) and mfcc_y_values
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients=None, nr_mel_filters=None):
    """Compute MFCC features for the y column of *df*.

    Args:
        df: DataFrame whose first column holds the x values and whose second
            column holds the signal (read through get_xory_from_df).
        samplesize: Forwarded as the second positional argument of
            base.mfcc. NOTE(review): callers pass a sample rate here;
            confirm against the base.mfcc signature.
        windowsize: Forwarded as the third positional argument of base.mfcc.
        stepsize: Forwarded as the fourth positional argument of base.mfcc.
        nr_coefficients: Forwarded as the fifth positional argument.
            Defaults to the module constant NR_COEFFICIENTS when omitted.
        nr_mel_filters: Forwarded as the sixth positional argument.
            Defaults to the module constant NR_MEL_BINS when omitted.

    Returns:
        A tuple ``(N, features)``: the x values of *df* and whatever
        base.mfcc returns for the y values.
    """
    # Callers that only know the window and step sizes can leave the last two
    # arguments out; the module-wide MFCC settings are used instead.
    if nr_coefficients is None:
        nr_coefficients = NR_COEFFICIENTS
    if nr_mel_filters is None:
        nr_mel_filters = NR_MEL_BINS
    N = get_xory_from_df('x', df)
    y = get_xory_from_df('y', df)
    return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)

View File

@ -8,10 +8,10 @@ from matplotlib import cm
import matplotlib.ticker as ticker import matplotlib.ticker as ticker
# Global variables for MFCC # Global variables for MFCC
mfcc_stepsize = 0.5 # Seconds MFCC_STEPSIZE = 0.5 # Seconds
mfcc_windowsize = 2 # Seconds MFCC_WINDOWSIZE = 2 # Seconds
nr_coefficients = 13 # Number of coefficients NR_COEFFICIENTS = 13 # Number of coefficients
nr_mel_filters = 40 # Number of mel-filter-bins NR_MEL_BINS = 40 # Number of mel-filter-bins
# PLOT FUNCTIONS --------------------------------------------------------------: # PLOT FUNCTIONS --------------------------------------------------------------:
@ -126,13 +126,6 @@ def denoice_dataset(handler:Handler.CSV_handler, subject_nr, which_arm, round, e
df_new = Handler.make_df_from_xandy(N, y_values, emg_nr) df_new = Handler.make_df_from_xandy(N, y_values, emg_nr)
return df_new return df_new
# Thin wrapper around base.mfcc that accepts a DataFrame.
# Hands back the x values of the df together with the mfcc result for its y values.
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters):
    """Return ``(x_values, mfcc_features)`` for *df*.

    The x and y arrays are pulled out of *df* with get_xory_from_df; the y
    array and the remaining arguments are forwarded positionally, in the
    order given, to base.mfcc.
    """
    x_values, y_values = (get_xory_from_df(axis, df) for axis in ('x', 'y'))
    features = base.mfcc(
        y_values,
        samplesize,
        windowsize,
        stepsize,
        nr_coefficients,
        nr_mel_filters,
    )
    return x_values, features
def test_for_NaN(dict, samples_per_person): def test_for_NaN(dict, samples_per_person):
for key, value in dict.items(): for key, value in dict.items():
@ -201,14 +194,14 @@ def mfcc_all_emg_plots(csv_handler:CSV_handler):
df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6) df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6)
df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7) df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7)
df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8) df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8)
N1, mfcc_feat1 = mfcc_custom(df1, samplerate1, mfcc_windowsize, mfcc_stepsize) N1, mfcc_feat1 = csv_handler.mfcc_custom(df1, samplerate1, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N2, mfcc_feat2 = mfcc_custom(df2, samplerate2, mfcc_windowsize, mfcc_stepsize) N2, mfcc_feat2 = csv_handler.mfcc_custom(df2, samplerate2, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N3, mfcc_feat3 = mfcc_custom(df3, samplerate3, mfcc_windowsize, mfcc_stepsize) N3, mfcc_feat3 = csv_handler.mfcc_custom(df3, samplerate3, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N4, mfcc_feat4 = mfcc_custom(df4, samplerate4, mfcc_windowsize, mfcc_stepsize) N4, mfcc_feat4 = csv_handler.mfcc_custom(df4, samplerate4, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N5, mfcc_feat5 = mfcc_custom(df5, samplerate5, mfcc_windowsize, mfcc_stepsize) N5, mfcc_feat5 = csv_handler.mfcc_custom(df5, samplerate5, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N6, mfcc_feat6 = mfcc_custom(df6, samplerate6, mfcc_windowsize, mfcc_stepsize) N6, mfcc_feat6 = csv_handler.mfcc_custom(df6, samplerate6, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N7, mfcc_feat7 = mfcc_custom(df7, samplerate7, mfcc_windowsize, mfcc_stepsize) N7, mfcc_feat7 = csv_handler.mfcc_custom(df7, samplerate7, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N8, mfcc_feat8 = mfcc_custom(df8, samplerate8, mfcc_windowsize, mfcc_stepsize) N8, mfcc_feat8 = csv_handler.mfcc_custom(df8, samplerate8, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8] feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8]
label_1 = 'Subject 1, session 1, left arm, emg nr. 1' label_1 = 'Subject 1, session 1, left arm, emg nr. 1'
label_2 = 'Subject 1, session 1, left arm, emg nr. 2' label_2 = 'Subject 1, session 1, left arm, emg nr. 2'
@ -229,9 +222,13 @@ def main():
csv_handler = CSV_handler() csv_handler = CSV_handler()
csv_handler.load_data('soft') csv_handler.load_data('soft')
dl_data_handler = DL_data_handler(csv_handler) dl_data_handler = DL_data_handler(csv_handler)
mfcc_3_plots_1_1_2(csv_handler)
'''
dl_data_handler.store_samples(10) dl_data_handler.store_samples(10)
dict = dl_data_handler.samples_per_subject dict = dl_data_handler.samples_per_subject
dl_data_handler.save_mfcc()
'''
main() main()

View File

@ -4,17 +4,8 @@ from scipy.fft import fft, fftfreq
import pywt import pywt
import sys import sys
import Handle_emg_data as Handler import Handle_emg_data as Handler
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
from python_speech_features.python_speech_features import *
# Takes in a df and outputs a np array holding either its x or its y values
def get_xory_from_df(x_or_y, df:DataFrame):
    """Return one column of *df* as a NumPy array.

    Args:
        x_or_y: 'x' selects the first column (position 0), 'y' selects the
            second column (position 1).
        df: DataFrame to read the column from.

    Returns:
        The selected column converted with ``to_numpy()``, or ``0`` when
        *x_or_y* is neither 'x' nor 'y' (fallback kept so existing callers
        see the same value as before).
    """
    column_positions = {'x': 0, 'y': 1}
    position = column_positions.get(x_or_y)
    if position is None:
        return 0
    # Only the requested column is touched: asking for 'x' works on a frame
    # without a second column, and no unused array is built.
    return df.iloc[:, position].to_numpy()
# Normalizes a ndarray of a signal to the scale of int16(32767) # Normalizes a ndarray of a signal to the scale of int16(32767)
def normalize_wave(y_values): def normalize_wave(y_values):