chore: move mfcc func and add

skeleton for saving mfcc data
This commit is contained in:
Skudalen 2021-07-02 10:13:27 +02:00
parent 76aeb10ea7
commit c36ddf1609
5 changed files with 109 additions and 48 deletions

View File

@ -5,8 +5,18 @@ from pathlib import Path
import numpy as np
from pandas.core.frame import DataFrame
from math import floor
import sys
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
from python_speech_features.python_speech_features import *
import json
#from Present_data import get_data
# Global variables for MFCC
MFCC_STEPSIZE = 0.5 # Seconds
MFCC_WINDOWSIZE = 2 # Seconds
NR_COEFFICIENTS = 13 # Number of coefficients
NR_MEL_BINS = 40 # Number of mel-filter-bins
class Data_container:
def __init__(self, subject_nr:int, subject_name:str):
@ -488,6 +498,12 @@ class CSV_handler:
class DL_data_handler:
JSON_PATH = "mfcc_data.json"
SAMPLE_RATE = None
TRACK_DURATION = None # measured in seconds
#SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
def __init__(self, csv_handler:CSV_handler) -> None:
self.csv_handler = csv_handler
# Should hold 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
@ -497,6 +513,7 @@ class DL_data_handler:
4: [],
5: []
}
def get_samples_dict(self):
return self.samples_per_subject
@ -568,30 +585,71 @@ class DL_data_handler:
main_df = pd.concat([main_df, adding_df], ignore_index=True)
samplerate = get_samplerate(main_df)
return main_df, samplerate
'''
def save_mfcc(raw_data_dict, json_path, samples_per_subject):
# dictionary to store mapping, labels, and MFCCs
data = {
"mapping": [],
"labels": [],
"mfcc": []
}
#hop_length = MFCC_STEPSIZE * sample_rate
#num_mfcc_vectors_per_segment = math.ceil(samples_per_subject / hop_length)
# loop through all subjects to get samples
for key, value in raw_data_dict.items():
# HELP FUNCTIONS: ------------------------------------------------------------------------:
# save genre label (i.e., sub-folder name) in the mapping
subject_label = 'Subject ' + key
data["mapping"].append(subject_label)
print("\nProcessing: {}".format(subject_label))
# Help: gets the str from emg nr
def get_emg_str(emg_nr):
# process all audio files in genre sub-dir
for sample in value:
# load audio file
signal, sample_rate = sample[0], sample[1]
# extract mfcc
mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
mfcc = mfcc.T
print(len(mfcc))
# store only mfcc feature with expected number of vectors
#if len(mfcc) == num_mfcc_vectors_per_segment:
data["mfcc"].append(mfcc.tolist())
data["labels"].append(key)
print("sample:{}".format(value.index(sample)))
# save MFCCs to json file
with open(json_path, "w") as fp:
json.dump(data, fp, indent=4)
'''
# HELP FUNCTIONS: ------------------------------------------------------------------------:
# Help: gets the str from emg nr
def get_emg_str(emg_nr):
    """Return the column label for an EMG sensor number, e.g. 1 -> 'emg1'."""
    # !s forces str() conversion, matching plain string concatenation.
    return f'emg{emg_nr!s}'
# Help: gets the min/max of a df
def get_min_max_timestamp(df:DataFrame):
# Help: gets the min/max of a df
def get_min_max_timestamp(df:DataFrame):
    """Return the smallest and largest value of the 'timestamp' column.

    Args:
        df: DataFrame containing a 'timestamp' column.

    Returns:
        A (minimum, maximum) tuple taken from df['timestamp'].
    """
    timestamps = df['timestamp']
    earliest = timestamps.min()
    latest = timestamps.max()
    return earliest, latest
# Help: returns df_time_emg
def make_df_from_xandy(x, y, emg_nr):
# Help: returns df_time_emg
def make_df_from_xandy(x, y, emg_nr):
    """Build a two-column DataFrame from x and y values.

    Args:
        x: Values stored in the 'timestamp' column.
        y: Values stored in the column named by get_emg_str(emg_nr).
        emg_nr: EMG sensor number used to name the second column.

    Returns:
        DataFrame with the columns 'timestamp' and get_emg_str(emg_nr).
    """
    columns = {'timestamp': x, get_emg_str(emg_nr): y}
    return DataFrame(columns)
# Help: returns the samplerate of a df
def get_samplerate(df:DataFrame):
# Help: returns the samplerate of a df
def get_samplerate(df:DataFrame):
min, max = get_min_max_timestamp(df)
if max > 60:
seconds = max - 60 - min
@ -600,3 +658,18 @@ def get_samplerate(df:DataFrame):
samples = len(df.index)
samplerate = samples / seconds
return int(samplerate)
# Takes in a df and outputs np arrays for x and y values
def get_xory_from_df(x_or_y, df:DataFrame):
    """Return one column of a DataFrame as a NumPy array.

    Args:
        x_or_y: 'x' selects the first column, 'y' selects the second column.
        df: DataFrame to read from.

    Returns:
        The selected column as a NumPy array, or 0 when x_or_y is neither
        'x' nor 'y' (kept for backward compatibility with existing callers).
    """
    column_positions = {'x': 0, 'y': 1}
    position = column_positions.get(x_or_y)
    if position is None:
        return 0
    # Only the requested column is touched, so asking for 'x' works on a
    # single-column frame and no unused array is built.
    return df.iloc[:, position].to_numpy()
# Slightly modified mfcc with inputs like below.
# Returns N (x_values from original df) and mfcc_y_values
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients=None, nr_mel_filters=None):
    """Compute MFCC features for the signal held in a two-column DataFrame.

    Args:
        df: DataFrame read through get_xory_from_df; the first column gives
            the x values and the second column the signal.
        samplesize: Forwarded to base.mfcc as its second positional argument;
            callers in this project pass the sample rate returned with df.
        windowsize: Forwarded to base.mfcc as its third positional argument
            (window length; the module constants give it in seconds).
        stepsize: Forwarded to base.mfcc as its fourth positional argument
            (step between windows; the module constants give it in seconds).
        nr_coefficients: Forwarded as the fifth positional argument. Defaults
            to NR_COEFFICIENTS when omitted.
        nr_mel_filters: Forwarded as the sixth positional argument. Defaults
            to NR_MEL_BINS when omitted.

    Returns:
        A tuple (N, mfcc_features): the x values of df and the result of
        base.mfcc for the signal column.
    """
    # Existing callers pass only four arguments; resolve the remaining two
    # from the module-level MFCC settings at call time.
    if nr_coefficients is None:
        nr_coefficients = NR_COEFFICIENTS
    if nr_mel_filters is None:
        nr_mel_filters = NR_MEL_BINS
    x_values = get_xory_from_df('x', df)
    signal = get_xory_from_df('y', df)
    mfcc_features = base.mfcc(signal, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
    return x_values, mfcc_features

View File

@ -8,10 +8,10 @@ from matplotlib import cm
import matplotlib.ticker as ticker
# Global variables for MFCC
mfcc_stepsize = 0.5 # Seconds
mfcc_windowsize = 2 # Seconds
nr_coefficients = 13 # Number of coefficients
nr_mel_filters = 40 # Number of mel-filter-bins
MFCC_STEPSIZE = 0.5 # Seconds
MFCC_WINDOWSIZE = 2 # Seconds
NR_COEFFICIENTS = 13 # Number of coefficients
NR_MEL_BINS = 40 # Number of mel-filter-bins
# PLOT FUNCTIONS --------------------------------------------------------------:
@ -126,13 +126,6 @@ def denoice_dataset(handler:Handler.CSV_handler, subject_nr, which_arm, round, e
df_new = Handler.make_df_from_xandy(N, y_values, emg_nr)
return df_new
# Slightly modified mfcc with inputs like below.
# Returns N (x_values from original df) and mfcc_y_values
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters):
N = get_xory_from_df('x', df)
y = get_xory_from_df('y', df)
return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
def test_for_NaN(dict, samples_per_person):
for key, value in dict.items():
@ -201,14 +194,14 @@ def mfcc_all_emg_plots(csv_handler:CSV_handler):
df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6)
df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7)
df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8)
N1, mfcc_feat1 = mfcc_custom(df1, samplerate1, mfcc_windowsize, mfcc_stepsize)
N2, mfcc_feat2 = mfcc_custom(df2, samplerate2, mfcc_windowsize, mfcc_stepsize)
N3, mfcc_feat3 = mfcc_custom(df3, samplerate3, mfcc_windowsize, mfcc_stepsize)
N4, mfcc_feat4 = mfcc_custom(df4, samplerate4, mfcc_windowsize, mfcc_stepsize)
N5, mfcc_feat5 = mfcc_custom(df5, samplerate5, mfcc_windowsize, mfcc_stepsize)
N6, mfcc_feat6 = mfcc_custom(df6, samplerate6, mfcc_windowsize, mfcc_stepsize)
N7, mfcc_feat7 = mfcc_custom(df7, samplerate7, mfcc_windowsize, mfcc_stepsize)
N8, mfcc_feat8 = mfcc_custom(df8, samplerate8, mfcc_windowsize, mfcc_stepsize)
N1, mfcc_feat1 = csv_handler.mfcc_custom(df1, samplerate1, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N2, mfcc_feat2 = csv_handler.mfcc_custom(df2, samplerate2, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N3, mfcc_feat3 = csv_handler.mfcc_custom(df3, samplerate3, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N4, mfcc_feat4 = csv_handler.mfcc_custom(df4, samplerate4, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N5, mfcc_feat5 = csv_handler.mfcc_custom(df5, samplerate5, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N6, mfcc_feat6 = csv_handler.mfcc_custom(df6, samplerate6, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N7, mfcc_feat7 = csv_handler.mfcc_custom(df7, samplerate7, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
N8, mfcc_feat8 = csv_handler.mfcc_custom(df8, samplerate8, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8]
label_1 = 'Subject 1, session 1, left arm, emg nr. 1'
label_2 = 'Subject 1, session 1, left arm, emg nr. 2'
@ -229,9 +222,13 @@ def main():
csv_handler = CSV_handler()
csv_handler.load_data('soft')
dl_data_handler = DL_data_handler(csv_handler)
mfcc_3_plots_1_1_2(csv_handler)
'''
dl_data_handler.store_samples(10)
dict = dl_data_handler.samples_per_subject
dl_data_handler.save_mfcc()
'''
# Run the script entry point only when this file is executed directly,
# so importing the module does not trigger data loading and plotting.
if __name__ == "__main__":
    main()

View File

@ -4,17 +4,8 @@ from scipy.fft import fft, fftfreq
import pywt
import sys
import Handle_emg_data as Handler
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
from python_speech_features.python_speech_features import *
# Takes in a df and outputs np arrays for x and y values
def get_xory_from_df(x_or_y, df:DataFrame):
swither = {
'x': df.iloc[:,0].to_numpy(),
'y': df.iloc[:,1].to_numpy()
}
return swither.get(x_or_y, 0)
# Normalizes a ndarray of a signal to the scale of int16(32767)
def normalize_wave(y_values):