chore: move mfcc func and add
skeleton for saving mfcc data
This commit is contained in:
parent
76aeb10ea7
commit
c36ddf1609
@ -5,8 +5,18 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
from pandas.core.frame import DataFrame
|
||||
from math import floor
|
||||
import sys
|
||||
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
|
||||
from python_speech_features.python_speech_features import *
|
||||
import json
|
||||
#from Present_data import get_data
|
||||
|
||||
# Global variables for MFCC
|
||||
MFCC_STEPSIZE = 0.5 # Seconds
|
||||
MFCC_WINDOWSIZE = 2 # Seconds
|
||||
NR_COEFFICIENTS = 13 # Number of coefficients
|
||||
NR_MEL_BINS = 40 # Number of mel-filter-bins
|
||||
|
||||
class Data_container:
|
||||
|
||||
def __init__(self, subject_nr:int, subject_name:str):
|
||||
@ -488,6 +498,12 @@ class CSV_handler:
|
||||
|
||||
class DL_data_handler:
|
||||
|
||||
JSON_PATH = "mfcc_data.json"
|
||||
SAMPLE_RATE = None
|
||||
TRACK_DURATION = None # measured in seconds
|
||||
#SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
|
||||
|
||||
|
||||
def __init__(self, csv_handler:CSV_handler) -> None:
|
||||
self.csv_handler = csv_handler
|
||||
# Should hold 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
|
||||
@ -497,6 +513,7 @@ class DL_data_handler:
|
||||
4: [],
|
||||
5: []
|
||||
}
|
||||
|
||||
def get_samples_dict(self):
|
||||
return self.samples_per_subject
|
||||
|
||||
@ -568,30 +585,71 @@ class DL_data_handler:
|
||||
main_df = pd.concat([main_df, adding_df], ignore_index=True)
|
||||
samplerate = get_samplerate(main_df)
|
||||
return main_df, samplerate
|
||||
'''
|
||||
def save_mfcc(raw_data_dict, json_path, samples_per_subject):
|
||||
|
||||
# dictionary to store mapping, labels, and MFCCs
|
||||
data = {
|
||||
"mapping": [],
|
||||
"labels": [],
|
||||
"mfcc": []
|
||||
}
|
||||
|
||||
#hop_length = MFCC_STEPSIZE * sample_rate
|
||||
#num_mfcc_vectors_per_segment = math.ceil(samples_per_subject / hop_length)
|
||||
|
||||
# loop through all subjects to get samples
|
||||
for key, value in raw_data_dict.items():
|
||||
|
||||
|
||||
# HELP FUNCTIONS: ------------------------------------------------------------------------:
|
||||
# save genre label (i.e., sub-folder name) in the mapping
|
||||
subject_label = 'Subject ' + key
|
||||
data["mapping"].append(subject_label)
|
||||
print("\nProcessing: {}".format(subject_label))
|
||||
|
||||
# Help: gets the str from emg nr
|
||||
def get_emg_str(emg_nr):
|
||||
return 'emg' + str(emg_nr)
|
||||
# process all audio files in genre sub-dir
|
||||
for sample in value:
|
||||
|
||||
# Help: gets the min/max of a df
|
||||
def get_min_max_timestamp(df:DataFrame):
|
||||
#min = int(np.floor(df['timestamp'].min()))
|
||||
min = df['timestamp'].min()
|
||||
max = df['timestamp'].max()
|
||||
return min, max
|
||||
# load audio file
|
||||
signal, sample_rate = sample[0], sample[1]
|
||||
|
||||
# Help: returns df_time_emg
|
||||
def make_df_from_xandy(x, y, emg_nr):
|
||||
dict = {'timestamp': x, get_emg_str(emg_nr): y}
|
||||
df = DataFrame(dict)
|
||||
#print(df)
|
||||
return df
|
||||
# extract mfcc
|
||||
mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
|
||||
mfcc = mfcc.T
|
||||
print(len(mfcc))
|
||||
|
||||
# Help: returns the samplerate of a df
|
||||
def get_samplerate(df:DataFrame):
|
||||
# store only mfcc feature with expected number of vectors
|
||||
#if len(mfcc) == num_mfcc_vectors_per_segment:
|
||||
data["mfcc"].append(mfcc.tolist())
|
||||
data["labels"].append(key)
|
||||
print("sample:{}".format(value.index(sample)))
|
||||
|
||||
# save MFCCs to json file
|
||||
with open(json_path, "w") as fp:
|
||||
json.dump(data, fp, indent=4)
|
||||
'''
|
||||
# HELP FUNCTIONS: ------------------------------------------------------------------------:
|
||||
|
||||
# Help: gets the str from emg nr
|
||||
def get_emg_str(emg_nr):
    """Return the EMG column label for a channel number, e.g. ``'emg3'`` for 3."""
    # ``!s`` forces str() conversion, matching plain string concatenation.
    return f"emg{emg_nr!s}"
|
||||
|
||||
# Help: gets the min/max of a df
|
||||
def get_min_max_timestamp(df:DataFrame):
    """Return the smallest and largest value of the ``'timestamp'`` column.

    Args:
        df: Frame that contains a ``'timestamp'`` column.

    Returns:
        A ``(minimum, maximum)`` tuple taken from ``df['timestamp']``.

    Raises:
        KeyError: If ``df`` has no ``'timestamp'`` column.
    """
    timestamps = df['timestamp']
    # Returned directly so no local name shadows the ``min``/``max`` builtins.
    return timestamps.min(), timestamps.max()
|
||||
|
||||
# Help: returns df_time_emg
|
||||
def make_df_from_xandy(x, y, emg_nr):
    """Build a two-column frame pairing timestamps with one EMG channel.

    Args:
        x: Values for the ``'timestamp'`` column.
        y: Values for the EMG column.
        emg_nr: Channel number; the second column is named by
            ``get_emg_str(emg_nr)``.

    Returns:
        A ``DataFrame`` with the columns ``'timestamp'`` and the EMG label.
    """
    # Named ``columns`` rather than ``dict`` so the builtin is not shadowed.
    columns = {'timestamp': x, get_emg_str(emg_nr): y}
    return DataFrame(columns)
|
||||
|
||||
# Help: returns the samplerate of a df
|
||||
def get_samplerate(df:DataFrame):
|
||||
min, max = get_min_max_timestamp(df)
|
||||
if max > 60:
|
||||
seconds = max - 60 - min
|
||||
@ -599,4 +657,19 @@ def get_samplerate(df:DataFrame):
|
||||
seconds = max - min
|
||||
samples = len(df.index)
|
||||
samplerate = samples / seconds
|
||||
return int(samplerate)
|
||||
return int(samplerate)
|
||||
|
||||
# Takes in a df and outputs np arrays for x and y values
|
||||
def get_xory_from_df(x_or_y, df:DataFrame):
    """Return the first (``'x'``) or second (``'y'``) column of ``df`` as an array.

    Args:
        x_or_y: ``'x'`` selects the column at position 0, ``'y'`` the column
            at position 1.
        df: Frame to read the column from.

    Returns:
        The selected column converted with ``to_numpy()``, or ``0`` when
        ``x_or_y`` is neither ``'x'`` nor ``'y'``.
    """
    column_index = {'x': 0, 'y': 1}.get(x_or_y)
    if column_index is None:
        # Unknown selector: callers have always received 0 here, keep that.
        return 0
    # Convert only the requested column, so asking for 'x' does not touch
    # (or require) a second column.
    return df.iloc[:, column_index].to_numpy()
|
||||
|
||||
# Slightly modified mfcc with inputs like below.
|
||||
# Returns N (x_values from original df) and mfcc_y_values
|
||||
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients=None, nr_mel_filters=None):
    """Compute MFCC features for the second column of ``df``.

    Args:
        df: Frame whose first column holds the x values and whose second
            column holds the signal, as read by ``get_xory_from_df``.
        samplesize: Forwarded as the second positional argument of
            ``base.mfcc``; the call sites in this file pass the sample rate.
        windowsize: Forwarded as the third positional argument of
            ``base.mfcc`` (presumably the window length in seconds, going by
            the ``MFCC_WINDOWSIZE`` constant - confirm against the library).
        stepsize: Forwarded as the fourth positional argument of
            ``base.mfcc`` (presumably the step in seconds - confirm).
        nr_coefficients: Number of coefficients; falls back to the module
            constant ``NR_COEFFICIENTS`` when omitted.
        nr_mel_filters: Number of mel filters; falls back to the module
            constant ``NR_MEL_BINS`` when omitted.

    Returns:
        A tuple ``(N, features)`` where ``N`` is the x column of ``df`` and
        ``features`` is whatever ``base.mfcc`` returns.
    """
    # Existing callers pass only four arguments; resolve the remaining two
    # from the module-level MFCC settings instead of raising TypeError.
    if nr_coefficients is None:
        nr_coefficients = NR_COEFFICIENTS
    if nr_mel_filters is None:
        nr_mel_filters = NR_MEL_BINS

    N = get_xory_from_df('x', df)
    y = get_xory_from_df('y', df)
    return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
|
@ -8,10 +8,10 @@ from matplotlib import cm
|
||||
import matplotlib.ticker as ticker
|
||||
|
||||
# Global variables for MFCC
|
||||
mfcc_stepsize = 0.5 # Seconds
|
||||
mfcc_windowsize = 2 # Seconds
|
||||
nr_coefficients = 13 # Number of coefficients
|
||||
nr_mel_filters = 40 # Number of mel-filter-bins
|
||||
MFCC_STEPSIZE = 0.5 # Seconds
|
||||
MFCC_WINDOWSIZE = 2 # Seconds
|
||||
NR_COEFFICIENTS = 13 # Number of coefficients
|
||||
NR_MEL_BINS = 40 # Number of mel-filter-bins
|
||||
|
||||
|
||||
# PLOT FUNCTIONS --------------------------------------------------------------:
|
||||
@ -126,13 +126,6 @@ def denoice_dataset(handler:Handler.CSV_handler, subject_nr, which_arm, round, e
|
||||
df_new = Handler.make_df_from_xandy(N, y_values, emg_nr)
|
||||
return df_new
|
||||
|
||||
# Slightly modified mfcc with inputs like below.
|
||||
# Returns N (x_values from original df) and mfcc_y_values
|
||||
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients=None, nr_mel_filters=None):
    """Compute MFCC features for the second column of ``df``.

    Args:
        df: Frame whose first column holds the x values and whose second
            column holds the signal, as read by ``get_xory_from_df``.
        samplesize: Forwarded as the second positional argument of
            ``base.mfcc``; the call sites in this file pass the sample rate.
        windowsize: Forwarded as the third positional argument of
            ``base.mfcc`` (presumably the window length in seconds - confirm
            against the library).
        stepsize: Forwarded as the fourth positional argument of
            ``base.mfcc`` (presumably the step in seconds - confirm).
        nr_coefficients: Number of coefficients; falls back to the module
            constant ``NR_COEFFICIENTS`` when omitted.
        nr_mel_filters: Number of mel filters; falls back to the module
            constant ``NR_MEL_BINS`` when omitted.

    Returns:
        A tuple ``(N, features)`` where ``N`` is the x column of ``df`` and
        ``features`` is whatever ``base.mfcc`` returns.
    """
    # Callers in this file pass only four arguments; resolve the remaining
    # two from the module-level MFCC settings instead of raising TypeError.
    if nr_coefficients is None:
        nr_coefficients = NR_COEFFICIENTS
    if nr_mel_filters is None:
        nr_mel_filters = NR_MEL_BINS

    N = get_xory_from_df('x', df)
    y = get_xory_from_df('y', df)
    return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
|
||||
|
||||
|
||||
def test_for_NaN(dict, samples_per_person):
|
||||
for key, value in dict.items():
|
||||
@ -201,14 +194,14 @@ def mfcc_all_emg_plots(csv_handler:CSV_handler):
|
||||
df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6)
|
||||
df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7)
|
||||
df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8)
|
||||
N1, mfcc_feat1 = mfcc_custom(df1, samplerate1, mfcc_windowsize, mfcc_stepsize)
|
||||
N2, mfcc_feat2 = mfcc_custom(df2, samplerate2, mfcc_windowsize, mfcc_stepsize)
|
||||
N3, mfcc_feat3 = mfcc_custom(df3, samplerate3, mfcc_windowsize, mfcc_stepsize)
|
||||
N4, mfcc_feat4 = mfcc_custom(df4, samplerate4, mfcc_windowsize, mfcc_stepsize)
|
||||
N5, mfcc_feat5 = mfcc_custom(df5, samplerate5, mfcc_windowsize, mfcc_stepsize)
|
||||
N6, mfcc_feat6 = mfcc_custom(df6, samplerate6, mfcc_windowsize, mfcc_stepsize)
|
||||
N7, mfcc_feat7 = mfcc_custom(df7, samplerate7, mfcc_windowsize, mfcc_stepsize)
|
||||
N8, mfcc_feat8 = mfcc_custom(df8, samplerate8, mfcc_windowsize, mfcc_stepsize)
|
||||
N1, mfcc_feat1 = csv_handler.mfcc_custom(df1, samplerate1, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N2, mfcc_feat2 = csv_handler.mfcc_custom(df2, samplerate2, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N3, mfcc_feat3 = csv_handler.mfcc_custom(df3, samplerate3, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N4, mfcc_feat4 = csv_handler.mfcc_custom(df4, samplerate4, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N5, mfcc_feat5 = csv_handler.mfcc_custom(df5, samplerate5, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N6, mfcc_feat6 = csv_handler.mfcc_custom(df6, samplerate6, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N7, mfcc_feat7 = csv_handler.mfcc_custom(df7, samplerate7, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
N8, mfcc_feat8 = csv_handler.mfcc_custom(df8, samplerate8, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||
feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8]
|
||||
label_1 = 'Subject 1, session 1, left arm, emg nr. 1'
|
||||
label_2 = 'Subject 1, session 1, left arm, emg nr. 2'
|
||||
@ -229,9 +222,13 @@ def main():
|
||||
csv_handler = CSV_handler()
|
||||
csv_handler.load_data('soft')
|
||||
dl_data_handler = DL_data_handler(csv_handler)
|
||||
mfcc_3_plots_1_1_2(csv_handler)
|
||||
|
||||
'''
|
||||
dl_data_handler.store_samples(10)
|
||||
dict = dl_data_handler.samples_per_subject
|
||||
|
||||
dl_data_handler.save_mfcc()
|
||||
'''
|
||||
|
||||
|
||||
main()
|
@ -4,17 +4,8 @@ from scipy.fft import fft, fftfreq
|
||||
import pywt
|
||||
import sys
|
||||
import Handle_emg_data as Handler
|
||||
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
|
||||
from python_speech_features.python_speech_features import *
|
||||
|
||||
|
||||
# Takes in a df and outputs np arrays for x and y values
|
||||
def get_xory_from_df(x_or_y, df:DataFrame):
    """Return the first (``'x'``) or second (``'y'``) column of ``df`` as an array.

    Args:
        x_or_y: ``'x'`` selects the column at position 0, ``'y'`` the column
            at position 1.
        df: Frame to read the column from.

    Returns:
        The selected column converted with ``to_numpy()``, or ``0`` when
        ``x_or_y`` is neither ``'x'`` nor ``'y'``.
    """
    column_index = {'x': 0, 'y': 1}.get(x_or_y)
    if column_index is None:
        # Unknown selector: callers have always received 0 here, keep that.
        return 0
    # Convert only the requested column, so asking for 'x' does not touch
    # (or require) a second column.
    return df.iloc[:, column_index].to_numpy()
|
||||
|
||||
# Normalizes a ndarray of a signal to the scale of int16(32767)
|
||||
def normalize_wave(y_values):
|
||||
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user