chore: move mfcc func and add
skeleton for saving mfcc data
This commit is contained in:
parent
76aeb10ea7
commit
c36ddf1609
@ -5,8 +5,18 @@ from pathlib import Path
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from pandas.core.frame import DataFrame
|
from pandas.core.frame import DataFrame
|
||||||
from math import floor
|
from math import floor
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
|
||||||
|
from python_speech_features.python_speech_features import *
|
||||||
|
import json
|
||||||
#from Present_data import get_data
|
#from Present_data import get_data
|
||||||
|
|
||||||
|
# Global variables for MFCC
|
||||||
|
MFCC_STEPSIZE = 0.5 # Seconds
|
||||||
|
MFCC_WINDOWSIZE = 2 # Seconds
|
||||||
|
NR_COEFFICIENTS = 13 # Number of coefficients
|
||||||
|
NR_MEL_BINS = 40 # Number of mel-filter-bins
|
||||||
|
|
||||||
class Data_container:
|
class Data_container:
|
||||||
|
|
||||||
def __init__(self, subject_nr:int, subject_name:str):
|
def __init__(self, subject_nr:int, subject_name:str):
|
||||||
@ -488,6 +498,12 @@ class CSV_handler:
|
|||||||
|
|
||||||
class DL_data_handler:
|
class DL_data_handler:
|
||||||
|
|
||||||
|
JSON_PATH = "mfcc_data.json"
|
||||||
|
SAMPLE_RATE = None
|
||||||
|
TRACK_DURATION = None # measured in seconds
|
||||||
|
#SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, csv_handler:CSV_handler) -> None:
|
def __init__(self, csv_handler:CSV_handler) -> None:
|
||||||
self.csv_handler = csv_handler
|
self.csv_handler = csv_handler
|
||||||
# Should hold 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
|
# Should hold 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
|
||||||
@ -497,6 +513,7 @@ class DL_data_handler:
|
|||||||
4: [],
|
4: [],
|
||||||
5: []
|
5: []
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_samples_dict(self):
|
def get_samples_dict(self):
|
||||||
return self.samples_per_subject
|
return self.samples_per_subject
|
||||||
|
|
||||||
@ -568,8 +585,49 @@ class DL_data_handler:
|
|||||||
main_df = pd.concat([main_df, adding_df], ignore_index=True)
|
main_df = pd.concat([main_df, adding_df], ignore_index=True)
|
||||||
samplerate = get_samplerate(main_df)
|
samplerate = get_samplerate(main_df)
|
||||||
return main_df, samplerate
|
return main_df, samplerate
|
||||||
|
'''
|
||||||
|
def save_mfcc(raw_data_dict, json_path, samples_per_subject):
|
||||||
|
|
||||||
|
# dictionary to store mapping, labels, and MFCCs
|
||||||
|
data = {
|
||||||
|
"mapping": [],
|
||||||
|
"labels": [],
|
||||||
|
"mfcc": []
|
||||||
|
}
|
||||||
|
|
||||||
|
#hop_length = MFCC_STEPSIZE * sample_rate
|
||||||
|
#num_mfcc_vectors_per_segment = math.ceil(samples_per_subject / hop_length)
|
||||||
|
|
||||||
|
# loop through all subjects to get samples
|
||||||
|
for key, value in raw_data_dict.items():
|
||||||
|
|
||||||
|
|
||||||
|
# save the subject label (built from the dict key) in the mapping
|
||||||
|
subject_label = 'Subject ' + key
|
||||||
|
data["mapping"].append(subject_label)
|
||||||
|
print("\nProcessing: {}".format(subject_label))
|
||||||
|
|
||||||
|
# process all samples stored for this subject
|
||||||
|
for sample in value:
|
||||||
|
|
||||||
|
# unpack the sample into its signal and sample rate
|
||||||
|
signal, sample_rate = sample[0], sample[1]
|
||||||
|
|
||||||
|
# extract mfcc
|
||||||
|
mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
|
||||||
|
mfcc = mfcc.T
|
||||||
|
print(len(mfcc))
|
||||||
|
|
||||||
|
# store only mfcc feature with expected number of vectors
|
||||||
|
#if len(mfcc) == num_mfcc_vectors_per_segment:
|
||||||
|
data["mfcc"].append(mfcc.tolist())
|
||||||
|
data["labels"].append(key)
|
||||||
|
print("sample:{}".format(value.index(sample)))
|
||||||
|
|
||||||
|
# save MFCCs to json file
|
||||||
|
with open(json_path, "w") as fp:
|
||||||
|
json.dump(data, fp, indent=4)
|
||||||
|
'''
|
||||||
# HELP FUNCTIONS: ------------------------------------------------------------------------:
|
# HELP FUNCTIONS: ------------------------------------------------------------------------:
|
||||||
|
|
||||||
# Help: gets the str from emg nr
|
# Help: gets the str from emg nr
|
||||||
@ -600,3 +658,18 @@ def get_samplerate(df:DataFrame):
|
|||||||
samples = len(df.index)
|
samples = len(df.index)
|
||||||
samplerate = samples / seconds
|
samplerate = samples / seconds
|
||||||
return int(samplerate)
|
return int(samplerate)
|
||||||
|
|
||||||
|
# Takes in a df and returns one np array: column 0 for 'x', column 1 for 'y' (returns 0 for any other selector)
|
||||||
|
def get_xory_from_df(x_or_y, df:DataFrame):
|
||||||
|
swither = {
|
||||||
|
'x': df.iloc[:,0].to_numpy(),
|
||||||
|
'y': df.iloc[:,1].to_numpy()
|
||||||
|
}
|
||||||
|
return swither.get(x_or_y, 0)
|
||||||
|
|
||||||
|
# Slightly modified mfcc with inputs like below.
|
||||||
|
# Returns N (x_values from original df) and mfcc_y_values
|
||||||
|
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters):
|
||||||
|
N = get_xory_from_df('x', df)
|
||||||
|
y = get_xory_from_df('y', df)
|
||||||
|
return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
|
@ -8,10 +8,10 @@ from matplotlib import cm
|
|||||||
import matplotlib.ticker as ticker
|
import matplotlib.ticker as ticker
|
||||||
|
|
||||||
# Global variables for MFCC
|
# Global variables for MFCC
|
||||||
mfcc_stepsize = 0.5 # Seconds
|
MFCC_STEPSIZE = 0.5 # Seconds
|
||||||
mfcc_windowsize = 2 # Seconds
|
MFCC_WINDOWSIZE = 2 # Seconds
|
||||||
nr_coefficients = 13 # Number of coefficients
|
NR_COEFFICIENTS = 13 # Number of coefficients
|
||||||
nr_mel_filters = 40 # Number of mel-filter-bins
|
NR_MEL_BINS = 40 # Number of mel-filter-bins
|
||||||
|
|
||||||
|
|
||||||
# PLOT FUNCTIONS --------------------------------------------------------------:
|
# PLOT FUNCTIONS --------------------------------------------------------------:
|
||||||
@ -126,13 +126,6 @@ def denoice_dataset(handler:Handler.CSV_handler, subject_nr, which_arm, round, e
|
|||||||
df_new = Handler.make_df_from_xandy(N, y_values, emg_nr)
|
df_new = Handler.make_df_from_xandy(N, y_values, emg_nr)
|
||||||
return df_new
|
return df_new
|
||||||
|
|
||||||
# Slightly modified mfcc with inputs like below.
|
|
||||||
# Returns N (x_values from original df) and mfcc_y_values
|
|
||||||
def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters):
|
|
||||||
N = get_xory_from_df('x', df)
|
|
||||||
y = get_xory_from_df('y', df)
|
|
||||||
return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
|
|
||||||
|
|
||||||
|
|
||||||
def test_for_NaN(dict, samples_per_person):
|
def test_for_NaN(dict, samples_per_person):
|
||||||
for key, value in dict.items():
|
for key, value in dict.items():
|
||||||
@ -201,14 +194,14 @@ def mfcc_all_emg_plots(csv_handler:CSV_handler):
|
|||||||
df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6)
|
df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6)
|
||||||
df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7)
|
df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7)
|
||||||
df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8)
|
df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8)
|
||||||
N1, mfcc_feat1 = mfcc_custom(df1, samplerate1, mfcc_windowsize, mfcc_stepsize)
|
N1, mfcc_feat1 = csv_handler.mfcc_custom(df1, samplerate1, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N2, mfcc_feat2 = mfcc_custom(df2, samplerate2, mfcc_windowsize, mfcc_stepsize)
|
N2, mfcc_feat2 = csv_handler.mfcc_custom(df2, samplerate2, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N3, mfcc_feat3 = mfcc_custom(df3, samplerate3, mfcc_windowsize, mfcc_stepsize)
|
N3, mfcc_feat3 = csv_handler.mfcc_custom(df3, samplerate3, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N4, mfcc_feat4 = mfcc_custom(df4, samplerate4, mfcc_windowsize, mfcc_stepsize)
|
N4, mfcc_feat4 = csv_handler.mfcc_custom(df4, samplerate4, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N5, mfcc_feat5 = mfcc_custom(df5, samplerate5, mfcc_windowsize, mfcc_stepsize)
|
N5, mfcc_feat5 = csv_handler.mfcc_custom(df5, samplerate5, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N6, mfcc_feat6 = mfcc_custom(df6, samplerate6, mfcc_windowsize, mfcc_stepsize)
|
N6, mfcc_feat6 = csv_handler.mfcc_custom(df6, samplerate6, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N7, mfcc_feat7 = mfcc_custom(df7, samplerate7, mfcc_windowsize, mfcc_stepsize)
|
N7, mfcc_feat7 = csv_handler.mfcc_custom(df7, samplerate7, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
N8, mfcc_feat8 = mfcc_custom(df8, samplerate8, mfcc_windowsize, mfcc_stepsize)
|
N8, mfcc_feat8 = csv_handler.mfcc_custom(df8, samplerate8, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
|
||||||
feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8]
|
feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8]
|
||||||
label_1 = 'Subject 1, session 1, left arm, emg nr. 1'
|
label_1 = 'Subject 1, session 1, left arm, emg nr. 1'
|
||||||
label_2 = 'Subject 1, session 1, left arm, emg nr. 2'
|
label_2 = 'Subject 1, session 1, left arm, emg nr. 2'
|
||||||
@ -229,9 +222,13 @@ def main():
|
|||||||
csv_handler = CSV_handler()
|
csv_handler = CSV_handler()
|
||||||
csv_handler.load_data('soft')
|
csv_handler.load_data('soft')
|
||||||
dl_data_handler = DL_data_handler(csv_handler)
|
dl_data_handler = DL_data_handler(csv_handler)
|
||||||
|
mfcc_3_plots_1_1_2(csv_handler)
|
||||||
|
|
||||||
|
'''
|
||||||
dl_data_handler.store_samples(10)
|
dl_data_handler.store_samples(10)
|
||||||
dict = dl_data_handler.samples_per_subject
|
dict = dl_data_handler.samples_per_subject
|
||||||
|
dl_data_handler.save_mfcc()
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
main()
|
main()
|
@ -4,17 +4,8 @@ from scipy.fft import fft, fftfreq
|
|||||||
import pywt
|
import pywt
|
||||||
import sys
|
import sys
|
||||||
import Handle_emg_data as Handler
|
import Handle_emg_data as Handler
|
||||||
sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
|
|
||||||
from python_speech_features.python_speech_features import *
|
|
||||||
|
|
||||||
|
|
||||||
# Takes in a df and outputs np arrays for x and y values
|
|
||||||
def get_xory_from_df(x_or_y, df:DataFrame):
|
|
||||||
swither = {
|
|
||||||
'x': df.iloc[:,0].to_numpy(),
|
|
||||||
'y': df.iloc[:,1].to_numpy()
|
|
||||||
}
|
|
||||||
return swither.get(x_or_y, 0)
|
|
||||||
|
|
||||||
# Normalizes a ndarray of a signal to the scale of int16(32767)
|
# Normalizes a ndarray of a signal to the scale of int16(32767)
|
||||||
def normalize_wave(y_values):
|
def normalize_wave(y_values):
|
||||||
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user