chore: move mfcc func and add

skeleton for saving mfcc data
2021-07-02 10:13:27 +02:00 · 2021-07-02 10:13:27 +02:00 · c36ddf1609
commit c36ddf1609
parent 76aeb10ea7
5 changed files with 109 additions and 48 deletions
--- a/Handle_emg_data.py
+++ b/Handle_emg_data.py
@ -5,8 +5,18 @@ from pathlib import Path
 import numpy as np
 from pandas.core.frame import DataFrame
 from math import floor
+import sys
+sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
+from python_speech_features.python_speech_features import *
+import json
 #from Present_data import get_data

+# Global variables for MFCC
+MFCC_STEPSIZE = 0.5     # Seconds
+MFCC_WINDOWSIZE = 2     # Seconds
+NR_COEFFICIENTS = 13    # Number of coefficients
+NR_MEL_BINS = 40     # Number of mel-filter-bins 
+
 class Data_container:
      
    def __init__(self, subject_nr:int, subject_name:str):
@ -488,6 +498,12 @@ class CSV_handler:

 class DL_data_handler:

+    JSON_PATH = "mfcc_data.json"
+    SAMPLE_RATE = None
+    TRACK_DURATION = None # measured in seconds
+    #SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
+
+
    def __init__(self, csv_handler:CSV_handler) -> None:
        self.csv_handler = csv_handler
        # Should contain 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
@ -497,6 +513,7 @@ class DL_data_handler:
                                    4: [],
                                    5: []
                                    }
+                                    
    def get_samples_dict(self):
        return self.samples_per_subject
        
@ -568,30 +585,71 @@ class DL_data_handler:
            main_df = pd.concat([main_df, adding_df], ignore_index=True)
        samplerate = get_samplerate(main_df)
        return main_df, samplerate
+    '''
+    def save_mfcc(raw_data_dict, json_path, samples_per_subject):
+        
+        # dictionary to store mapping, labels, and MFCCs
+        data = {
+            "mapping": [],
+            "labels": [],
+            "mfcc": []
+        }
+
+        #hop_length = MFCC_STEPSIZE * sample_rate
+        #num_mfcc_vectors_per_segment = math.ceil(samples_per_subject / hop_length)
+
+        # loop through all subjects to get samples
+        for key, value in raw_data_dict.items():


-# HELP FUNCTIONS: ------------------------------------------------------------------------: 
+            # save the subject label ('Subject ' + key) in the mapping
+            subject_label = 'Subject ' + key
+            data["mapping"].append(subject_label)
+            print("\nProcessing: {}".format(subject_label))

-# Help: gets the str from emg nr
-def get_emg_str(emg_nr):
-    return 'emg' + str(emg_nr)
+            # process all samples stored for this subject
+            for sample in value:

-# Help: gets the min/max of a df
-def get_min_max_timestamp(df:DataFrame):
-    #min = int(np.floor(df['timestamp'].min()))
-    min = df['timestamp'].min()
-    max = df['timestamp'].max()
-    return min, max
+                # unpack the sample into its signal and sample rate
+                signal, sample_rate = sample[0], sample[1]

-# Help: returns df_time_emg
-def make_df_from_xandy(x, y, emg_nr):
-    dict = {'timestamp': x, get_emg_str(emg_nr): y}
-    df = DataFrame(dict)
-    #print(df)
-    return df
+                # extract mfcc
+                mfcc = mfcc_custom(signal, sample_rate, MFCC_WINDOWSIZE, MFCC_STEPSIZE, NR_COEFFICIENTS, NR_MEL_BINS)
+                mfcc = mfcc.T
+                print(len(mfcc))

-# Help: returns the samplerate of a df
-def get_samplerate(df:DataFrame):
+                # store only mfcc feature with expected number of vectors
+                #if len(mfcc) == num_mfcc_vectors_per_segment:
+                data["mfcc"].append(mfcc.tolist())
+                data["labels"].append(key)
+                print("sample:{}".format(value.index(sample)))
+
+        # save MFCCs to json file
+        with open(json_path, "w") as fp:
+            json.dump(data, fp, indent=4)
+    '''
+    # HELP FUNCTIONS: ------------------------------------------------------------------------: 
+
+    # Help: gets the str from emg nr
+    def get_emg_str(emg_nr):
+        return 'emg' + str(emg_nr)
+
+    # Help: gets the min/max of a df
+    def get_min_max_timestamp(df:DataFrame):
+        #min = int(np.floor(df['timestamp'].min()))
+        min = df['timestamp'].min()
+        max = df['timestamp'].max()
+        return min, max
+
+    # Help: returns df_time_emg
+    def make_df_from_xandy(x, y, emg_nr):
+        dict = {'timestamp': x, get_emg_str(emg_nr): y}
+        df = DataFrame(dict)
+        #print(df)
+        return df
+
+    # Help: returns the samplerate of a df
+    def get_samplerate(df:DataFrame):
        min, max = get_min_max_timestamp(df)
        if max > 60:
            seconds = max - 60 - min
@ -599,4 +657,19 @@ def get_samplerate(df:DataFrame):
            seconds = max - min
        samples = len(df.index)
        samplerate = samples / seconds
-        return int(samplerate)
+        return int(samplerate)
+
+    # Takes in a df and returns one np array: the first column for 'x', the second column for 'y' (0 for any other selector)
+    def get_xory_from_df(x_or_y, df:DataFrame):
+        swither = {
+            'x': df.iloc[:,0].to_numpy(),
+            'y': df.iloc[:,1].to_numpy()
+        }
+        return swither.get(x_or_y, 0)
+    
+    # Slightly modified mfcc with inputs like below.
+    # Returns N (x_values from original df) and mfcc_y_values 
+    def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters):
+        N = get_xory_from_df('x', df)
+        y = get_xory_from_df('y', df)
+        return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
--- a/Present_data.py
+++ b/Present_data.py
@ -8,10 +8,10 @@ from matplotlib import cm
 import matplotlib.ticker as ticker

 # Global variables for MFCC
-mfcc_stepsize = 0.5     # Seconds
-mfcc_windowsize = 2     # Seconds
-nr_coefficients = 13    # Number of coefficients
-nr_mel_filters = 40     # Number of mel-filter-bins 
+MFCC_STEPSIZE = 0.5     # Seconds
+MFCC_WINDOWSIZE = 2     # Seconds
+NR_COEFFICIENTS = 13    # Number of coefficients
+NR_MEL_BINS = 40     # Number of mel-filter-bins 


 # PLOT FUNCTIONS --------------------------------------------------------------: 
@ -126,13 +126,6 @@ def denoice_dataset(handler:Handler.CSV_handler, subject_nr, which_arm, round, e
    df_new = Handler.make_df_from_xandy(N, y_values, emg_nr)
    return df_new

-# Slightly modified mfcc with inputs like below.
-# Returns N (x_values from original df) and mfcc_y_values 
-def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters):
-    N = get_xory_from_df('x', df)
-    y = get_xory_from_df('y', df)
-    return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
-

 def test_for_NaN(dict, samples_per_person):
    for key, value in dict.items():
@ -201,14 +194,14 @@ def mfcc_all_emg_plots(csv_handler:CSV_handler):
    df6, samplerate6 = csv_handler.get_data( 1, 'left', 1, 6)
    df7, samplerate7 = csv_handler.get_data( 1, 'left', 1, 7)
    df8, samplerate8 = csv_handler.get_data( 1, 'left', 1, 8)
-    N1, mfcc_feat1 = mfcc_custom(df1, samplerate1, mfcc_windowsize, mfcc_stepsize)
-    N2, mfcc_feat2 = mfcc_custom(df2, samplerate2, mfcc_windowsize, mfcc_stepsize)
-    N3, mfcc_feat3 = mfcc_custom(df3, samplerate3, mfcc_windowsize, mfcc_stepsize)
-    N4, mfcc_feat4 = mfcc_custom(df4, samplerate4, mfcc_windowsize, mfcc_stepsize)
-    N5, mfcc_feat5 = mfcc_custom(df5, samplerate5, mfcc_windowsize, mfcc_stepsize)
-    N6, mfcc_feat6 = mfcc_custom(df6, samplerate6, mfcc_windowsize, mfcc_stepsize)
-    N7, mfcc_feat7 = mfcc_custom(df7, samplerate7, mfcc_windowsize, mfcc_stepsize)
-    N8, mfcc_feat8 = mfcc_custom(df8, samplerate8, mfcc_windowsize, mfcc_stepsize)
+    N1, mfcc_feat1 = csv_handler.mfcc_custom(df1, samplerate1, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N2, mfcc_feat2 = csv_handler.mfcc_custom(df2, samplerate2, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N3, mfcc_feat3 = csv_handler.mfcc_custom(df3, samplerate3, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N4, mfcc_feat4 = csv_handler.mfcc_custom(df4, samplerate4, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N5, mfcc_feat5 = csv_handler.mfcc_custom(df5, samplerate5, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N6, mfcc_feat6 = csv_handler.mfcc_custom(df6, samplerate6, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N7, mfcc_feat7 = csv_handler.mfcc_custom(df7, samplerate7, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
+    N8, mfcc_feat8 = csv_handler.mfcc_custom(df8, samplerate8, MFCC_WINDOWSIZE, MFCC_STEPSIZE)
    feat_list = [mfcc_feat1, mfcc_feat2, mfcc_feat3, mfcc_feat4, mfcc_feat5, mfcc_feat6, mfcc_feat7, mfcc_feat8]
    label_1 = 'Subject 1, session 1, left arm, emg nr. 1'
    label_2 = 'Subject 1, session 1, left arm, emg nr. 2'
@ -229,9 +222,13 @@ def main():
    csv_handler = CSV_handler()
    csv_handler.load_data('soft')
    dl_data_handler = DL_data_handler(csv_handler)
+    mfcc_3_plots_1_1_2(csv_handler)
+
+    '''
    dl_data_handler.store_samples(10)
    dict = dl_data_handler.samples_per_subject
-
+    dl_data_handler.save_mfcc()
+    '''
    
   
 main()
--- a/Signal_prep.py
+++ b/Signal_prep.py
@ -4,17 +4,8 @@ from scipy.fft import fft, fftfreq
 import pywt
 import sys
 import Handle_emg_data as Handler
-sys.path.insert(0, '/Users/Markus/Prosjekter git/Slovakia 2021/python_speech_features/python_speech_features')
-from python_speech_features.python_speech_features import *


-# Takes in a df and outputs np arrays for x and y values
-def get_xory_from_df(x_or_y, df:DataFrame):
-    swither = {
-        'x': df.iloc[:,0].to_numpy(),
-        'y': df.iloc[:,1].to_numpy()
-    }
-    return swither.get(x_or_y, 0)

 # Normalizes a ndarray of a signal to the scale of int16(32767)
 def normalize_wave(y_values):
--- a/pycache/Handle_emg_data.cpython-38.pyc
+++ b/pycache/Handle_emg_data.cpython-38.pyc
--- a/pycache/Signal_prep.cpython-38.pyc
+++ b/pycache/Signal_prep.cpython-38.pyc