fix: fix the dl_data_handler so it does not create NaN values in the samples
This commit is contained in:
Skudalen 2021-07-01 17:56:20 +02:00
parent 500018b6aa
commit d0978aa2b3
3 changed files with 70 additions and 23 deletions

View File

@ -1,7 +1,10 @@
from numpy.core.arrayprint import IntegerFormat
from numpy.lib import math
import pandas as pd import pandas as pd
from pathlib import Path from pathlib import Path
import numpy as np import numpy as np
from pandas.core.frame import DataFrame from pandas.core.frame import DataFrame
from math import floor
#from Present_data import get_data #from Present_data import get_data
class Data_container: class Data_container:
@ -43,6 +46,9 @@ class CSV_handler:
def store_df_in_container(self, filename:str, emg_nr:int, which_arm:str, data_container:Data_container, round:int): def store_df_in_container(self, filename:str, emg_nr:int, which_arm:str, data_container:Data_container, round:int):
df = self.get_time_emg_table(filename, emg_nr+1) df = self.get_time_emg_table(filename, emg_nr+1)
if df.isnull().values.any():
print('NaN in: subject', data_container.subject_nr, 'arm:', which_arm, 'session:', round, 'emg nr:', emg_nr)
# Places the data correctly: # Places the data correctly:
if round == 1: if round == 1:
if which_arm == 'left': if which_arm == 'left':
@ -484,7 +490,8 @@ class DL_data_handler:
def __init__(self, csv_handler:CSV_handler) -> None: def __init__(self, csv_handler:CSV_handler) -> None:
self.csv_handler = csv_handler self.csv_handler = csv_handler
self.samples_per_subject = {1: [], # Should med 4 sessions * split nr of samples per person # Should med 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate]
self.samples_per_subject = {1: [],
2: [], 2: [],
3: [], 3: [],
4: [], 4: [],
@ -503,21 +510,41 @@ class DL_data_handler:
for emg_nr in range(8): for emg_nr in range(8):
df, _ = self.csv_handler.get_data(subject_nr, 'right', session_nr, emg_nr+1) df, _ = self.csv_handler.get_data(subject_nr, 'right', session_nr, emg_nr+1)
list_of_emgs.append(DataFrame(df[get_emg_str(emg_nr+1)])) list_of_emgs.append(DataFrame(df[get_emg_str(emg_nr+1)]))
return list_of_emgs # list of emg data where first element also has timestamp column return list_of_emgs # list of emg data where first element also has timestamp column
def make_subj_sample(self, list_of_emgs_):
    """Merge the per-channel EMG frames of one session into a single frame.

    Every frame is first cut to the length of the shortest frame in the
    list, so the column-wise concatenation cannot pad a shorter channel
    with NaN rows.

    Args:
        list_of_emgs_: non-empty list of DataFrames, one per EMG channel,
            in the order they should appear as columns (presumably the
            left-arm frames followed by the right-arm frames -- TODO
            confirm against get_emg_list).

    Returns:
        DataFrame with all input columns side by side. Because of
        ``ignore_index=True`` the column labels are 0..n-1, not the
        original column names.

    Raises:
        ValueError: if ``list_of_emgs_`` is empty.
    """
    if not list_of_emgs_:
        raise ValueError('list_of_emgs_ must contain at least one DataFrame')

    # Kept only for the diagnostic print below
    length_left_emgs = len(list_of_emgs_[0].index)
    length_right_emgs = len(list_of_emgs_[-1].index)

    # Truncate to the shortest frame overall, not just the shorter of the
    # first/last frame: a short channel in the middle of the list would
    # otherwise still produce NaN rows when concatenating along axis=1.
    shortest = min(len(emg_df.index) for emg_df in list_of_emgs_)
    list_of_emgs = [emg_df.head(shortest) for emg_df in list_of_emgs_]

    # No per-channel renaming is done here: ignore_index=True replaces the
    # column labels with 0..n-1, so any rename would be discarded anyway.
    tot_session_df = pd.concat(list_of_emgs, axis=1, ignore_index=True)

    # Diagnostic: NaN can still come from the input data itself or from
    # frames whose row indexes do not line up.
    if tot_session_df.isnull().values.any():
        print('NaN in: where? THERE')
        print(length_left_emgs, length_right_emgs)
    return tot_session_df
@ -527,10 +554,25 @@ class DL_data_handler:
for session_nr in range(4): for session_nr in range(4):
list_of_emg = self.get_emg_list(subject_nr+1, session_nr+1) list_of_emg = self.get_emg_list(subject_nr+1, session_nr+1)
tot_session_df = self.make_subj_sample(list_of_emg) tot_session_df = self.make_subj_sample(list_of_emg)
# TESTING FOR NAN
if tot_session_df.isnull().values.any():
print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER MAKE')
samples = np.array_split(tot_session_df.to_numpy(), split_nr) samples = np.array_split(tot_session_df.to_numpy(), split_nr)
for array in samples: for array in samples:
df = DataFrame(array).rename(columns={0:'timestamp'}) df = DataFrame(array).rename(columns={0:'timestamp'})
'''
# TESTING FOR NAN
if df.isnull().values.any():
print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER SPLIT')
'''
df_finished, samplerate = self.reshape_session_df_to_signal(df) df_finished, samplerate = self.reshape_session_df_to_signal(df)
'''
# TESTING FOR NAN
if df_finished.isnull().values.any():
print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER RESHAPE')
'''
subj_samples.append([df_finished, samplerate]) subj_samples.append([df_finished, samplerate])
self.samples_per_subject[subject_nr+1] = subj_samples self.samples_per_subject[subject_nr+1] = subj_samples
@ -567,10 +609,10 @@ def make_df_from_xandy(x, y, emg_nr):
# Help: returns the samplerate of a df # Help: returns the samplerate of a df
def get_samplerate(df:DataFrame):
    """Return the number of rows in `df` divided by its timestamp span.

    The span is derived from the two values returned by
    get_min_max_timestamp(df).
    """
    first_ts, last_ts = get_min_max_timestamp(df)
    # NOTE(review): 60 is subtracted from the span whenever the largest
    # timestamp exceeds 60 -- presumably compensating for a minute
    # roll-over in the recorded timestamps; TODO confirm.
    if last_ts > 60:
        duration = last_ts - 60 - first_ts
    else:
        duration = last_ts - first_ts
    return len(df.index) / duration

View File

@ -100,13 +100,13 @@ def plot_all_emg_mfcc(data_list:list, label_list:list):
plt.show() plt.show()
def pretty(dict):
    """Print a short summary of the stored samples for every subject.

    For each key the number of samples is printed, followed by the types
    and values of the first sample's two elements. Subjects with an empty
    sample list only get the count line instead of raising IndexError.

    Args:
        dict: mapping of subject key to a sequence of samples, where each
            sample is indexable with at least two elements. (The name
            shadows the builtin; it is kept so existing callers still
            work.)
    """
    for key, value in dict.items():
        print('Subject', key, 'samples:')
        print('\t\t Number av samples:', len(value))
        if len(value) == 0:
            # Nothing stored for this subject, so there is no example to show
            continue
        first_sample = value[0]
        print('\t\t EX sample nr 1:')
        print('\t\t\t Type:', type(first_sample[0]), type(first_sample[1]))
        print('\t\t\t Sample:', first_sample[0], first_sample[1])
# DATA FUNCTIONS: --------------------------------------------------------------: # DATA FUNCTIONS: --------------------------------------------------------------:
@ -134,6 +134,13 @@ def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients,
return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters) return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
def test_for_NaN(dict, samples_per_person):
    """Print the null-mask of the leading samples of every subject.

    Args:
        dict: mapping whose values are sequences of samples; element 0 of
            each sample must offer an ``isnull()`` method.
        samples_per_person: how many samples (from index 0) to inspect per
            subject; each value must hold at least this many.
    """
    for subject_samples in dict.values():
        for sample_idx in range(samples_per_person):
            sample_df = subject_samples[sample_idx][0]
            print(sample_df.isnull())
# CASE FUNCTIONS ----------------------------------------------------------------: # CASE FUNCTIONS ----------------------------------------------------------------:
# Takes in a df and compares the FFT and the wavelet denoising of the FFT # Takes in a df and compares the FFT and the wavelet denoising of the FFT
@ -225,10 +232,8 @@ def main():
dl_data_handler.store_samples(10) dl_data_handler.store_samples(10)
dict = dl_data_handler.samples_per_subject dict = dl_data_handler.samples_per_subject
test_session_df = dict.get(1)[0]
print(test_session_df)
df, _ = dl_data_handler.reshape_session_df_to_signal(test_session_df)
print(df[:50])