fix: fix the dl_data_handler so it does
not create NaN values in the samples
This commit is contained in:
parent
500018b6aa
commit
d0978aa2b3
@ -1,7 +1,10 @@
|
|||||||
|
from numpy.core.arrayprint import IntegerFormat
|
||||||
|
from numpy.lib import math
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from pandas.core.frame import DataFrame
|
from pandas.core.frame import DataFrame
|
||||||
|
from math import floor
|
||||||
#from Present_data import get_data
|
#from Present_data import get_data
|
||||||
|
|
||||||
class Data_container:
|
class Data_container:
|
||||||
@ -43,6 +46,9 @@ class CSV_handler:
|
|||||||
def store_df_in_container(self, filename:str, emg_nr:int, which_arm:str, data_container:Data_container, round:int):
|
def store_df_in_container(self, filename:str, emg_nr:int, which_arm:str, data_container:Data_container, round:int):
|
||||||
df = self.get_time_emg_table(filename, emg_nr+1)
|
df = self.get_time_emg_table(filename, emg_nr+1)
|
||||||
|
|
||||||
|
if df.isnull().values.any():
|
||||||
|
print('NaN in: subject', data_container.subject_nr, 'arm:', which_arm, 'session:', round, 'emg nr:', emg_nr)
|
||||||
|
|
||||||
# Places the data correctly:
|
# Places the data correctly:
|
||||||
if round == 1:
|
if round == 1:
|
||||||
if which_arm == 'left':
|
if which_arm == 'left':
|
||||||
@ -484,7 +490,8 @@ class DL_data_handler:
|
|||||||
|
|
||||||
def __init__(self, csv_handler:CSV_handler) -> None:
|
def __init__(self, csv_handler:CSV_handler) -> None:
|
||||||
self.csv_handler = csv_handler
|
self.csv_handler = csv_handler
|
||||||
self.samples_per_subject = {1: [], # Should med 4 sessions * split nr of samples per person
|
# Should hold 4 sessions * split_nr samples per person. Each sample is structured like [sample_df, samplerate]
|
||||||
|
self.samples_per_subject = {1: [],
|
||||||
2: [],
|
2: [],
|
||||||
3: [],
|
3: [],
|
||||||
4: [],
|
4: [],
|
||||||
@ -503,21 +510,41 @@ class DL_data_handler:
|
|||||||
for emg_nr in range(8):
|
for emg_nr in range(8):
|
||||||
df, _ = self.csv_handler.get_data(subject_nr, 'right', session_nr, emg_nr+1)
|
df, _ = self.csv_handler.get_data(subject_nr, 'right', session_nr, emg_nr+1)
|
||||||
list_of_emgs.append(DataFrame(df[get_emg_str(emg_nr+1)]))
|
list_of_emgs.append(DataFrame(df[get_emg_str(emg_nr+1)]))
|
||||||
|
|
||||||
return list_of_emgs # list of emg data where first element also has timestamp column
|
return list_of_emgs # list of emg data where first element also has timestamp column
|
||||||
|
|
||||||
def make_subj_sample(self, list_of_emgs):
|
def make_subj_sample(self, list_of_emgs_):
|
||||||
# starting_point:DataFrame = list_of_emgs[0].rename(columns={'emg1':'emg'})
|
# Check whether the first and last EMG frames differ in length and, if so, truncate all 16 frames to the shorter length
|
||||||
|
list_of_emgs = []
|
||||||
|
length_left_emgs = int(len(list_of_emgs_[0].index))
|
||||||
|
length_right_emgs = int(len(list_of_emgs_[-1].index))
|
||||||
|
if length_left_emgs < length_right_emgs:
|
||||||
|
for i in range(16):
|
||||||
|
new_emg_df = list_of_emgs_[i].head(length_left_emgs)
|
||||||
|
list_of_emgs.append(new_emg_df)
|
||||||
|
elif length_right_emgs < length_left_emgs:
|
||||||
|
for i in range(16):
|
||||||
|
new_emg_df = list_of_emgs_[i].head(length_right_emgs)
|
||||||
|
list_of_emgs.append(new_emg_df)
|
||||||
|
else:
|
||||||
|
list_of_emgs = list_of_emgs_
|
||||||
|
|
||||||
tot_session_df_list = []
|
tot_session_df_list = []
|
||||||
for i in range(8):
|
for i in range(8):
|
||||||
#emg_str = get_emg_str(i)
|
df = list_of_emgs[i]
|
||||||
df = list_of_emgs[i] # .rename(columns={emg_str: 'emg'})
|
|
||||||
tot_session_df_list.append(df)
|
tot_session_df_list.append(df)
|
||||||
for i in range(1, 9):
|
for i in range(1, 9):
|
||||||
emg_str_old = get_emg_str(i)
|
emg_str_old = get_emg_str(i)
|
||||||
emg_str_new = get_emg_str(8+i)
|
emg_str_new = get_emg_str(8+i)
|
||||||
df:DataFrame = list_of_emgs[7+i].rename(columns={emg_str_old: emg_str_new})
|
df = list_of_emgs[7+i].rename(columns={emg_str_old: emg_str_new})
|
||||||
tot_session_df_list.append(df)
|
tot_session_df_list.append(df)
|
||||||
tot_session_df = pd.concat(tot_session_df_list, axis=1)
|
tot_session_df = pd.concat(tot_session_df_list, axis=1, ignore_index=True)
|
||||||
|
|
||||||
|
# TESTING FOR NAN
|
||||||
|
if tot_session_df.isnull().values.any():
|
||||||
|
print('NaN in: where? THERE')
|
||||||
|
print(length_left_emgs, length_right_emgs)
|
||||||
|
#print(tot_session_df_list)
|
||||||
|
|
||||||
return tot_session_df
|
return tot_session_df
|
||||||
|
|
||||||
@ -527,10 +554,25 @@ class DL_data_handler:
|
|||||||
for session_nr in range(4):
|
for session_nr in range(4):
|
||||||
list_of_emg = self.get_emg_list(subject_nr+1, session_nr+1)
|
list_of_emg = self.get_emg_list(subject_nr+1, session_nr+1)
|
||||||
tot_session_df = self.make_subj_sample(list_of_emg)
|
tot_session_df = self.make_subj_sample(list_of_emg)
|
||||||
|
|
||||||
|
# TESTING FOR NAN
|
||||||
|
if tot_session_df.isnull().values.any():
|
||||||
|
print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER MAKE')
|
||||||
|
|
||||||
samples = np.array_split(tot_session_df.to_numpy(), split_nr)
|
samples = np.array_split(tot_session_df.to_numpy(), split_nr)
|
||||||
for array in samples:
|
for array in samples:
|
||||||
df = DataFrame(array).rename(columns={0:'timestamp'})
|
df = DataFrame(array).rename(columns={0:'timestamp'})
|
||||||
|
'''
|
||||||
|
# TESTING FOR NAN
|
||||||
|
if df.isnull().values.any():
|
||||||
|
print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER SPLIT')
|
||||||
|
'''
|
||||||
df_finished, samplerate = self.reshape_session_df_to_signal(df)
|
df_finished, samplerate = self.reshape_session_df_to_signal(df)
|
||||||
|
'''
|
||||||
|
# TESTING FOR NAN
|
||||||
|
if df_finished.isnull().values.any():
|
||||||
|
print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER RESHAPE')
|
||||||
|
'''
|
||||||
subj_samples.append([df_finished, samplerate])
|
subj_samples.append([df_finished, samplerate])
|
||||||
|
|
||||||
self.samples_per_subject[subject_nr+1] = subj_samples
|
self.samples_per_subject[subject_nr+1] = subj_samples
|
||||||
@ -567,10 +609,10 @@ def make_df_from_xandy(x, y, emg_nr):
|
|||||||
# Helper: returns the samplerate of a df
|
# Helper: returns the samplerate of a df
|
||||||
def get_samplerate(df:DataFrame):
|
def get_samplerate(df:DataFrame):
|
||||||
min, max = get_min_max_timestamp(df)
|
min, max = get_min_max_timestamp(df)
|
||||||
#print(min, max)
|
if max > 60:
|
||||||
seconds = max - 60 - min
|
seconds = max - 60 - min
|
||||||
#print(seconds)
|
else:
|
||||||
samples = len(df['timestamp'])
|
seconds = max - min
|
||||||
#print(samples)
|
samples = len(df.index)
|
||||||
samplerate = samples / seconds
|
samplerate = samples / seconds
|
||||||
return samplerate
|
return samplerate
|
@ -100,13 +100,13 @@ def plot_all_emg_mfcc(data_list:list, label_list:list):
|
|||||||
|
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
def pretty(dict, indent=0):
|
def pretty(dict):
|
||||||
for key, value in dict.items():
|
for key, value in dict.items():
|
||||||
print('\t' * indent + str(key))
|
print('Subject', key, 'samples:')
|
||||||
if isinstance(value, dict):
|
print('\t\t Number av samples:', len(value))
|
||||||
pretty(value, indent+1)
|
print('\t\t EX sample nr 1:')
|
||||||
else:
|
print('\t\t\t Type:', type(value[0][0]), type(value[0][1]))
|
||||||
print('\t' * (indent+1) + str(value))
|
print('\t\t\t Sample:', value[0][0], value[0][1])
|
||||||
|
|
||||||
# DATA FUNCTIONS: --------------------------------------------------------------:
|
# DATA FUNCTIONS: --------------------------------------------------------------:
|
||||||
|
|
||||||
@ -134,6 +134,13 @@ def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients,
|
|||||||
return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
|
return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters)
|
||||||
|
|
||||||
|
|
||||||
|
def test_for_NaN(dict, samples_per_person):
|
||||||
|
for key, value in dict.items():
|
||||||
|
for i in range(samples_per_person):
|
||||||
|
df = value[i][0]
|
||||||
|
#print(df)
|
||||||
|
print(df.isnull())
|
||||||
|
|
||||||
# CASE FUNCTIONS ----------------------------------------------------------------:
|
# CASE FUNCTIONS ----------------------------------------------------------------:
|
||||||
|
|
||||||
# Takes in a df and compares the FFT and the wavelet denoising of the FFT
|
# Takes in a df and compares the FFT and the wavelet denoising of the FFT
|
||||||
@ -225,10 +232,8 @@ def main():
|
|||||||
dl_data_handler.store_samples(10)
|
dl_data_handler.store_samples(10)
|
||||||
dict = dl_data_handler.samples_per_subject
|
dict = dl_data_handler.samples_per_subject
|
||||||
|
|
||||||
test_session_df = dict.get(1)[0]
|
|
||||||
print(test_session_df)
|
|
||||||
df, _ = dl_data_handler.reshape_session_df_to_signal(test_session_df)
|
|
||||||
print(df[:50])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user