From d0978aa2b3012e3cfff583d1e158d60757422f31 Mon Sep 17 00:00:00 2001 From: Skudalen Date: Thu, 1 Jul 2021 17:56:20 +0200 Subject: [PATCH] fix: fix the dl_data_handler so it does not create NaN values in the samples --- Handle_emg_data.py | 66 +++++++++++++++++---- Present_data.py | 27 +++++---- __pycache__/Handle_emg_data.cpython-38.pyc | Bin 22756 -> 23518 bytes 3 files changed, 70 insertions(+), 23 deletions(-) diff --git a/Handle_emg_data.py b/Handle_emg_data.py index d07356d..6dccc9c 100644 --- a/Handle_emg_data.py +++ b/Handle_emg_data.py @@ -1,7 +1,10 @@ +from numpy.core.arrayprint import IntegerFormat +from numpy.lib import math import pandas as pd from pathlib import Path import numpy as np from pandas.core.frame import DataFrame +from math import floor #from Present_data import get_data class Data_container: @@ -43,6 +46,9 @@ class CSV_handler: def store_df_in_container(self, filename:str, emg_nr:int, which_arm:str, data_container:Data_container, round:int): df = self.get_time_emg_table(filename, emg_nr+1) + if df.isnull().values.any(): + print('NaN in: subject', data_container.subject_nr, 'arm:', which_arm, 'session:', round, 'emg nr:', emg_nr) + # Places the data correctly: if round == 1: if which_arm == 'left': @@ -484,7 +490,8 @@ class DL_data_handler: def __init__(self, csv_handler:CSV_handler) -> None: self.csv_handler = csv_handler - self.samples_per_subject = {1: [], # Should med 4 sessions * split nr of samples per person + # Should med 4 sessions * split nr of samples per person. Each sample is structured like [sample_df, samplerate] + self.samples_per_subject = {1: [], 2: [], 3: [], 4: [], @@ -503,21 +510,41 @@ class DL_data_handler: for emg_nr in range(8): df, _ = self.csv_handler.get_data(subject_nr, 'right', session_nr, emg_nr+1) list_of_emgs.append(DataFrame(df[get_emg_str(emg_nr+1)])) + return list_of_emgs # list of emg data where first element also has timestamp column - def make_subj_sample(self, list_of_emgs): - # starting_point:DataFrame = list_of_emgs[0].rename(columns={'emg1':'emg'}) + def make_subj_sample(self, list_of_emgs_): + # Test and fix if the emgs have different size + list_of_emgs = [] + length_left_emgs = int(len(list_of_emgs_[0].index)) + length_right_emgs = int(len(list_of_emgs_[-1].index)) + if length_left_emgs < length_right_emgs: + for i in range(16): + new_emg_df = list_of_emgs_[i].head(length_left_emgs) + list_of_emgs.append(new_emg_df) + elif length_right_emgs < length_left_emgs: + for i in range(16): + new_emg_df = list_of_emgs_[i].head(length_right_emgs) + list_of_emgs.append(new_emg_df) + else: + list_of_emgs = list_of_emgs_ + tot_session_df_list = [] for i in range(8): - #emg_str = get_emg_str(i) - df = list_of_emgs[i] # .rename(columns={emg_str: 'emg'}) + df = list_of_emgs[i] tot_session_df_list.append(df) for i in range(1, 9): emg_str_old = get_emg_str(i) emg_str_new = get_emg_str(8+i) - df:DataFrame = list_of_emgs[7+i].rename(columns={emg_str_old: emg_str_new}) + df = list_of_emgs[7+i].rename(columns={emg_str_old: emg_str_new}) tot_session_df_list.append(df) - tot_session_df = pd.concat(tot_session_df_list, axis=1) + tot_session_df = pd.concat(tot_session_df_list, axis=1, ignore_index=True) + + # TESTING FOR NAN + if tot_session_df.isnull().values.any(): + print('NaN in: where? THERE') + print(length_left_emgs, length_right_emgs) + #print(tot_session_df_list) return tot_session_df @@ -527,10 +554,25 @@ class DL_data_handler: for session_nr in range(4): list_of_emg = self.get_emg_list(subject_nr+1, session_nr+1) tot_session_df = self.make_subj_sample(list_of_emg) + + # TESTING FOR NAN + if tot_session_df.isnull().values.any(): + print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER MAKE') + samples = np.array_split(tot_session_df.to_numpy(), split_nr) for array in samples: df = DataFrame(array).rename(columns={0:'timestamp'}) + ''' + # TESTING FOR NAN + if df.isnull().values.any(): + print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER SPLIT') + ''' df_finished, samplerate = self.reshape_session_df_to_signal(df) + ''' + # TESTING FOR NAN + if df_finished.isnull().values.any(): + print('NaN in: subject', subject_nr+1, 'session:', session_nr+1, 'where? AFTER RESHAPE') + ''' subj_samples.append([df_finished, samplerate]) self.samples_per_subject[subject_nr+1] = subj_samples @@ -567,10 +609,10 @@ def make_df_from_xandy(x, y, emg_nr): # Help: returns the samplerate of a df def get_samplerate(df:DataFrame): min, max = get_min_max_timestamp(df) - #print(min, max) - seconds = max - 60 - min - #print(seconds) - samples = len(df['timestamp']) - #print(samples) + if max > 60: + seconds = max - 60 - min + else: + seconds = max - min + samples = len(df.index) samplerate = samples / seconds return samplerate \ No newline at end of file diff --git a/Present_data.py b/Present_data.py index 5f23465..a9f619a 100644 --- a/Present_data.py +++ b/Present_data.py @@ -100,13 +100,13 @@ def plot_all_emg_mfcc(data_list:list, label_list:list): plt.show() -def pretty(dict, indent=0): - for key, value in dict.items(): - print('\t' * indent + str(key)) - if isinstance(value, dict): - pretty(value, indent+1) - else: - print('\t' * (indent+1) + str(value)) +def pretty(dict): + for key, value in dict.items(): + print('Subject', key, 'samples:') + print('\t\t Number av samples:', len(value)) + print('\t\t EX sample nr 1:') + print('\t\t\t Type:', type(value[0][0]), type(value[0][1])) + print('\t\t\t Sample:', value[0][0], value[0][1]) # DATA FUNCTIONS: --------------------------------------------------------------: @@ -134,6 +134,13 @@ def mfcc_custom(df:DataFrame, samplesize, windowsize, stepsize, nr_coefficients, return N, base.mfcc(y, samplesize, windowsize, stepsize, nr_coefficients, nr_mel_filters) +def test_for_NaN(dict, samples_per_person): + for key, value in dict.items(): + for i in range(samples_per_person): + df = value[i][0] + #print(df) + print(df.isnull()) + # CASE FUNTIONS ----------------------------------------------------------------: # Takes in a df and compares the FFT and the wavelet denoising of the FFT @@ -224,11 +231,9 @@ def main(): dl_data_handler = DL_data_handler(csv_handler) dl_data_handler.store_samples(10) dict = dl_data_handler.samples_per_subject + - test_session_df = dict.get(1)[0] - print(test_session_df) - df, _ = dl_data_handler.reshape_session_df_to_signal(test_session_df) - print(df[:50]) + diff --git a/__pycache__/Handle_emg_data.cpython-38.pyc b/__pycache__/Handle_emg_data.cpython-38.pyc index 033f0e4324f6b8593ae743def950dc1dde53d9fe..22a0c4345bef1c9d6ceebe01d81f322a81d27fb7 100644 GIT binary patch delta 4522 zcmb_gU2Ggz6`ni)`#1iLe-bxw8zs&rPSVyP1lQ!J#A$w)P5hH~8h1VMjCaT0o!#6S zCytrb3LF6s1W~S3712sILZCcB8>vW0NPPibc!9@}-~mCYRZ)0AAo>8Db7$>18~Oyh z+B5f_d+xdC-22^g&g>uFA;0?xF{Uz^qyV4%AAdXh-gjO%1{yzro-NpuT zy(IJu=%ws5^wQ94vop4FZ&5JYos`}77O~syj(f7%VRzbH_XM-E$?djx0JqDQCWLIy z2HKt_QRaddI7Nq^tI~=U07HgHDTj?bG~k&yYX#OhYE>NQDD&lNmCn81(f`h$(nQrk z2~Dtnl0qhKLjQq4yO<+hCTaGr=pNjGB3+M@ZtQA6a0h)6h%Qo%J$t2_y&`>}?!;l4 zjmszcJF)p_HlK(Zcd<9*!t?;NvNGL`LO3ZBYqqIV$Fd7^{%sh+>5)F~mK_i5k6Sfx zD-!Mb7PXO9u@;?zJ^wVjr0geq8C9I#{hOMzD%VB#B4dbstQ_3?xK&YS(FqEHTX70b zrC128n`LK+y`YY$%_)I;ype#}Ux1%45%HU9792x<=N81#wMU7wGXo zw#86hP;4obY&leHB@|b6B0!5QigX^hDmT@+Uw(3G#n$%74J+6D{%yE`Jd$^zE?nn7p%w@t&D!6`;PCsRc~}fgIyi;=qOF$ z38dNgwEc|~JqVphb$xHCT#nS+R(Z+sBgyjaMoNvkUO+i2GdRUS(uSlRCnYa;b8)6fefLbL+M#W`EUplPeF~5`W2zq2efg9$fMltt%FF7U^>>tg26+N0t#w zRc~7_I&fF%2pjDk8pFHQ+$|JROBS{B`3EY$c#%Ra53c>^P-qm%AQH6j!D-t0@a^6Y z<hx!ZoJcqvxW=smq8hwjR4R^A(7nFy~PyI6U8ahQ^ zK!U!|F(l{${XCKpBqx!aLUI~Os~3KCKFH_ki#T=`$u?Wq+lO|FKjzCmoErXS=EncL zoQzI=%3`NR2TA(j?C5jG|6v_;>r>YG^Z7wiVzZ+^Vn4fJ=g}p=5FS({LCB#U$Y>&% z9~go+(wY~C;-Q&|xvCdfuIJDvu*_~<+(~}UzH#w~rSl+4&mg&oql7jR%4$t5H&0f`dh%X3aGfLodsV{8xN*tT=wQXC_Qi(j$tUaE~W??n#B3=EyV zRjHL7Y6Z>{-q0(kaEP6}JkZ~wR>!HRHDdnd1C1UO>_yUtgy){E7*6|$Bh{+a9M7hhwy1)f z6e`9X=LcJ%Q)j31yU$^qj7WiFuEmKcy*aDw`oTZY8}v;eGU*{nk|sOJKuksU{rsT@ z+JUR_Y&i_#MWHSRLO?=dlYYpOt7Mhbr6xz*;>e*C%8LShIgo5=m7Eo32I>lAS*2fC zgRyl2c~)FgLUkPqv|A^Fl`^96cf0TJcvdZ>YF zovVHm#uZy#6~Tnjx)P|N0tP1n6_a_d0PQ5Cehu4TycQrygpGg0X9E*n97y?* zV--S8AP-2S%~i`w70;i{ijizByMB~*i(a)soq~%Aa;1ASG2baU)HyXcd4Bxbc-G)| zDUzVBiWDx$0TRh2sJ-Z`xWEk{kxF^tMW5jvh(dE#K#$?@XOJ96@>w9+G>;r+2GLQd z&QBx2`~rOq#>{qDr5Kb7WoMoP&o?`o9O@QJF)Jdjne?1HarT)vyMii&l(=|+ny!F) z=|1+Gt4C%YzgQNtU6hxVH!dC}i8t`pCMH%?%W)FFN@N1Xafs>Tj zscT0$crg@n@V*a?0Nx5XR^H{<0od9AU_i#V>oSyqSOiMFLMS7JQcwXNAc|v8#d2^# zUQ?hboaN}@vQQ3kX+W3)hp(;aP(CKsp`ctB8+9$zK>4vc)LNmwN)}T1A_4G90K5{Q zJRK(1$qlid1UyM=nJ^iqiXw+?aH|h+BrF)QNl;PhyiQBz;+)LG77t}d!7utOkd=-m zG|rrx9KSYr<;=*XaSlG3MG+fFwk?hyv&g6-X~h5=p4gMZ3r`n;L<7v68fe5o!U(sC z58bbo-Jsx6UQn6oZO7)*84xs6O#~qjsE4|=IJ5YXVo_?{;BXagL38pDf&yPL1UPIw{%$yIDECA>_HNZsJK2X5d&xeQp8Q&a`^`7$0BXh^ z;30McbM<7F&>}n`o1&}n#^C*+Wh7T|sQI#i@zjLZ64wI=(+c(ymTkLU5&QqeYU+c` zS$1q{yfqVIs*2O$E6|E$ejjF$$&ceUw)pm=hHZP%gOO{MU2IS2l9AE( z8c*?&IgB=&A850ToMhs3_X*qq@EEra9z*chrkyLsK#|Aog&%A$4;=Ik%T13T#*Ku) zBvo7wh_wub5AF&55|R)Jw=Xu5y+6IPV-gjqFd~R)I3-$VA5EV;wgP>~v$zuXGOI*n zIU;wtJ&}yr@yWty=6Y}A)h%@d4qBvN1`Ut7ic6v0{HmXZMgZ@1t9Z8{P2juTqD-H` zXCGY6KA|poIwT9;f?US)7=!@GLAwOF_6kI9F}E=Wdcf7X&;WZ&(4j7J)E4Kl$b*n- z`!qnA2_)P01G|W(o9Kf94TP>v3=!fKJY%GFn>)OvO6~5EIRN_+enH|FCv?heB;vuc zdy`7YP-~WFTYenRHGGrCAsL1ug^SV}igmmUXf?p9oTaXccV;zOxp3pbZr vwdKXhrbCF6NFS?I?WMAF5~1b~0>Kl;0F1x1WV9I#*s+E%VH9;4YB23TT1~`B delta 3758 zcmb_fU2Ggz6`p(NXJ_s8dYwPpiQ^h4ZMsSAgtUY-2}vk%lQfC5AxV?05;hah*jamb zc5`QpoHDD0Iu%HzQq$W9g!S=YF=Y`8xGn&1Ds>$mo|> zcvUazEEzM3CXMM8tC}jNn08Wg4cEM#~ble&Zs}HJWF!QEP&ehcf4QTH=khx@}cQy~er@WN79u6iY}I#CLp zTNQ7o{EGFw)mrj>>p&BZ_#AzMM+A{Jr$sE{F6-7F@P-wcE}P=wc%-|We)mOIWK1%b z_i7P8qTTs*tFI@0{~IK4kX*ebe^br#b@JR@lD{39P1mFabFBq$s|B>#nho^j1qW#Z zi{yXNlK-KSH`d7;%kp+;62I8%#7!cJJ-W!qNMQrg1dCjgN9}>@me_?Ma9gwkGJp<( zxb3v>dNU(J)I>hD+TW0P7LW(*1`Gn$UBPC#H@!Kthh~DE1RBe|lRnAv@>Kf3km3Xv zii&+Sm)DaGltwkWN}2p1-Oo-g{4D*Kj$Kd}XHGel>0rt^=Ll~~JhyJB7KkDFyY5YF zSpKbh*AXyx#qQX;?FhF}n70(IV^cW(geUev=#zl$032!lxaeHC+Ve-_m^#mjtuRIm z09Kumy7A^pjW`Ycr9wgR&ZR&cU1)CqQnrbMi~RtcS3D0G0=xhi27C%|0PtzR`hy3x z@vu-3FJkU6;ECHqYW4QtCduGkO}?>xeqnGZ>bU+tAH;x#Pde8SMEln|WAB`O50qYj!JaM&!@WZrQ9d z;Id8P1h(8M|8cgjcb#$@xe&U4C5O*F+uR7jO@Pe+mG+l2tJs1ul@koK_5oC2D7#=% zVy3ci1l^N>Gk}$SRZG=@9-4s801cbl)>hlfrMC2CQ}%WBxnZa5tiUX)m9$5H)Tbz_ z!bC-g3J$~tdGMTE9No5CnSPE&R&BXlb^ZVk;@&kzR2)8kdcSfT6X&s@|M69E4h{;O zYQ5qGCAT~l{#nkS-^)hjZ_fYnf=Zb<(V_}Sbu86F8l^&{jsf#y%c<8r-xY_jg^rPS zC`JJa!sQ%l%6}I6Uq-@=SxFIaoe^-!-%Nk}# z06Vn%l#L82{LNjU>R+kqsFo)Uu$sktiIeYk}ov!sCqI)M(g2V(io4-2jsy}pEDwB&U`=*3wlzJ zd&t{^4Lh_VdzMY6a0BF3gS?uK^f^09KVZk`az^j;M0%u6rW4il+OSm-H%Env46;HK z&j79w_6eLBs)BHar~s;fl~Z=f+O@4+d!Wdo8I5P?hJ-J_ zdF^O(iu#miv6HeePw_Pqv%Bg%v4QOPL;`GRz1K_QqE;=(;8-EQnt}Y7gb?O%ojc> ziS>%-6Cu``kD5;z#SMf@GLKOKV<1xVMI0JQK%)$SkIfQ>br!8US>*&tdP>#lpRG#y zmLPd8HvQ>p{jPARUF=H4hR2Aw!{hijETM|3r;1o$K;mJXSNTQlL03ctcJ)O;w|%5q fbEhlbQE(>MN`RC_Up?EIZEE-wvL|eVDr)LKG+ynM