diff --git a/extract_handcrafted_features.py b/src/extract_handcrafted_features.py similarity index 90% rename from extract_handcrafted_features.py rename to src/extract_handcrafted_features.py index dff29a2..ccc20ed 100644 --- a/extract_handcrafted_features.py +++ b/src/extract_handcrafted_features.py @@ -1,5 +1,4 @@ -import json -import os +import os import sys import warnings from math import pi @@ -13,7 +12,7 @@ warnings.filterwarnings("ignore") SR = 22050 # sample rate FRAME_LEN = int(SR / 10) # 100 ms -HOP = int(FRAME_LEN / 2) # 50% overlap, meaning 5ms hop length +HOP = int(FRAME_LEN / 2) # 50% overlap, meaning 50 ms hop length (step size) MFCC_dim = 13 # the MFCC dimension def sta_fun(np_data): @@ -190,16 +189,17 @@ def extract_features(signal, signal_sr): if __name__ == "__main__": - # data path (raw_files\devel OR test OR train folder) - path = sys.argv[1] + # data path (dist/wav/) + path = sys.argv[1] + + # train, devel or test subset + dataset = sys.argv[2] x_data = [] y_label = [] - y_uid = [] - #extract features - files = os.listdir(path) - for file in files: + # extract features for all audio samples from correct subset + for file in sorted([f for f in os.listdir(path) if dataset in f]): try: sample_path = os.path.join(path,file) file_b = sample_path @@ -213,18 +213,11 @@ if __name__ == "__main__": yt, index = librosa.effects.trim( y, frame_length=FRAME_LEN, hop_length=HOP ) - duration = librosa.get_duration(y=yt, sr=sr) - if duration < 2: - continue features = extract_features(signal=y, signal_sr=sr) - + x_data.append(features.tolist()) - #save features in numpy.array - x_data = np.array(x_data) - labels_path = 'labels\\' + os.path.basename(os.path.normpath(path)) + '.csv' - df = pd.read_csv(labels_path, sep =',') - y_label = df.label - - np.save(os.path.join('hand_features',"x_" + os.path.basename(os.path.normpath(path)) + "_data.npy"), x_data) - np.save(os.path.join('hand_features',"y_" + os.path.basename(os.path.normpath(path)) + "_label.npy"), 
y_label) \ No newline at end of file + # save features and labels + df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',') + np.save(os.path.join('./features/hand_features',"x_" + dataset + "_data.npy"), np.array(x_data)) + np.save(os.path.join('./features/hand_features',"y_" + dataset + "_label.npy"), df.label) \ No newline at end of file