handcrafted features

2021-09-07 09:48:00 +02:00 · 2021-09-07 09:48:00 +02:00 · 71e262b340
commit 71e262b340
parent 6b3fbce6d1
1 changed file with 14 additions and 21 deletions
--- a/src/extract_handcrafted_features.py
+++ b/src/extract_handcrafted_features.py
@ -1,5 +1,4 @@
-import json
+import os
 import os
 import sys
 import warnings
 from math import pi
@ -13,7 +12,7 @@ warnings.filterwarnings("ignore")
 SR = 22050  # sample rate
 FRAME_LEN = int(SR / 10)  # 100 ms
-HOP = int(FRAME_LEN / 2)  # 50% overlap, meaning 5ms hop length
+HOP = int(FRAME_LEN / 2)  # 50% overlap, meaning 50 ms hop length (step size)
 MFCC_dim = 13  # the MFCC dimension
 def sta_fun(np_data):
@ -190,16 +189,17 @@ def extract_features(signal, signal_sr):
 if __name__ == "__main__":
-    # data path (raw_files\devel OR test OR train folder)
+    # data path (dist/wav/)
    path = sys.argv[1]
    # train, devel or test subset
    dataset = sys.argv[2]  
    x_data = []
    y_label = []
    y_uid = []
-    #extract features
+    # extract features for all audio samples whose file name contains the requested subset
-    files = os.listdir(path)
+    for file in sorted([f for f in os.listdir(path) if dataset in f]):
    for file in files: 
        try:
            sample_path = os.path.join(path,file)                      
            file_b = sample_path
@ -213,18 +213,11 @@ if __name__ == "__main__":
        yt, index = librosa.effects.trim(
            y, frame_length=FRAME_LEN, hop_length=HOP
        )
        duration = librosa.get_duration(y=yt, sr=sr)
        if duration < 2:
            continue 
        features = extract_features(signal=y, signal_sr=sr)
        x_data.append(features.tolist())
-    #save features in numpy.array
+    # save features and labels
-    x_data = np.array(x_data)
+    df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',')
-    labels_path = 'labels\\' + os.path.basename(os.path.normpath(path)) + '.csv'
+    np.save(os.path.join('./features/hand_features',"x_" + dataset + "_data.npy"), np.array(x_data))
-    df = pd.read_csv(labels_path, sep =',')
+    np.save(os.path.join('./features/hand_features',"y_" + dataset + "_label.npy"), df.label)
    y_label = df.label
    np.save(os.path.join('hand_features',"x_" + os.path.basename(os.path.normpath(path)) + "_data.npy"), x_data)
    np.save(os.path.join('hand_features',"y_" + os.path.basename(os.path.normpath(path)) + "_label.npy"), y_label)