vggish features

em474re 2021-09-07 14:41:05 +02:00
parent f772a6ad21
commit f7cfe4b93b

src/extract_vgg_features.py (new file, +114 lines)

@@ -0,0 +1,114 @@
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import pandas as pd
import os
import sys
import numpy as np
import librosa
import urllib.request
sys.path.append('vggish')
import vggish_input
import vggish_params
import vggish_slim

SR = 22050  # sample rate used to load the audio
SR_VGG = 16000  # sample rate expected by the pretrained VGGish model
FRAME_LEN = int(SR / 10)  # frame length: 100 ms
HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. 50 ms hop
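# Note: vggish_input.waveform_to_examples() resamples its input to SR_VGG
# (16 kHz) internally, so audio loaded at SR = 22050 Hz can be passed to it directly.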

def download(url, dst_dir):
    """Download a file.

    If the file does not already exist locally, download it.

    Args:
        url: Web location of the file.
        dst_dir: Directory to save the file into.

    Returns:
        Path to the downloaded file.
    """
    filename = url.split('/')[-1]
    filepath = os.path.join(dst_dir, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename,
                              float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
        statinfo = os.stat(filepath)
        print('Successfully downloaded:', filename, statinfo.st_size, 'bytes.')
    return filepath

def sta_fun_2(npdata):  # npdata: 2D array (frames x feature dims)
    """Extract statistical features from the numpy array provided as input.

    :param npdata: the numpy array to extract the features from
    :type npdata: numpy.ndarray
    :return: the extracted features as a vector
    :rtype: numpy.ndarray
    """
    # perform a sanity check
    if npdata is None:
        raise ValueError("Input array cannot be None")
    # compute the per-dimension mean and standard deviation
    Mean = np.mean(npdata, axis=0)
    Std = np.std(npdata, axis=0)
    # finally return the features in a concatenated array (as a vector)
    return np.concatenate((Mean, Std), axis=0).reshape(1, -1)
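# Applied to a VGGish embedding matrix of shape (num_frames, 128), sta_fun_2
# yields a (1, 256) vector: per-dimension means followed by per-dimension
# standard deviations.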
print("\nTesting your install of VGGish\n")
# Paths to downloaded VGGish files.
checkpoint_path = "vggish/vggish_model.ckpt"
if not os.path.exists(checkpoint_path): #automatically download the checkpoint if not exist.
url = 'https://storage.googleapis.com/audioset/vggish_model.ckpt'
download(url, './vggish/')
if __name__ == "__main__":
# data path (dist/wav/)
path = sys.argv[1]
# train, devel or test subset
dataset = sys.argv[2]
with tf.Graph().as_default(), tf.Session() as sess:
# load pre-trained model
vggish_slim.define_vggish_slim()
vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
embedding_tensor = sess.graph.get_tensor_by_name(
vggish_params.OUTPUT_TENSOR_NAME
)
x_data = []
y_label = []
# extract features for all audio samples from correct subset
for file in sorted([f for f in os.listdir(path) if dataset in f]):
try:
sample_path = os.path.join(path,file)
file_b = sample_path
y, sr = librosa.load(
file_b, sr=SR, mono=True, offset=0.0, duration=None
)
except IOError:
print("file doesn't exit")
continue
yt, index = librosa.effects.trim(
y, frame_length=FRAME_LEN, hop_length=HOP
)
input_batch = vggish_input.waveform_to_examples(
yt, SR
)
[features] = sess.run(
[embedding_tensor], feed_dict={features_tensor: input_batch}
)
features = sta_fun_2(features)
x_data.append(features.tolist())
# save features and labels
df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',')
np.save(os.path.join('./features/vgg_features',"x_" + dataset + "_data_vgg.npy"), np.array(x_data))
np.save(os.path.join('./features/vgg_features',"y_" + dataset + "_label_vgg.npy"), df.label)