From f7cfe4b93ba90d5f9f883440c80cb969108d0984 Mon Sep 17 00:00:00 2001
From: em474re
Date: Tue, 7 Sep 2021 14:41:05 +0200
Subject: [PATCH] Add VGGish feature extraction script

---
 src/extract_vgg_features.py | 114 ++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 src/extract_vgg_features.py

diff --git a/src/extract_vgg_features.py b/src/extract_vgg_features.py
new file mode 100644
index 0000000..a055d92
--- /dev/null
+++ b/src/extract_vgg_features.py
@@ -0,0 +1,114 @@
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
+
+import pandas as pd
+import os
+import sys
+import numpy as np
+import librosa
+import urllib.request
+sys.path.append('vggish')
+import vggish_input
+import vggish_params
+import vggish_slim
+
+SR = 22050  # sample rate used for loading the audio
+SR_VGG = 16000  # sample rate expected by the pretrained VGGish model
+FRAME_LEN = int(SR / 10)  # 100 ms frames
+HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. 50 ms hop
+
+
+def download(url, dst_dir):
+    """Download a file if it does not already exist.
+
+    Args:
+        url: Web location of the file.
+        dst_dir: Directory the file is saved to.
+
+    Returns:
+        Path to the downloaded file.
+    """
+    filename = url.split('/')[-1]
+    filepath = os.path.join(dst_dir, filename)
+    if not os.path.exists(filepath):
+        def _progress(count, block_size, total_size):
+            sys.stdout.write('\r>> Downloading %s %.1f%%' %
+                             (filename,
+                              float(count * block_size) / float(total_size) * 100.0))
+            sys.stdout.flush()
+
+        filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
+        statinfo = os.stat(filepath)
+        print('Successfully downloaded:', filename, statinfo.st_size, 'bytes.')
+    return filepath
+
+
+def sta_fun_2(npdata):  # 2D np array of shape (frames, embedding_dim)
+    """Extract statistical features from the numpy array provided as input.
+
+    :param npdata: the numpy array to extract the features from
+    :type npdata: numpy.ndarray
+    :return: the extracted features (per-dimension mean and std) as a row vector
+    :rtype: numpy.ndarray
+    """
+    # perform a sanity check
+    if npdata is None:
+        raise ValueError("Input array cannot be None")
+
+    # perform the feature extraction
+    mean = np.mean(npdata, axis=0)
+    std = np.std(npdata, axis=0)
+    # finally return the features in a concatenated array (as a vector)
+    return np.concatenate((mean, std), axis=0).reshape(1, -1)
+
+
+print("\nLoading the pretrained VGGish model\n")
+# Path to the downloaded VGGish checkpoint.
+checkpoint_path = "vggish/vggish_model.ckpt"
+
+if not os.path.exists(checkpoint_path):  # download the checkpoint if it does not exist
+    url = 'https://storage.googleapis.com/audioset/vggish_model.ckpt'
+    download(url, './vggish/')
+
+if __name__ == "__main__":
+    # data path (dist/wav/)
+    path = sys.argv[1]
+
+    # train, devel or test subset
+    dataset = sys.argv[2]
+
+    with tf.Graph().as_default(), tf.Session() as sess:
+        # load the pre-trained model
+        vggish_slim.define_vggish_slim()
+        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
+        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
+        embedding_tensor = sess.graph.get_tensor_by_name(
+            vggish_params.OUTPUT_TENSOR_NAME
+        )
+
+        x_data = []
+
+        # extract features for all audio samples of the chosen subset
+        for file in sorted([f for f in os.listdir(path) if dataset in f]):
+            try:
+                sample_path = os.path.join(path, file)
+                y, sr = librosa.load(
+                    sample_path, sr=SR, mono=True, offset=0.0, duration=None
+                )
+            except IOError:
+                print("file doesn't exist:", file)
+                continue
+
+            # trim leading and trailing silence
+            yt, _ = librosa.effects.trim(
+                y, frame_length=FRAME_LEN, hop_length=HOP
+            )
+            # convert the waveform into VGGish input examples (log-mel patches)
+            input_batch = vggish_input.waveform_to_examples(yt, SR)
+            # run the pretrained model to get the frame-level 128-dim embeddings
+            [features] = sess.run(
+                [embedding_tensor], feed_dict={features_tensor: input_batch}
+            )
+            # summarise the frame-level embeddings by their mean and std
+            features = sta_fun_2(features)
+
+            x_data.append(features.tolist())
+
+    # save features and labels
+    os.makedirs('./features/vgg_features', exist_ok=True)
+    df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep=',')
+    np.save(os.path.join('./features/vgg_features', "x_" + dataset + "_data_vgg.npy"), np.array(x_data))
+    np.save(os.path.join('./features/vgg_features', "y_" + dataset + "_label_vgg.npy"), df.label)
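
Note on usage (not part of the patch): the script takes the wav directory and the subset name as positional
arguments, e.g. python src/extract_vgg_features.py dist/wav/ train, and writes x_<subset>_data_vgg.npy and
y_<subset>_label_vgg.npy into ./features/vgg_features. Below is a minimal sketch of how the saved arrays could
be read back downstream, assuming the default output paths above; the helper name load_vgg_features is
illustrative only and not defined anywhere in this patch.

    import numpy as np

    def load_vgg_features(dataset, feature_dir="./features/vgg_features"):
        """Load the saved VGGish feature matrix and labels for one subset (illustrative helper)."""
        x = np.load(f"{feature_dir}/x_{dataset}_data_vgg.npy")
        # labels may be stored as strings, hence allow_pickle=True
        y = np.load(f"{feature_dir}/y_{dataset}_label_vgg.npy", allow_pickle=True)
        # x has shape (num_samples, 1, 256): per-dimension mean and std of the
        # 128-dim VGGish embeddings; drop the singleton axis for classifiers
        return x.squeeze(axis=1), y

    x_train, y_train = load_vgg_features("train")
    print(x_train.shape, y_train.shape)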