"""Extract VGGish embedding statistics for every audio file of one subset.

Usage: python <this script> <wav_dir> <subset>

For each file in ``<wav_dir>`` whose name contains ``<subset>`` the audio is
loaded, trimmed, passed through the pre-trained VGGish model, and the
per-dimension mean and standard deviation of the resulting embeddings are
stored. Features and the ``label`` column of ``./dist/lab/<subset>.csv`` are
written as ``.npy`` files under ``./features/vgg_features``.
"""
import os
import sys
# ``import urllib`` alone does not guarantee that the ``request`` submodule is
# loaded; import it explicitly so ``urllib.request.urlretrieve`` always works.
import urllib.request

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import librosa
import numpy as np
import pandas as pd

# The VGGish helper modules live in the local ``vggish`` directory.
sys.path.append('vggish')
import vggish_input
import vggish_params
import vggish_slim

SR = 22050  # sample rate used when loading audio
SR_VGG = 16000  # VGG pretrained model sample rate (not referenced in this file)
FRAME_LEN = int(SR / 10)  # 100 ms
HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. 50 ms


def download(url, dst_dir):
    """Download a file unless it is already present in ``dst_dir``.

    Args:
        url: Web location of the file. The local file name is the last
            path component of the URL.
        dst_dir: Directory in which the file is stored. It is created if it
            does not exist yet.

    Returns:
        Path to the (possibly already existing) local file.
    """
    filename = url.split('/')[-1]
    filepath = os.path.join(dst_dir, filename)
    if not os.path.exists(filepath):
        if dst_dir:
            # urlretrieve cannot create missing parent directories itself.
            os.makedirs(dst_dir, exist_ok=True)

        def _progress(count, block_size, total_size):
            # total_size is not positive when the server does not report a
            # content length; skip the percentage instead of dividing by it.
            if total_size > 0:
                sys.stdout.write('\r>> Downloading %s %.1f%%' %
                                 (filename,
                                  float(count * block_size)
                                  / float(total_size) * 100.0))
                sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
        statinfo = os.stat(filepath)
        print('Successfully downloaded:', filename, statinfo.st_size, 'bytes.')
    return filepath


def sta_fun_2(npdata):
    """Compute mean and standard deviation along axis 0 as one row vector.

    :param npdata: the numpy array to extract the features from; statistics
        are taken over its first axis
    :type npdata: numpy.ndarray
    :return: the means followed by the standard deviations, reshaped to
        ``(1, -1)``
    :rtype: numpy.ndarray
    :raises ValueError: if ``npdata`` is None
    """
    # perform a sanity check
    if npdata is None:
        raise ValueError("Input array cannot be None")

    # For 1-D input the statistics are 0-d scalars, which np.concatenate
    # rejects; atleast_1d promotes them and leaves N-D results untouched.
    mean = np.atleast_1d(np.mean(npdata, axis=0))
    std = np.atleast_1d(np.std(npdata, axis=0))

    # finally return the features in a concatenated array (as a vector)
    return np.concatenate((mean, std), axis=0).reshape(1, -1)


print("\nTesting your install of VGGish\n")

# Paths to downloaded VGGish files.
checkpoint_path = "vggish/vggish_model.ckpt"
if not os.path.exists(checkpoint_path):
    # Automatically download the checkpoint if it does not exist.
    url = 'https://storage.googleapis.com/audioset/vggish_model.ckpt'
    download(url, './vggish/')


def main():
    """Run the feature extraction for the subset given on the command line."""
    if len(sys.argv) < 3:
        sys.exit("usage: %s <wav_dir> <train|devel|test>" % sys.argv[0])

    # data path (dist/wav/)
    path = sys.argv[1]
    # train, devel or test subset
    dataset = sys.argv[2]

    x_data = []
    with tf.Graph().as_default(), tf.Session() as sess:
        # load pre-trained model
        vggish_slim.define_vggish_slim()
        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME
        )
        embedding_tensor = sess.graph.get_tensor_by_name(
            vggish_params.OUTPUT_TENSOR_NAME
        )

        # extract features for all audio samples from correct subset
        for file in sorted(f for f in os.listdir(path) if dataset in f):
            sample_path = os.path.join(path, file)
            try:
                y, _ = librosa.load(
                    sample_path, sr=SR, mono=True, offset=0.0, duration=None
                )
            except IOError:
                # Best effort: skip unreadable files, but name the file so
                # the skip can be traced.
                print("file doesn't exit", sample_path)
                continue

            yt, _ = librosa.effects.trim(
                y, frame_length=FRAME_LEN, hop_length=HOP
            )
            input_batch = vggish_input.waveform_to_examples(yt, SR)
            [features] = sess.run(
                [embedding_tensor], feed_dict={features_tensor: input_batch}
            )
            x_data.append(sta_fun_2(features).tolist())

    # save features and labels
    df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep=',')
    if len(x_data) != len(df):
        # NOTE(review): presumably the CSV holds one row per audio file in
        # the same (sorted) order as the features - confirm. A length
        # mismatch means the saved x/y arrays cannot be paired row by row.
        print(
            "WARNING: %d feature rows but %d label rows for subset '%s'"
            % (len(x_data), len(df), dataset),
            file=sys.stderr,
        )

    out_dir = './features/vgg_features'
    # Make sure the extraction work is not lost to a missing directory.
    os.makedirs(out_dir, exist_ok=True)
    np.save(os.path.join(out_dir, "x_" + dataset + "_data_vgg.npy"),
            np.array(x_data))
    np.save(os.path.join(out_dir, "y_" + dataset + "_label_vgg.npy"),
            df.label)


if __name__ == "__main__":
    main()