vggish features

em474re 2021-09-07 14:41:05 +02:00
parent f772a6ad21
commit f7cfe4b93b

src/extract_vgg_features.py (new file, +114 lines)

@@ -0,0 +1,114 @@
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import pandas as pd
import os
import sys
import numpy as np
import librosa
import urllib.request
sys.path.append('vggish')
import vggish_input
import vggish_params
import vggish_slim

SR = 22050  # sample rate used to load the audio
SR_VGG = 16000  # sample rate expected by the pretrained VGGish model
FRAME_LEN = int(SR / 10)  # frame length: 100 ms
HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. 50 ms hop
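# Note: vggish_input.waveform_to_examples() resamples its input to SR_VGG
# (16 kHz) internally, so audio loaded at SR = 22050 Hz can be passed to it directly.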

def download(url, dst_dir):
    """Download a file.

    If the file does not already exist locally, download it.

    Args:
        url: Web location of the file.
        dst_dir: Directory to save the file into.

    Returns:
        Path to the downloaded file.
    """
    filename = url.split('/')[-1]
    filepath = os.path.join(dst_dir, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename,
                              float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
        statinfo = os.stat(filepath)
        print('Successfully downloaded:', filename, statinfo.st_size, 'bytes.')
    return filepath

def sta_fun_2(npdata):  # npdata: 2D array (frames x feature dims)
    """Extract statistical features from the numpy array provided as input.

    :param npdata: the numpy array to extract the features from
    :type npdata: numpy.ndarray
    :return: the extracted features as a vector
    :rtype: numpy.ndarray
    """
    # perform a sanity check
    if npdata is None:
        raise ValueError("Input array cannot be None")
    # compute the per-dimension mean and standard deviation
    Mean = np.mean(npdata, axis=0)
    Std = np.std(npdata, axis=0)
    # finally return the features in a concatenated array (as a vector)
    return np.concatenate((Mean, Std), axis=0).reshape(1, -1)
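# Applied to a VGGish embedding matrix of shape (num_frames, 128), sta_fun_2
# yields a (1, 256) vector: per-dimension means followed by per-dimension
# standard deviations.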
print("\nTesting your install of VGGish\n")
# Paths to downloaded VGGish files.
checkpoint_path = "vggish/vggish_model.ckpt"
if not os.path.exists(checkpoint_path): #automatically download the checkpoint if not exist.
url = 'https://storage.googleapis.com/audioset/vggish_model.ckpt'
download(url, './vggish/')
if __name__ == "__main__":
# data path (dist/wav/)
path = sys.argv[1]
# train, devel or test subset
dataset = sys.argv[2]
with tf.Graph().as_default(), tf.Session() as sess:
# load pre-trained model
vggish_slim.define_vggish_slim()
vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
embedding_tensor = sess.graph.get_tensor_by_name(
vggish_params.OUTPUT_TENSOR_NAME
)
x_data = []
y_label = []
# extract features for all audio samples from correct subset
for file in sorted([f for f in os.listdir(path) if dataset in f]):
try:
sample_path = os.path.join(path,file)
file_b = sample_path
y, sr = librosa.load(
file_b, sr=SR, mono=True, offset=0.0, duration=None
)
except IOError:
print("file doesn't exit")
continue
yt, index = librosa.effects.trim(
y, frame_length=FRAME_LEN, hop_length=HOP
)
input_batch = vggish_input.waveform_to_examples(
yt, SR
)
[features] = sess.run(
[embedding_tensor], feed_dict={features_tensor: input_batch}
)
features = sta_fun_2(features)
x_data.append(features.tolist())
# save features and labels
df = pd.read_csv('./dist/lab/' + dataset + '.csv', sep =',')
np.save(os.path.join('./features/vgg_features',"x_" + dataset + "_data_vgg.npy"), np.array(x_data))
np.save(os.path.join('./features/vgg_features',"y_" + dataset + "_label_vgg.npy"), df.label)