remove old files

parent 02feb535b3
commit fa942d7a64

@@ -1,130 +0,0 @@
from __future__ import print_function

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import pandas as pd
import os
import json
import sys
import numpy as np

import librosa

import urllib.request  # urlretrieve lives in urllib.request on Python 3

sys.path.append('vggish')
import vggish_input
import vggish_params
import vggish_slim

SR = 22050  # sample rate of the loaded audio
SR_VGG = 16000  # sample rate expected by the pretrained VGGish model
FRAME_LEN = int(SR / 10)  # 100 ms
HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. a 50 ms hop
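# Frame arithmetic for the constants above: at SR = 22050 Hz,
# FRAME_LEN = 22050 // 10 = 2205 samples (100 ms) and
# HOP = 2205 // 2 = 1102 samples (~50 ms), so adjacent analysis
# frames overlap by half a frame.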

def download(url, dst_dir):
    """Download a file.

    If the file does not exist yet, download it.

    Args:
        url: Web location of the file.
        dst_dir: Directory to store the file in.

    Returns:
        Path to the downloaded file.
    """
    filename = url.split('/')[-1]
    filepath = os.path.join(dst_dir, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename,
                              float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
        statinfo = os.stat(filepath)
        print('Successfully downloaded:', filename, statinfo.st_size, 'bytes.')
    return filepath

def sta_fun_2(npdata):  # 1D or 2D np array
    """Extract statistical features from the numpy array provided as input.

    :param npdata: the numpy array to extract the features from
    :type npdata: numpy.ndarray
    :return: the extracted features as a row vector
    :rtype: numpy.ndarray
    """
    # perform a sanity check
    if npdata is None:
        raise ValueError("Input array cannot be None")

    # perform the feature extraction: per-column mean and standard deviation
    Mean = np.mean(npdata, axis=0)
    Std = np.std(npdata, axis=0)

    # finally return the features in a concatenated array (as a row vector)
    return np.concatenate((Mean, Std), axis=0).reshape(1, -1)

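# A minimal sketch of what sta_fun_2 does to a batch of VGGish embeddings
# (the dummy (10, 128) array is illustrative only): it pools a
# variable-length (N, 128) embedding sequence into a fixed (1, 256) vector
# of per-dimension means followed by per-dimension standard deviations.
#
#   dummy = np.random.rand(10, 128)
#   pooled = sta_fun_2(dummy)
#   assert pooled.shape == (1, 256)
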
print("\nTesting your install of VGGish\n")
|
|
||||||
# Paths to downloaded VGGish files.
|
|
||||||
checkpoint_path = "vggish/vggish_model.ckpt"
|
|
||||||
|
|
||||||
if not os.path.exists(checkpoint_path): #automatically download the checkpoint if not exist.
|
|
||||||
url = 'https://storage.googleapis.com/audioset/vggish_model.ckpt'
|
|
||||||
download(url, './vggish/')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # data path (raw_files\devel OR test OR train folder)
    path = sys.argv[1]

    # feature extraction
    with tf.Graph().as_default(), tf.Session() as sess:
        # load the pre-trained model and look up its input/output tensors
        vggish_slim.define_vggish_slim()
        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(
            vggish_params.OUTPUT_TENSOR_NAME
        )

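        # In the reference vggish_params these names default to
        # 'vggish/input_features:0' and 'vggish/embedding:0', i.e. the
        # log-mel input batch and the 128-D embedding output.
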
        x_data = []
        y_label = []
        y_uid = []

        # extract features for every audio file in the given folder
        files = os.listdir(path)
        for file in files:
            try:
                sample_path = os.path.join(path, file)
                y, sr = librosa.load(
                    sample_path, sr=SR, mono=True, offset=0.0, duration=None
                )
            except IOError:
                print("file doesn't exist")
                continue

            # trim leading/trailing silence; skip clips shorter than 2 s
            yt, index = librosa.effects.trim(
                y, frame_length=FRAME_LEN, hop_length=HOP
            )
            duration = librosa.get_duration(y=yt, sr=sr)
            if duration < 2:
                continue
            input_batch = vggish_input.waveform_to_examples(
                yt, SR_VGG
            )  # ?x96x64 log-mel examples --> ?x128 embeddings
            [features] = sess.run(
                [embedding_tensor], feed_dict={features_tensor: input_batch}
            )
            features = sta_fun_2(features)  # pool to a (1, 256) vector

            x_data.append(features.tolist())
            y_uid.append(file)

        # save features and labels as numpy arrays
        x_data = np.array(x_data)
        labels_path = 'labels\\' + os.path.basename(os.path.normpath(path)) + '.csv'
        df = pd.read_csv(labels_path, sep=',')
        y_label = df.label

        np.save(os.path.join('vgg_features', "x_" + os.path.basename(os.path.normpath(path)) + "_data_vgg.npy"), x_data)
        np.save(os.path.join('vgg_features', "y_" + os.path.basename(os.path.normpath(path)) + "_label_vgg.npy"), y_label)
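# A quick sanity check (a sketch, assuming the devel folder was processed):
# the saved arrays load back with np.load and, after squeezing, give one
# 256-D feature row per kept clip.
#
#   x = np.load('vgg_features/x_devel_data_vgg.npy', allow_pickle=True)
#   y = np.load('vgg_features/y_devel_label_vgg.npy', allow_pickle=True)
#   print(np.squeeze(x).shape)  # expected: (num_clips, 256)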

svm.py (137 lines removed)
@@ -1,137 +0,0 @@
from sklearn.svm import LinearSVC
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.utils import resample
from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer
from joblib import Parallel, delayed
import pandas as pd
import scipy
import os, yaml
import json
import sys
import arff
import numpy as np
from tqdm import tqdm
from glob import glob

RANDOM_SEED = 42

# hyperparameter grid: optional standardization, then a linear SVM
GRID = [
    {'scaler': [StandardScaler(), None],
     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
     'estimator__loss': ['squared_hinge'],
     'estimator__C': np.logspace(-1, -5, num=5),
     'estimator__class_weight': ['balanced', None],
     'estimator__max_iter': [100000]
     }
]

PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC())])
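# For reference, np.logspace(-1, -5, num=5) sweeps C over
# [1e-1, 1e-2, 1e-3, 1e-4, 1e-5], so the grid above covers
# 2 scalers x 5 C values x 2 class weightings = 20 configurations.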

if __name__ == '__main__':

    # load features and labels
    devel_X_vgg = np.load("vgg_features\\x_devel_data_vgg.npy", allow_pickle=True)
    test_X_vgg = np.load("vgg_features\\x_test_data_vgg.npy", allow_pickle=True)
    train_X_vgg = np.load("vgg_features\\x_train_data_vgg.npy", allow_pickle=True)

    devel_X_hand = np.load("hand_features\\x_devel_data.npy", allow_pickle=True)
    test_X_hand = np.load("hand_features\\x_test_data.npy", allow_pickle=True)
    train_X_hand = np.load("hand_features\\x_train_data.npy", allow_pickle=True)

    devel_y = np.load("vgg_features\\y_devel_label_vgg.npy", allow_pickle=True)
    test_y = np.load("vgg_features\\y_test_label_vgg.npy", allow_pickle=True)
    train_y = np.load("vgg_features\\y_train_label_vgg.npy", allow_pickle=True)

    # predefined split: -1 marks train-only samples, 0 marks the devel fold
    num_train = train_X_vgg.shape[0]
    num_devel = devel_X_vgg.shape[0]
    split_indices = np.repeat([-1, 0], [num_train, num_devel])
    split = PredefinedSplit(split_indices)
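
    # A tiny illustration of the split construction (hypothetical sizes):
    # with num_train = 3 and num_devel = 2, np.repeat([-1, 0], [3, 2]) gives
    # [-1, -1, -1, 0, 0]: one CV fold that always trains on the first three
    # rows and validates on the last two (PredefinedSplit never puts samples
    # marked -1 into a test set).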

    # collapse the (N, 1, 256) VGGish arrays to 2D
    train_X_vgg = np.squeeze(train_X_vgg)
    devel_X_vgg = np.squeeze(devel_X_vgg)
    test_X_vgg = np.squeeze(test_X_vgg)

    # concatenate hand-crafted and VGGish features along the feature axis
    devel_X = np.concatenate((devel_X_hand, devel_X_vgg), axis=1)
    test_X = np.concatenate((test_X_hand, test_X_vgg), axis=1)
    train_X = np.concatenate((train_X_hand, train_X_vgg), axis=1)

    X = np.append(train_X, devel_X, axis=0)
    y = np.append(train_y, devel_y, axis=0)

    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
                               scoring=make_scorer(recall_score, average='macro'),
                               n_jobs=-1, cv=split, refit=True, verbose=1,
                               return_train_score=False)

    # fit on train+devel: the predefined split selects on devel, then
    # refit=True retrains the best configuration on train+devel together
    grid_search.fit(X, y)
    best_estimator = grid_search.best_estimator_

    # fit a clone of the best estimator on train alone for devel predictions
    estimator = clone(best_estimator, safe=False)
    estimator.fit(train_X, train_y)
    preds = estimator.predict(devel_X)

    metrics = {'dev': {}, 'test': {}}

    # devel metrics
    print('DEVEL')
    uar = recall_score(devel_y, preds, average='macro')
    cm = confusion_matrix(devel_y, preds)
    print(f'UAR: {uar}\n{classification_report(devel_y, preds)}\n\nConfusion Matrix:\n\n{cm}')

    pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)

    # test metrics
    print('TEST')
    preds = best_estimator.predict(test_X)
    uar = recall_score(test_y, preds, average='macro')
    cm = confusion_matrix(test_y, preds)
    print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
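    # For reference, the UAR printed above is the unweighted average recall,
    # recoverable from the confusion matrix as the mean per-class recall
    # (a sketch, assuming every class has support):
    #
    #   per_class_recall = cm.diagonal() / cm.sum(axis=1)
    #   assert np.isclose(uar, per_class_recall.mean())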