remove old files
This commit is contained in:
parent 02feb535b3
commit fa942d7a64

@@ -1,130 +0,0 @@
from __future__ import print_function

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import pandas as pd
import os
import json
import sys
import numpy as np

import librosa

import urllib.request

sys.path.append('vggish')
import vggish_input
import vggish_params
import vggish_slim

SR = 22050  # sample rate of the input recordings
SR_VGG = 16000  # VGG pretrained model sample rate
FRAME_LEN = int(SR / 10)  # 2205 samples = 100 ms
HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. a 50 ms hop

def download(url, dst_dir):
    """Download a file.

    If the file does not exist locally yet, download it.

    Args:
        url: Web location of the file.
        dst_dir: Directory to store the downloaded file in.

    Returns:
        Path to the downloaded file.
    """
    filename = url.split('/')[-1]
    filepath = os.path.join(dst_dir, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename,
                              float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
        statinfo = os.stat(filepath)
        print('Successfully downloaded:', filename, statinfo.st_size, 'bytes.')
    return filepath


def sta_fun_2(npdata):  # 2D np array: frames x embedding dimensions
    """Extract statistical features from the numpy array provided as input.

    :param npdata: the numpy array to extract the features from
    :type npdata: numpy.ndarray
    :return: the extracted features as a vector
    :rtype: numpy.ndarray
    """
    # perform a sanity check
    if npdata is None:
        raise ValueError("Input array cannot be None")

    # perform the feature extraction: mean and standard deviation per column
    Mean = np.mean(npdata, axis=0)
    Std = np.std(npdata, axis=0)

    # finally return the features in a concatenated array (as a vector)
    return np.concatenate((Mean, Std), axis=0).reshape(1, -1)
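A minimal usage sketch (not part of the original file; dummy data only): applied to a VGGish embedding matrix of shape (n_frames, 128), sta_fun_2 collapses the frame axis into per-dimension statistics and returns a single (1, 256) row vector.

dummy_embeddings = np.random.rand(10, 128)  # stand-in for 10 frames of 128-D VGGish embeddings
summary = sta_fun_2(dummy_embeddings)
print(summary.shape)  # -> (1, 256): 128 means followed by 128 standard deviations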

print("\nTesting your install of VGGish\n")
# Paths to downloaded VGGish files.
checkpoint_path = "vggish/vggish_model.ckpt"

if not os.path.exists(checkpoint_path):  # automatically download the checkpoint if it does not exist
    url = 'https://storage.googleapis.com/audioset/vggish_model.ckpt'
    download(url, './vggish/')


if __name__ == "__main__":
    # data path (raw_files\devel OR test OR train folder)
    path = sys.argv[1]

    # feature extraction
    with tf.Graph().as_default(), tf.Session() as sess:
        # load the pre-trained VGGish model
        vggish_slim.define_vggish_slim()
        vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(
            vggish_params.OUTPUT_TENSOR_NAME
        )

        x_data = []
        y_label = []
        y_uid = []

        # extract features
        files = os.listdir(path)
        for file in files:
            try:
                sample_path = os.path.join(path, file)
                file_b = sample_path
                y, sr = librosa.load(
                    file_b, sr=SR, mono=True, offset=0.0, duration=None
                )
            except IOError:
                print("file doesn't exist")
                continue

            # trim leading/trailing silence and skip recordings shorter than 2 s
            yt, index = librosa.effects.trim(
                y, frame_length=FRAME_LEN, hop_length=HOP
            )
            duration = librosa.get_duration(y=yt, sr=sr)
            if duration < 2:
                continue
            input_batch = vggish_input.waveform_to_examples(
                yt, SR_VGG
            )  # ?x96x64 --> ?x128
            [features] = sess.run(
                [embedding_tensor], feed_dict={features_tensor: input_batch}
            )
            features = sta_fun_2(features)

            x_data.append(features.tolist())
            y_uid.append(file)

        # save features and labels as numpy arrays
        x_data = np.array(x_data)
        labels_path = 'labels\\' + os.path.basename(os.path.normpath(path)) + '.csv'
        df = pd.read_csv(labels_path, sep=',')
        y_label = df.label

        np.save(os.path.join('vgg_features', "x_" + os.path.basename(os.path.normpath(path)) + "_data_vgg.npy"), x_data)
        np.save(os.path.join('vgg_features', "y_" + os.path.basename(os.path.normpath(path)) + "_label_vgg.npy"), y_label)
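For context, a hedged sketch of consuming the script's output (the split name 'train' is just an example; file names follow the np.save calls above). The script itself is run with the folder of raw recordings as its single command-line argument.

x_train = np.load(os.path.join('vgg_features', 'x_train_data_vgg.npy'), allow_pickle=True)
y_train = np.load(os.path.join('vgg_features', 'y_train_label_vgg.npy'), allow_pickle=True)
print(x_train.shape, y_train.shape)  # roughly (n_samples, 1, 256) and (n_samples,)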
svm.py (137 lines removed)
@@ -1,137 +0,0 @@

from sklearn.svm import LinearSVC
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.utils import resample
from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, recall_score, make_scorer
from joblib import Parallel, delayed
import pandas as pd
import scipy
import os, yaml
import json
import sys
import arff
import numpy as np
from tqdm import tqdm
from glob import glob

RANDOM_SEED = 42

# hyper-parameter grid: linear SVM, optionally preceded by standardisation
GRID = [
    {'scaler': [StandardScaler(), None],
     'estimator': [LinearSVC(random_state=RANDOM_SEED)],
     'estimator__loss': ['squared_hinge'],
     'estimator__C': np.logspace(-1, -5, num=5),
     'estimator__class_weight': ['balanced', None],
     'estimator__max_iter': [100000]
     }
]

PIPELINE = Pipeline([('scaler', None), ('estimator', LinearSVC())])
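As a hedged illustration (not part of the original file) of how a single point of GRID is applied: GridSearchCV clones PIPELINE and sets the step objects and estimator parameters via set_params, so one candidate configuration is roughly equivalent to:

candidate = clone(PIPELINE)
candidate.set_params(
    scaler=StandardScaler(),                        # or None to skip scaling
    estimator=LinearSVC(random_state=RANDOM_SEED),
    estimator__loss='squared_hinge',
    estimator__C=0.1,
    estimator__class_weight='balanced',
    estimator__max_iter=100000,
)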

if __name__ == '__main__':

    # load features and labels
    devel_X_vgg = np.load(
        "vgg_features\\x_devel_data_vgg.npy", allow_pickle=True
    )
    test_X_vgg = np.load(
        "vgg_features\\x_test_data_vgg.npy", allow_pickle=True
    )
    train_X_vgg = np.load(
        "vgg_features\\x_train_data_vgg.npy", allow_pickle=True
    )
    devel_X_hand = np.load(
        "hand_features\\x_devel_data.npy", allow_pickle=True
    )
    test_X_hand = np.load(
        "hand_features\\x_test_data.npy", allow_pickle=True
    )
    train_X_hand = np.load(
        "hand_features\\x_train_data.npy", allow_pickle=True
    )
    devel_y = np.load(
        "vgg_features\\y_devel_label_vgg.npy", allow_pickle=True
    )
    test_y = np.load(
        "vgg_features\\y_test_label_vgg.npy", allow_pickle=True
    )
    train_y = np.load(
        "vgg_features\\y_train_label_vgg.npy", allow_pickle=True
    )

    num_train = train_X_vgg.shape[0]
    num_devel = devel_X_vgg.shape[0]
    split_indices = np.repeat([-1, 0], [num_train, num_devel])
    split = PredefinedSplit(split_indices)
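A hedged aside with toy sizes (not from the data): PredefinedSplit yields exactly one fold in which samples marked -1 always stay in training and samples marked 0 form the validation set, so the grid search below trains on the train partition and scores on devel.

demo_indices = np.repeat([-1, 0], [3, 2])   # -> [-1, -1, -1, 0, 0]
demo_split = PredefinedSplit(demo_indices)
for train_idx, devel_idx in demo_split.split():
    print(train_idx, devel_idx)             # -> [0 1 2] [3 4]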
    train_X_vgg = np.squeeze(train_X_vgg)
    devel_X_vgg = np.squeeze(devel_X_vgg)
    test_X_vgg = np.squeeze(test_X_vgg)

    # fuse hand-crafted and VGGish features per partition
    devel_X = np.concatenate(
        (
            devel_X_hand,
            devel_X_vgg
        ),
        axis=1,
    )
    test_X = np.concatenate(
        (
            test_X_hand,
            test_X_vgg
        ),
        axis=1,
    )
    train_X = np.concatenate(
        (
            train_X_hand,
            train_X_vgg
        ),
        axis=1,
    )

    X = np.append(train_X, devel_X, axis=0)
    y = np.append(train_y, devel_y, axis=0)

    grid_search = GridSearchCV(estimator=PIPELINE, param_grid=GRID,
                               scoring=make_scorer(recall_score, average='macro'),
                               n_jobs=-1, cv=split, refit=True, verbose=1,
                               return_train_score=False)

    # fit on the data: model selection uses the train -> devel split; with
    # refit=True the best configuration is then refit on train + devel
    grid_search.fit(X, y)
    best_estimator = grid_search.best_estimator_

    # fit a clone of the best estimator on train only to obtain devel predictions
    estimator = clone(best_estimator, safe=False)
    estimator.fit(train_X, train_y)
    preds = estimator.predict(devel_X)

    metrics = {'dev': {}, 'test': {}}

    # devel metrics
    print('DEVEL')
    uar = recall_score(devel_y, preds, average='macro')
    cm = confusion_matrix(devel_y, preds)
    print(f'UAR: {uar}\n{classification_report(devel_y, preds)}\n\nConfusion Matrix:\n\n{cm}')

    pd.DataFrame(grid_search.cv_results_).to_csv('grid_search.csv', index=False)
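A hedged aside with toy labels (not from the data): UAR here is macro-averaged recall, i.e. the unweighted mean of per-class recall.

toy_true = [0, 0, 0, 1]
toy_pred = [0, 0, 1, 1]
print(recall_score(toy_true, toy_pred, average='macro'))  # (2/3 + 1/1) / 2 ≈ 0.833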
# test metrics
|
||||
print('TEST')
|
||||
preds = best_estimator.predict(test_X)
|
||||
uar = recall_score(test_y, preds, average='macro')
|
||||
cm = confusion_matrix(test_y, preds)
|
||||
print(f'UAR: {uar}\n{classification_report(test_y, preds)}\n\nConfusion Matrix:\n\n{cm}')
|