230 lines
8.0 KiB
Python
230 lines
8.0 KiB
Python
|
import json
|
|||
|
import os
|
|||
|
import sys
|
|||
|
import warnings
|
|||
|
from math import pi
|
|||
|
|
|||
|
import librosa
|
|||
|
import numpy as np
|
|||
|
import pandas as pd
|
|||
|
from scipy.fftpack import fft, hilbert
|
|||
|
|
|||
|
# Suppress every Python warning raised after this point.
warnings.filterwarnings("ignore")

SR = 22050  # sample rate (Hz) used when loading audio
FRAME_LEN = SR // 10  # number of samples in a 100 ms analysis frame
HOP = FRAME_LEN // 2  # 50% frame overlap, i.e. a hop of roughly 50 ms
MFCC_dim = 13  # the MFCC dimension
|
|||
|
|
|||
|
def _edge_quantile(values, q, method):
    """Return the ``q`` quantile of ``values`` using a non-interpolating method.

    NumPy 1.22 renamed the ``interpolation`` keyword of ``np.quantile`` to
    ``method`` and deprecated the old spelling, so the new keyword is tried
    first and the old one is only used on NumPy releases that lack it.
    """
    try:
        return np.quantile(values, q, method=method)
    except TypeError:
        return np.quantile(values, q, interpolation=method)


def sta_fun(np_data):
    """Extract various statistical features from the numpy array provided as input.

    The returned vector has 11 entries, in this order: mean, minimum, maximum,
    standard deviation, root mean square, median, 25th percentile,
    75th percentile, inter-quartile range, skewness and kurtosis.

    The inter-quartile range is the "higher" 0.75 quantile minus the "lower"
    0.25 quantile, so it can be wider than the difference between the two
    (linearly interpolated) percentiles that are also returned. Skewness and
    kurtosis are whatever ``pandas.Series.skew`` / ``pandas.Series.kurt``
    report for the data.

    :param np_data: the one-dimensional numpy array to extract the features from
    :type np_data: numpy.ndarray
    :return: The extracted features as a vector
    :rtype: numpy.ndarray
    :raises ValueError: if the input is ``None`` or contains no elements
    """

    # perform a sanity check
    if np_data is None:
        raise ValueError("Input array cannot be None")
    values = np.asarray(np_data)
    if values.size == 0:
        raise ValueError("Input array cannot be empty")

    # perform the feature extraction
    dat_min = np.min(values)
    dat_max = np.max(values)
    dat_mean = np.mean(values)
    dat_rms = np.sqrt(np.sum(np.square(values)) / len(values))
    dat_median = np.median(values)
    dat_qrl1 = np.percentile(values, 25)
    dat_qrl3 = np.percentile(values, 75)
    dat_lower_q = _edge_quantile(values, 0.25, "lower")
    dat_higher_q = _edge_quantile(values, 0.75, "higher")
    dat_iqrl = dat_higher_q - dat_lower_q
    dat_std = np.std(values)
    series = pd.Series(values)
    dat_skew = series.skew()
    dat_kurt = series.kurt()

    # finally return the features in a concatenated array (as a vector)
    return np.array([dat_mean, dat_min, dat_max, dat_std, dat_rms,
                     dat_median, dat_qrl1, dat_qrl3, dat_iqrl, dat_skew, dat_kurt])
|
|||
|
|
|||
|
def get_period(signal, signal_sr):
    """Extract the dominant period of the envelope of the provided signal.

    The envelope is obtained from the signal and its Hilbert transform, its
    magnitude spectrum is computed, and the strongest bin in the first half
    of the spectrum (ignoring the three lowest bins) is converted to a period
    in seconds.

    :param signal: the signal to extract the period from
    :type signal: numpy.ndarray
    :param signal_sr: the sampling rate of the input signal
    :type signal_sr: integer
    :return: a one-element vector containing the signal period in seconds
    :rtype: numpy.ndarray
    :raises ValueError: if the signal is ``None``, the sampling rate is not
        positive, or the signal has fewer than 8 samples
    """

    # perform a sanity check
    if signal is None:
        raise ValueError("Input signal cannot be None")
    if signal_sr <= 0:
        raise ValueError("Sampling rate must be positive")

    # work in floating point so that squaring integer PCM samples cannot overflow
    samples = np.asarray(signal, dtype=float)

    # the lowest bins (DC and the slowest components) are excluded from the peak search
    skip_bins = 3
    half = len(samples) // 2
    if half <= skip_bins:
        raise ValueError("Input signal is too short to estimate a period")

    # envelope of the signal from the signal itself and its Hilbert transform
    hy = hilbert(samples)
    envelope = np.sqrt(samples ** 2 + hy ** 2)

    # total duration of the signal in seconds
    tot_time = len(envelope) / signal_sr

    # FFT bin k corresponds to k cycles over the whole signal, so the offset
    # of the slice has to be added back to get the true bin index
    spectrum = np.abs(fft(envelope))
    peak_bin = skip_bins + int(np.argmax(spectrum[skip_bins:half]))

    # k cycles in tot_time seconds -> one cycle lasts tot_time / k seconds
    period = tot_time / peak_bin

    return np.array([period])
|
|||
|
|
|||
|
def extract_signal_features(signal, signal_sr):
    """Extract part of handcrafted features from the input signal.

    The feature vector is the concatenation of: duration of the trimmed
    signal, tempo, number of detected onsets, the period returned by
    ``get_period`` and the ``sta_fun`` statistics of the frame-wise RMS,
    spectral centroid, spectral roll-off and zero crossing rate.

    :param signal: the signal the extract features from
    :type signal: numpy.ndarray
    :param signal_sr: the sample rate of the signal
    :type signal_sr: integer
    :return: the populated feature vector and the normalised, trimmed signal
    :rtype: tuple(numpy.ndarray, numpy.ndarray)
    :raises ValueError: if the signal is ``None`` or completely silent
    """

    # perform a sanity check
    if signal is None:
        raise ValueError("Input signal cannot be None")

    # normalise the sound signal before processing; an all-zero signal has no
    # peak to normalise by and would otherwise turn into NaNs
    peak = np.max(np.abs(signal))
    if peak == 0:
        raise ValueError("Input signal is silent and cannot be normalised")
    signal = signal / peak

    # trim the signal to the appropriate length
    trimmed_signal, _ = librosa.effects.trim(signal, frame_length=FRAME_LEN, hop_length=HOP)

    # extract the signal duration
    signal_duration = librosa.get_duration(y=trimmed_signal, sr=signal_sr)

    # use librosa to track the beats; only the tempo estimate is used
    tempo, _ = librosa.beat.beat_track(y=trimmed_signal, sr=signal_sr)
    # some librosa releases return the tempo as a one-element array, so
    # reduce it to a plain scalar before packing it with the other scalars
    tempo = float(np.atleast_1d(tempo)[0])

    # find the onset strength of the trimmed signal (audio passed by keyword,
    # which both older and newer librosa releases accept)
    o_env = librosa.onset.onset_strength(y=trimmed_signal, sr=signal_sr)

    # find the frames of the onset
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=signal_sr)

    # count the detected onsets
    onsets = onset_frames.shape[0]

    # decompose the spectrogram into its magnitude and phase; only the magnitude is used
    mag, _ = librosa.magphase(librosa.stft(trimmed_signal, n_fft=FRAME_LEN, hop_length=HOP))

    # extract the frame-wise rms from the trimmed waveform (librosa's default framing)
    rms = librosa.feature.rms(y=trimmed_signal)[0]

    # extract the spectral centroid of the magnitude
    cent = librosa.feature.spectral_centroid(S=mag, sr=signal_sr)[0]

    # extract the spectral rolloff point from the magnitude
    rolloff = librosa.feature.spectral_rolloff(S=mag, sr=signal_sr)[0]

    # extract the zero crossing rate from the trimmed signal using the predefined frame and hop lengths
    zcr = librosa.feature.zero_crossing_rate(trimmed_signal, frame_length=FRAME_LEN, hop_length=HOP)[0]

    # pack the extracted features into the feature vector to be returned;
    # the period is computed on the normalised but untrimmed signal
    signal_features = np.concatenate(
        (
            np.array([signal_duration, tempo, onsets]),
            get_period(signal, signal_sr=signal_sr),
            sta_fun(rms),
            sta_fun(cent),
            sta_fun(rolloff),
            sta_fun(zcr),
        ),
        axis=0,
    )

    # finally, return the gathered features and the trimmed signal
    return signal_features, trimmed_signal
|
|||
|
|
|||
|
def extract_mfcc(signal, signal_sr=SR, n_fft=FRAME_LEN, hop_length=HOP, n_mfcc=MFCC_dim):
    """Summarise the MFCCs of a signal, and their deltas, with ``sta_fun``.

    The Mel-frequency cepstral coefficients are computed together with their
    first and second order deltas. Every coefficient track is then reduced to
    its ``sta_fun`` statistics; the MFCC tracks come first, followed by the
    first order deltas and finally the second order deltas.

    :param signal: the signal to extract the mfcc from
    :type signal: numpy.ndarray
    :param signal_sr: the signal sample rate
    :type signal_sr: integer
    :param n_fft: the fft window size
    :type n_fft: integer
    :param hop_length: the hop length
    :type hop_length: integer
    :param n_mfcc: the dimension of the mfcc
    :type n_mfcc: integer
    :return: the flat list of statistics for all coefficient tracks
    :rtype: list
    """
    # coefficients of the input signal
    coefficients = librosa.feature.mfcc(
        y=signal,
        sr=signal_sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mfcc=n_mfcc,
        dct_type=3,
    )

    # first and second order deltas of those coefficients
    first_delta = librosa.feature.delta(coefficients, order=1)
    second_delta = librosa.feature.delta(coefficients, order=2)

    # walk the three matrices in order and summarise every coefficient track
    collected = []
    for matrix in (coefficients, first_delta, second_delta):
        for row in range(n_mfcc):
            collected.extend(sta_fun(matrix[row, :]))

    return collected
|
|||
|
|
|||
|
def extract_features(signal, signal_sr):
    """Extract all features from the input signal.

    The result is the handcrafted feature vector produced by
    ``extract_signal_features`` followed by the MFCC statistics produced by
    ``extract_mfcc`` on the trimmed signal.

    :param signal: the signal the extract features from
    :type signal: numpy.ndarray
    :param signal_sr: the sample rate of the signal
    :type signal_sr: integer
    :return: the extracted feature vector
    :rtype: numpy.ndarray
    """

    # extract the signal features
    signal_features, trimmed_signal = extract_signal_features(signal, signal_sr)

    # extract the mfcc's from the trimmed signal and get the statistical feature;
    # forward the sample rate so the mel filters match the signal instead of
    # silently assuming the module default
    mfccs = extract_mfcc(trimmed_signal, signal_sr=signal_sr)

    return np.concatenate((signal_features, mfccs), axis=0)
|
|||
|
|
|||
|
|
|||
|
if __name__ == "__main__":
    # data path (raw_files\devel OR test OR train folder)
    if len(sys.argv) < 2:
        sys.exit("usage: {} <path-to-audio-folder>".format(sys.argv[0]))
    path = sys.argv[1]

    # name of the data split, taken from the last component of the path;
    # it selects the labels file and names the output files
    split_name = os.path.basename(os.path.normpath(path))

    x_data = []

    # extract features
    for file_name in os.listdir(path):
        sample_path = os.path.join(path, file_name)
        try:
            y, sr = librosa.load(
                sample_path, sr=SR, mono=True, offset=0.0, duration=None
            )
        except IOError as err:
            print("could not read {}: {}".format(sample_path, err))
            continue

        # skip recordings that are shorter than 2 seconds once trimmed
        yt, _ = librosa.effects.trim(
            y, frame_length=FRAME_LEN, hop_length=HOP
        )
        duration = librosa.get_duration(y=yt, sr=sr)
        if duration < 2:
            continue

        features = extract_features(signal=y, signal_sr=sr)
        x_data.append(features.tolist())

    # save features in numpy.array
    x_data = np.array(x_data)

    # NOTE(review): the rows of x_data follow the os.listdir order and leave
    # out unreadable/short files, while the labels are taken from the CSV
    # unchanged -- confirm that the two actually line up.
    labels_path = os.path.join("labels", split_name + ".csv")
    df = pd.read_csv(labels_path, sep=",")
    y_label = df.label

    # make sure the output folder exists before writing into it
    os.makedirs("hand_features", exist_ok=True)
    np.save(os.path.join("hand_features", "x_" + split_name + "_data.npy"), x_data)
    np.save(os.path.join("hand_features", "y_" + split_name + "_label.npy"), y_label)
|