"""Handcrafted audio feature extraction: signal-level statistics and MFCC statistics."""
import json
import os
import sys
import warnings
from math import pi

import librosa
import numpy as np
import pandas as pd
from scipy.fftpack import fft, hilbert

warnings.filterwarnings("ignore")

SR = 22050  # sample rate
FRAME_LEN = int(SR / 10)  # 100 ms
HOP = int(FRAME_LEN / 2)  # 50% overlap, i.e. a 50 ms hop length
MFCC_dim = 13  # the MFCC dimension


def _quantile(np_data, q, method):
    """Return the ``q`` quantile of ``np_data`` using the given estimation method.

    NumPy renamed the ``interpolation`` keyword of ``np.quantile`` to ``method``;
    try the new spelling first and fall back to the old one so both work.
    """
    try:
        return np.quantile(np_data, q, method=method)
    except TypeError:
        return np.quantile(np_data, q, interpolation=method)


def sta_fun(np_data):
    """Extract various statistical features from the numpy array provided as input.

    :param np_data: the numpy array to extract the features from
    :type np_data: numpy.ndarray
    :return: the extracted features as a vector, ordered as mean, min, max, std,
        rms, median, first quartile, third quartile, inter-quartile range,
        skewness, kurtosis
    :rtype: numpy.ndarray
    :raises ValueError: if ``np_data`` is None
    """
    # perform a sanity check
    if np_data is None:
        raise ValueError("Input array cannot be None")

    # perform the feature extraction
    dat_min = np.min(np_data)
    dat_max = np.max(np_data)
    dat_mean = np.mean(np_data)
    dat_rms = np.sqrt(np.sum(np.square(np_data)) / len(np_data))
    dat_median = np.median(np_data)
    dat_qrl1 = np.percentile(np_data, 25)
    dat_qrl3 = np.percentile(np_data, 75)
    # the inter-quartile range is taken between actual data points
    # (no interpolation), hence the "lower"/"higher" estimators
    dat_lower_q = _quantile(np_data, 0.25, "lower")
    dat_higher_q = _quantile(np_data, 0.75, "higher")
    dat_iqrl = dat_higher_q - dat_lower_q
    dat_std = np.std(np_data)
    s = pd.Series(np_data)
    dat_skew = s.skew()
    dat_kurt = s.kurt()

    # finally return the features in a concatenated array (as a vector)
    return np.array(
        [
            dat_mean,
            dat_min,
            dat_max,
            dat_std,
            dat_rms,
            dat_median,
            dat_qrl1,
            dat_qrl3,
            dat_iqrl,
            dat_skew,
            dat_kurt,
        ]
    )


def get_period(signal, signal_sr):
    """Extract the period from the provided signal.

    The period is derived from the strongest spectral peak of the signal's
    envelope (ignoring the lowest bins).

    :param signal: the signal to extract the period from
    :type signal: numpy.ndarray
    :param signal_sr: the sampling rate of the input signal
    :type signal_sr: integer
    :return: a vector containing the signal period
    :rtype: numpy.ndarray
    :raises ValueError: if ``signal`` is None
    """
    # perform a sanity check
    if signal is None:
        raise ValueError("Input signal cannot be None")

    # envelope of the signal from the signal and its Hilbert transform
    hy = hilbert(signal)
    ey = np.sqrt(signal ** 2 + hy ** 2)

    min_time = 1.0 / signal_sr
    tot_time = len(ey) * min_time

    # magnitude spectrum of the envelope; search only the positive
    # frequencies, skipping the first three bins
    pow_ft = np.abs(fft(ey))
    peak_freq = pow_ft[3: int(len(pow_ft) / 2)]
    peak_freq_pos = peak_freq.argmax()
    # NOTE(review): the slice starts at bin 3 but the offset added here is 2;
    # kept as-is so that previously extracted features stay comparable -- confirm.
    peak_freq_val = 2 * pi * (peak_freq_pos + 2) / tot_time
    period = 2 * pi / peak_freq_val

    return np.array([period])


def extract_signal_features(signal, signal_sr):
    """Extract part of the handcrafted features from the input signal.

    :param signal: the signal to extract features from
    :type signal: numpy.ndarray
    :param signal_sr: the sample rate of the signal
    :type signal_sr: integer
    :return: the populated feature vector and the trimmed (normalised) signal
    :rtype: tuple(numpy.ndarray, numpy.ndarray)
    """
    # normalise the sound signal before processing
    signal = signal / np.max(np.abs(signal))
    # trim the leading and trailing silence of the signal
    trimmed_signal, _ = librosa.effects.trim(signal, frame_length=FRAME_LEN, hop_length=HOP)
    # extract the signal duration
    signal_duration = librosa.get_duration(y=trimmed_signal, sr=signal_sr)
    # use librosa to track the beats
    tempo, _ = librosa.beat.beat_track(y=trimmed_signal, sr=signal_sr)
    # depending on the librosa version the tempo is a scalar or a 1-element
    # array; make it a scalar so the feature vector below is not ragged
    tempo = float(np.atleast_1d(tempo)[0])
    # find the onset strength of the trimmed signal
    o_env = librosa.onset.onset_strength(y=trimmed_signal, sr=signal_sr)
    # find the frames of the onsets
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=signal_sr)
    # keep the number of detected onsets
    onsets = onset_frames.shape[0]

    # decompose the spectrogram into its magnitude and phase components
    mag, _ = librosa.magphase(librosa.stft(trimmed_signal, n_fft=FRAME_LEN, hop_length=HOP))
    # extract the rms energy from the trimmed signal
    rms = librosa.feature.rms(y=trimmed_signal)[0]
    # extract the spectral centroid of the magnitude
    cent = librosa.feature.spectral_centroid(S=mag)[0]
    # extract the spectral rolloff point from the magnitude
    rolloff = librosa.feature.spectral_rolloff(S=mag, sr=signal_sr)[0]
    # extract the zero crossing rate from the trimmed signal using the predefined frame and hop lengths
    zcr = librosa.feature.zero_crossing_rate(trimmed_signal, frame_length=FRAME_LEN, hop_length=HOP)[0]

    # pack the extracted features into the feature vector to be returned
    signal_features = np.concatenate(
        (
            np.array([signal_duration, tempo, onsets]),
            # the period is computed on the normalised, untrimmed signal
            get_period(signal, signal_sr=signal_sr),
            sta_fun(rms),
            sta_fun(cent),
            sta_fun(rolloff),
            sta_fun(zcr),
        ),
        axis=0,
    )

    # finally, return the gathered features and the trimmed signal
    return signal_features, trimmed_signal


def extract_mfcc(signal, signal_sr=SR, n_fft=FRAME_LEN, hop_length=HOP, n_mfcc=MFCC_dim):
    """Extract statistics of the Mel-frequency cepstral coefficients (MFCC) of the signal.

    :param signal: the signal to extract the mfcc from
    :type signal: numpy.ndarray
    :param signal_sr: the signal sample rate
    :type signal_sr: integer
    :param n_fft: the fft window size
    :type n_fft: integer
    :param hop_length: the hop length
    :type hop_length: integer
    :param n_mfcc: the dimension of the mfcc
    :type n_mfcc: integer
    :return: the statistics (see ``sta_fun``) of every MFCC coefficient, followed
        by those of the first and of the second order deltas
    :rtype: list
    """
    # compute the mfcc of the input signal
    mfcc = librosa.feature.mfcc(
        y=signal, sr=signal_sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=n_mfcc, dct_type=3
    )

    # extract the first and second order deltas from the retrieved mfcc's
    mfcc_delta = librosa.feature.delta(mfcc, order=1)
    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)

    # populate the output with the per-coefficient statistics, one matrix after the other
    mfccs = []
    for coefficients in (mfcc, mfcc_delta, mfcc_delta2):
        for i in range(n_mfcc):
            mfccs.extend(sta_fun(coefficients[i, :]))

    # finally return the coefficients
    return mfccs


def extract_features(signal, signal_sr):
    """Extract all features from the input signal.

    :param signal: the signal to extract features from
    :type signal: numpy.ndarray
    :param signal_sr: the sample rate of the signal
    :type signal_sr: integer
    :return: the extracted feature vector (signal features followed by MFCC statistics)
    :rtype: numpy.ndarray
    """
    # extract the signal features
    signal_features, trimmed_signal = extract_signal_features(signal, signal_sr)

    # extract the mfcc's from the trimmed signal and get the statistical features
    mfccs = extract_mfcc(trimmed_signal, signal_sr=signal_sr)

    return np.concatenate((signal_features, mfccs), axis=0)


def main(data_path):
    """Extract the features of every audio file in ``data_path`` and save them.

    The feature matrix and the labels are written as ``.npy`` files into the
    ``hand_features`` folder; the labels are read from ``labels/<name>.csv``
    where ``<name>`` is the last component of ``data_path``.

    :param data_path: data path (raw_files/devel OR test OR train folder)
    :type data_path: str
    """
    split_name = os.path.basename(os.path.normpath(data_path))
    x_data = []

    # extract features
    for file_name in os.listdir(data_path):
        sample_path = os.path.join(data_path, file_name)
        try:
            y, sr = librosa.load(sample_path, sr=SR, mono=True, offset=0.0, duration=None)
        except IOError:
            print("file doesn't exit")
            continue

        # skip recordings shorter than 2 seconds once silence is trimmed
        yt, _ = librosa.effects.trim(y, frame_length=FRAME_LEN, hop_length=HOP)
        if librosa.get_duration(y=yt, sr=sr) < 2:
            continue

        features = extract_features(signal=y, signal_sr=sr)
        x_data.append(features.tolist())

    # save features in numpy.array
    x_data = np.array(x_data)

    # NOTE(review): files skipped above are not removed from the labels and
    # os.listdir order is arbitrary, so rows of x_data may not line up with
    # y_label -- confirm against the layout of the labels CSV.
    labels_path = os.path.join("labels", split_name + ".csv")
    df = pd.read_csv(labels_path, sep=",")
    y_label = df.label

    os.makedirs("hand_features", exist_ok=True)
    np.save(os.path.join("hand_features", "x_" + split_name + "_data.npy"), x_data)
    np.save(os.path.join("hand_features", "y_" + split_name + "_label.npy"), y_label)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: python <script> <data_dir>")
    main(sys.argv[1])