106 lines
2.0 KiB
Python
106 lines
2.0 KiB
Python
|
|
||
|
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
|
||
|
|
||
|
#
|
||
|
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
|
||
|
# You may obtain a copy of the License at
|
||
|
|
||
|
#
|
||
|
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
#
|
||
|
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
||
|
# See the License for the specific language governing permissions and
|
||
|
|
||
|
# limitations under the License.
|
||
|
|
||
|
# ==============================================================================
|
||
|
|
||
|
|
||
|
|
||
|
"""Global parameters for the VGGish model.
|
||
|
|
||
|
|
||
|
|
||
|
See vggish_slim.py for more information.
|
||
|
|
||
|
"""
|
||
|
|
||
|
|
||
|
|
||
|
# Architectural constants.
|
||
|
|
||
|
NUM_FRAMES = 96 # Frames in input mel-spectrogram patch.
|
||
|
|
||
|
NUM_BANDS = 64 # Frequency bands in input mel-spectrogram patch.
|
||
|
|
||
|
EMBEDDING_SIZE = 128 # Size of embedding layer.
|
||
|
|
||
|
|
||
|
|
||
|
# Hyperparameters used in feature and example generation.
|
||
|
|
||
|
SAMPLE_RATE = 16000
|
||
|
|
||
|
STFT_WINDOW_LENGTH_SECONDS = 0.025
|
||
|
|
||
|
STFT_HOP_LENGTH_SECONDS = 0.010
|
||
|
|
||
|
NUM_MEL_BINS = NUM_BANDS
|
||
|
|
||
|
MEL_MIN_HZ = 125
|
||
|
|
||
|
MEL_MAX_HZ = 7500
|
||
|
|
||
|
LOG_OFFSET = 0.01 # Offset used for stabilized log of input mel-spectrogram.
|
||
|
|
||
|
EXAMPLE_WINDOW_SECONDS = 0.96 # Each example contains 96 10ms frames
|
||
|
|
||
|
EXAMPLE_HOP_SECONDS = 0.96 # with zero overlap.
|
||
|
|
||
|
|
||
|
|
||
|
# Parameters used for embedding postprocessing.
|
||
|
|
||
|
PCA_EIGEN_VECTORS_NAME = 'pca_eigen_vectors'
|
||
|
|
||
|
PCA_MEANS_NAME = 'pca_means'
|
||
|
|
||
|
QUANTIZE_MIN_VAL = -2.0
|
||
|
|
||
|
QUANTIZE_MAX_VAL = +2.0
|
||
|
|
||
|
|
||
|
|
||
|
# Hyperparameters used in training.
|
||
|
|
||
|
INIT_STDDEV = 0.01 # Standard deviation used to initialize weights.
|
||
|
|
||
|
LEARNING_RATE = 1e-4 # Learning rate for the Adam optimizer.
|
||
|
|
||
|
ADAM_EPSILON = 1e-8 # Epsilon for the Adam optimizer.
|
||
|
|
||
|
|
||
|
|
||
|
# Names of ops, tensors, and features.
|
||
|
|
||
|
INPUT_OP_NAME = 'vggish/input_features'
|
||
|
|
||
|
INPUT_TENSOR_NAME = INPUT_OP_NAME + ':0'
|
||
|
|
||
|
OUTPUT_OP_NAME = 'vggish/embedding'
|
||
|
|
||
|
OUTPUT_TENSOR_NAME = OUTPUT_OP_NAME + ':0'
|
||
|
|
||
|
AUDIO_EMBEDDING_FEATURE_NAME = 'audio_embedding'
|