To compute mel features in librosa, there are two related functions: librosa.filters.mel() and librosa.feature.melspectrogram(). In this tutorial, we will explain the difference between them.
librosa.filters.mel()
It is defined as:
librosa.filters.mel(*, sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False, norm='slaney', dtype=<class 'numpy.float32'>)
- librosa.filters.mel(*, sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False, norm='slaney', dtype=<class 'numpy.float32'>)
librosa.filters.mel(*, sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False, norm='slaney', dtype=<class 'numpy.float32'>)
It will create a Mel filter-bank and produce a linear transformation matrix to project FFT bins onto Mel-frequency bins.
Notice: It only creates a Mel filter-bank (a weight matrix), not FBank features, so you cannot use its output directly as an audio feature.
For example:
import librosa
import numpy as np
import matplotlib.pyplot as plt
def plot_mel_fbank(fbank, title=None):
    """Display a Mel filter-bank matrix as an image.

    Args:
        fbank: 2-D filter-bank matrix. It is drawn with ``imshow``, so its
            rows run along the y-axis and its columns along the x-axis.
        title: Optional figure title; defaults to "Filter bank".
    """
    fig, axs = plt.subplots(1, 1)
    axs.set_title(title or "Filter bank")
    axs.imshow(fbank, aspect="auto")
    # The matrix from librosa.filters.mel has one row per mel band and one
    # column per FFT bin (it prints as (80, 257) below), so the y-axis is
    # the mel bin and the x-axis is the frequency bin.
    axs.set_ylabel("mel bin")
    axs.set_xlabel("frequency bin")
    # Non-blocking show: returns immediately instead of waiting for the
    # figure window to be closed.
    plt.show(block=False)
# Build an 80-band Mel filter bank for 8 kHz audio analysed with a
# 512-point FFT, spanning 0 Hz up to half the sample rate.
sr = 8000
mels = librosa.filters.mel(
    sr=sr,
    n_fft=512,
    n_mels=80,
    fmin=0.0,
    fmax=sr / 2.0,
)
# Only a weight matrix is produced here; no audio has been processed.
print(mels.shape)
plot_mel_fbank(mels)
- import librosa
- import numpy as np
- import matplotlib.pyplot as plt
- def plot_mel_fbank(fbank, title=None):
- fig, axs = plt.subplots(1, 1)
- axs.set_title(title or "Filter bank")
- axs.imshow(fbank, aspect="auto")
- axs.set_ylabel("frequency bin")
- axs.set_xlabel("mel bin")
- plt.show(block=False)
- sr = 8000
- mels = librosa.filters.mel(sr=sr, n_fft = 512, fmin=0.0, fmax=sr / 2.0,n_mels=80)
- print(mels.shape)
- plot_mel_fbank(mels)
import librosa
import numpy as np
import matplotlib.pyplot as plt
def plot_mel_fbank(fbank, title=None):
    """Display a Mel filter-bank matrix as an image.

    Args:
        fbank: 2-D filter-bank matrix. It is drawn with ``imshow``, so its
            rows run along the y-axis and its columns along the x-axis.
        title: Optional figure title; defaults to "Filter bank".
    """
    fig, axs = plt.subplots(1, 1)
    axs.set_title(title or "Filter bank")
    axs.imshow(fbank, aspect="auto")
    # The matrix from librosa.filters.mel has one row per mel band and one
    # column per FFT bin (it prints as (80, 257) below), so the y-axis is
    # the mel bin and the x-axis is the frequency bin.
    axs.set_ylabel("mel bin")
    axs.set_xlabel("frequency bin")
    # Non-blocking show: returns immediately instead of waiting for the
    # figure window to be closed.
    plt.show(block=False)
# Build an 80-band Mel filter bank for 8 kHz audio analysed with a
# 512-point FFT, spanning 0 Hz up to half the sample rate.
sr = 8000
mels = librosa.filters.mel(
    sr=sr,
    n_fft=512,
    n_mels=80,
    fmin=0.0,
    fmax=sr / 2.0,
)
# Only a weight matrix is produced here; no audio has been processed.
print(mels.shape)
plot_mel_fbank(mels)
Run this code and you will see:
(80, 257)

As we can see, this function only returns a weight matrix of shape (n_mels, 1 + n_fft / 2), here (80, 257); it does not process any audio data.
librosa.feature.melspectrogram()
It is defined as:
librosa.feature.melspectrogram(*, y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0, **kwargs)
- librosa.feature.melspectrogram(*, y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0, **kwargs)
librosa.feature.melspectrogram(*, y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0, **kwargs)
It can compute a mel-scaled spectrogram.
Notice: The result of this function can be used as an audio feature.
In order to understand how to use this function, you can read:
Compute Audio Log Mel Spectrogram Feature: A Step Guide – Python Audio Processing
We will use an example to show the effect of mel spectrogram.
import librosa
import numpy as np
import matplotlib.pyplot as plt
def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
    """Plot a power spectrogram on a decibel scale with a colorbar.

    Args:
        spec: Power spectrogram; converted with ``librosa.power_to_db``
            before being drawn.
        title: Figure title; falls back to "Spectrogram (db)".
        ylabel: Label for the y-axis.
        aspect: Aspect setting forwarded to ``imshow``.
        xmax: If truthy, the x-axis is limited to the range (0, xmax).
    """
    figure, axis = plt.subplots(1, 1)
    heading = title or "Spectrogram (db)"
    axis.set_title(heading)
    axis.set_ylabel(ylabel)
    axis.set_xlabel("frame")
    # origin="lower" puts row 0 at the bottom of the image.
    spec_db = librosa.power_to_db(spec)
    image = axis.imshow(spec_db, origin="lower", aspect=aspect)
    if xmax:
        axis.set_xlim((0, xmax))
    figure.colorbar(image, ax=axis)
    plt.show(block=False)
# Load the recording as mono audio resampled to 8 kHz.
audio_file = 'speech-01-002.flac'
sr = 8000
audio_data, sr = librosa.load(audio_file, sr=sr, mono=True)
print(audio_data.shape)
# 25 ms analysis window with a 10 ms hop, both expressed in samples.
win_length = int(0.025 * sr)
hop_length = int(0.01 * sr)
melspectrum = librosa.feature.melspectrogram(
    y=audio_data,
    sr=sr,
    n_fft=512,
    hop_length=hop_length,
    win_length=win_length,
    window='hann',
    n_mels=80,
)
print(melspectrum.shape)
# The rows of a mel spectrogram are mel bands, not linear FFT bins, so
# override the default "freq_bin" y-axis label.
plot_spectrogram(melspectrum, ylabel="mel bin")
- import librosa
- import numpy as np
- import matplotlib.pyplot as plt
- def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
- fig, axs = plt.subplots(1, 1)
- axs.set_title(title or "Spectrogram (db)")
- axs.set_ylabel(ylabel)
- axs.set_xlabel("frame")
- im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect)
- if xmax:
- axs.set_xlim((0, xmax))
- fig.colorbar(im, ax=axs)
- plt.show(block=False)
- audio_file = 'speech-01-002.flac'
- sr = 8000
- audio_data, sr = librosa.load(audio_file, sr= sr, mono=True)
- print(audio_data.shape)
- win_length = int(0.025 * sr)
- hop_length = int(0.01 * sr)
- melspectrum = librosa.feature.melspectrogram(y=audio_data, sr=sr, hop_length= hop_length, win_length = win_length, window='hann', n_fft = 512, n_mels=80)
- print(melspectrum.shape)
- plot_spectrogram(melspectrum)
import librosa
import numpy as np
import matplotlib.pyplot as plt
def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
    """Plot a power spectrogram on a decibel scale with a colorbar.

    Args:
        spec: Power spectrogram; converted with ``librosa.power_to_db``
            before being drawn.
        title: Figure title; falls back to "Spectrogram (db)".
        ylabel: Label for the y-axis.
        aspect: Aspect setting forwarded to ``imshow``.
        xmax: If truthy, the x-axis is limited to the range (0, xmax).
    """
    figure, axis = plt.subplots(1, 1)
    heading = title or "Spectrogram (db)"
    axis.set_title(heading)
    axis.set_ylabel(ylabel)
    axis.set_xlabel("frame")
    # origin="lower" puts row 0 at the bottom of the image.
    spec_db = librosa.power_to_db(spec)
    image = axis.imshow(spec_db, origin="lower", aspect=aspect)
    if xmax:
        axis.set_xlim((0, xmax))
    figure.colorbar(image, ax=axis)
    plt.show(block=False)
# Load the recording as mono audio resampled to 8 kHz.
audio_file = 'speech-01-002.flac'
sr = 8000
audio_data, sr = librosa.load(audio_file, sr=sr, mono=True)
print(audio_data.shape)
# 25 ms analysis window with a 10 ms hop, both expressed in samples.
win_length = int(0.025 * sr)
hop_length = int(0.01 * sr)
melspectrum = librosa.feature.melspectrogram(
    y=audio_data,
    sr=sr,
    n_fft=512,
    hop_length=hop_length,
    win_length=win_length,
    window='hann',
    n_mels=80,
)
print(melspectrum.shape)
# The rows of a mel spectrogram are mel bands, not linear FFT bins, so
# override the default "freq_bin" y-axis label.
plot_spectrogram(melspectrum, ylabel="mel bin")
Run this code and you will see:
(389724,)
(80, 4872)
- (389724,)
- (80, 4872)
(389724,)
(80, 4872)

In the source code of librosa.feature.melspectrogram(), we can find:
# Build a Mel filter
mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
return np.einsum("...ft,mf->...mt", S, mel_basis, optimize=True)
- # Build a Mel filter
- mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
- return np.einsum("...ft,mf->...mt", S, mel_basis, optimize=True)
# Build a Mel filter
mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
return np.einsum("...ft,mf->...mt", S, mel_basis, optimize=True)
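In other words, librosa.filters.mel() is used inside librosa.feature.melspectrogram(): the latter builds the Mel filter bank with librosa.filters.mel() and then applies it to the spectrogram S to produce the mel spectrogram.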