Pulsar-V

## MFCC(梅尔倒谱系数)的算法思路

• 读取波形文件

• 汉明窗

• 分帧

• 傅里叶变换

• 回归离散数据

• 取得特征数据

• Python示例代码

``````import numpy, numpy.fft

def mel(f):
return 2595. * numpy.log10(1. + f / 700.)

def melinv(m):
return 700. * (numpy.power(10., m / 2595.) - 1.)

class MFCC(object):
def __init__(self, nfilt=40, ncep=13,
lowerf=133.3333, upperf=6855.4976, alpha=0.97,
samprate=16000, frate=100, wlen=0.0256,
nfft=512):
self.lowerf = lowerf
self.upperf = upperf
self.nfft = nfft
self.ncep = ncep
self.nfilt = nfilt
self.frate = frate
self.fshift = float(samprate) / frate

# 构建汉明窗
self.wlen = int(wlen * samprate)
self.win = numpy.hamming(self.wlen)

# Prior sample for pre-emphasis
self.prior = 0
self.alpha = alpha

# 构建梅尔滤波矩阵
self.filters = numpy.zeros((nfft/2+1,nfilt), 'd')
dfreq = float(samprate) / nfft
if upperf > samprate/2:
raise(Exception,
"Upper frequency %f exceeds Nyquist %f" % (upperf, samprate/2))
melmax = mel(upperf)
melmin = mel(lowerf)
dmelbw = (melmax - melmin) / (nfilt + 1)
# Filter edges, in Hz
filt_edge = melinv(melmin + dmelbw * numpy.arange(nfilt + 2, dtype='d'))

for whichfilt in range(0, nfilt):
# Filter triangles, in DFT points
leftfr = round(filt_edge[whichfilt] / dfreq)
centerfr = round(filt_edge[whichfilt + 1] / dfreq)
rightfr = round(filt_edge[whichfilt + 2] / dfreq)
# For some reason this is calculated in Hz, though I think
# it doesn't really matter
fwidth = (rightfr - leftfr) * dfreq
height = 2. / fwidth

if centerfr != leftfr:
leftslope = height / (centerfr - leftfr)
else:
leftslope = 0
freq = leftfr + 1
while freq < centerfr:
self.filters[freq,whichfilt] = (freq - leftfr) * leftslope
freq = freq + 1
if freq == centerfr: # This is always true
self.filters[freq,whichfilt] = height
freq = freq + 1
if centerfr != rightfr:
rightslope = height / (centerfr - rightfr)
while freq < rightfr:
self.filters[freq,whichfilt] = (freq - rightfr) * rightslope
freq = freq + 1
#             print("Filter %d: left %d=%f center %d=%f right %d=%f width %d" %
#                   (whichfilt,
#                   leftfr, leftfr*dfreq,
#                   centerfr, centerfr*dfreq,
#                   rightfr, rightfr*dfreq,
#                   freq - leftfr))
#             print self.filters[leftfr:rightfr,whichfilt]

# Build DCT matrix
self.s2dct = s2dctmat(nfilt, ncep, 1./nfilt)
self.dct = dctmat(nfilt, ncep, numpy.pi/nfilt)

def sig2s2mfc(self, sig):
nfr = int(len(sig) / self.fshift + 1)
mfcc = numpy.zeros((nfr, self.ncep), 'd')
fr = 0
while fr < nfr:
start = round(fr * self.fshift)
end = min(len(sig), start + self.wlen)
frame = sig[start:end]
if len(frame) < self.wlen:
frame = numpy.resize(frame,self.wlen)
frame[self.wlen:] = 0
mfcc[fr] = self.frame2s2mfc(frame)
fr = fr + 1
return mfcc

def sig2logspec(self, sig):
nfr = int(len(sig) / self.fshift + 1)
mfcc = numpy.zeros((nfr, self.nfilt), 'd')
fr = 0
while fr < nfr:
start = round(fr * self.fshift)
end = min(len(sig), start + self.wlen)
frame = sig[start:end]
if len(frame) < self.wlen:
frame = numpy.resize(frame,self.wlen)
frame[self.wlen:] = 0
mfcc[fr] = self.frame2logspec(frame)
fr = fr + 1
return mfcc

def pre_emphasis(self, frame):
# FIXME: Do this with matrix multiplication
outfr = numpy.empty(len(frame), 'd')
outfr[0] = frame[0] - self.alpha * self.prior
for i in range(1,len(frame)):
outfr[i] = frame[i] - self.alpha * frame[i-1]
self.prior = frame[-1]
return outfr

def frame2logspec(self, frame):
frame = self.pre_emphasis(frame) * self.win
fft = numpy.fft.rfft(frame, self.nfft)
# Square of absolute value
power = fft.real * fft.real + fft.imag * fft.imag
return numpy.log(numpy.dot(power, self.filters).clip(1e-5,numpy.inf))

def frame2s2mfc(self, frame):
logspec = self.frame2logspec(frame)
return numpy.dot(logspec, self.s2dct.T) / self.nfilt

def s2dctmat(nfilt,ncep,freqstep):
"""Return the 'legacy' not-quite-DCT matrix used by Sphinx"""
melcos = numpy.empty((ncep, nfilt), 'double')
for i in range(0,ncep):
freq = numpy.pi * float(i) / nfilt
melcos[i] = numpy.cos(freq * numpy.arange(0.5, float(nfilt)+0.5, 1.0, 'double'))
melcos[:,0] = melcos[:,0] * 0.5
return melcos

def logspec2s2mfc(logspec, ncep=13):
"""Convert log-power-spectrum bins to MFCC using the 'legacy'
Sphinx transform"""
nframes, nfilt = logspec.shape
melcos = s2dctmat(nfilt, ncep, 1./nfilt)
return numpy.dot(logspec, melcos.T) / nfilt

def dctmat(N,K,freqstep,orthogonalize=True):
"""Return the orthogonal DCT-II/DCT-III matrix of size NxK.
For computing or inverting MFCCs, N is the number of
log-power-spectrum bins while K is the number of cepstra.

"""
cosmat = numpy.zeros((N, K), 'double')
for n in range(0,N):
for k in range(0, K):
cosmat[n,k] = numpy.cos(freqstep * (n + 0.5) * k)
if orthogonalize:
cosmat[:,0] = cosmat[:,0] * 1./numpy.sqrt(2)
return cosmat

def dct(input, K=13):
"""Convert log-power-spectrum to MFCC using the orthogonal DCT-II"""
nframes, N = input.shape
freqstep = numpy.pi / N
cosmat = dctmat(N,K,freqstep)
return numpy.dot(input, cosmat) * numpy.sqrt(2.0 / N)

def dct2(input, K=13):
"""Convert log-power-spectrum to MFCC using the normalized DCT-II"""
nframes, N = input.shape
freqstep = numpy.pi / N
cosmat = dctmat(N,K,freqstep,False)
return numpy.dot(input, cosmat) * (2.0 / N)

def idct(input, K=40):
"""Convert MFCC to log-power-spectrum using the orthogonal DCT-III"""
nframes, N = input.shape
freqstep = numpy.pi / K
cosmat = dctmat(K,N,freqstep).T
return numpy.dot(input, cosmat) * numpy.sqrt(2.0 / K)

def dct3(input, K=40):
"""Convert MFCC to log-power-spectrum using the unnormalized DCT-III"""
nframes, N = input.shape
freqstep = numpy.pi / K
cosmat = dctmat(K,N,freqstep,False)
cosmat[:,0] = cosmat[:,0] * 0.5
return numpy.dot(input, cosmat.T)
``````

### Pulsar-V

PilgrimHui
2018/12/21
0
0

MFCC的python实现 1.对音频信号进行分割为帧 #coding=utf-8 对音频信号处理程序 张泽旺，2015-12-12 本程序主要有四个函数，它们分别是： audio2frame:将音频转换成帧矩阵 deframesignal:对每...

2015/12/15
10.6K
12

uwr44uouqcnsuqb60zk2
2017/12/13
0
0
Python算法设计总结篇

2014/07/06
962
2

AI科技大本营
03/12
0
0

【0918】正则介绍_grep

【0918】正则介绍_grep 9.1 正则介绍_grep上 9.2 grep中 9.3 grep下 一、正则介绍 正则是一串有规律的字符串，它使用单个字符串来描述或匹配一系列符合某个语法规则的字符串。 二、grep工具 ...

14分钟前
4
0

1. 网页加载速度更快 在网站中使用CDN技术最直接的一个好处就是它可以加快网页的加载速度。首先，CDN加速的内容分发是基于服务器缓存的，由于CDN中缓存了不少数据，它能够给用户提供更快的页...

52分钟前
8
0

symbiochina88

4
0

SOM-TL437xF是一款广州创龙基于TI AM437x ARM Cortex-A9 + Xilinx Spartan-6 FPGA芯片设计的核心板，采用沉金无铅工艺的10层板设计，适用于高速数据采集和处理系统、汽车导航、工业自动化等领...

Tronlong创龙

4
0

好程序员Java学习路线分享MyBatis之线程优化，我们的项目存在大量用户同时访问的情况，那么就会出现大量线程并发访问数据库，这样会带来线程同步问题，本章我们将讨论MyBatis的线程同步问...

6
0