文档章节

MP3解码之DCT快速算法的展开(旧)

暗之幻影
 暗之幻影
发布于 2015/01/04 15:29
字数 3933
阅读 41
收藏 0

     MP3解码的最后一步是“多相合成滤波”,多相合成滤波算法见ISO/IEC 11172-3 ANNEX_B Figure 3-A.2,经过5个步骤,将输入序列X[0..31]的32个采样值,变换为32个PCM样本输出:

// ①Shift 64 to 1024 FIFO
for i = 64 to 1023
    V[i] = V[i-64]

// ②Calculate 64 values V[i] by matrixing
for i = 0 to 63
    for k = 0 to 31
        V[i] += N[i][k] * X[k]
// 其中 N[i][K]=cos((16+i)*(2*k+1)*PI/64)

// ③Building a 512 values vector U
for i = 0 to 7
    for j = 0 to 31 {
        U[64*i+j] += V[128*i+j]
        U[64*i+32+j] += V[128*i+96+j]
    }

// ④Multiply U vector by D window
for j = 0 to 511
    W[i] = U[i] * D[i]
// 其中D[i] 见ISO/IEC 11172-3,Table 3-B.3.

// ⑤Calculating 32 Samples
for i = 0 to 31 {
    double Si = 0
    for j = 0 to 15
        Si += W[i + 32 * j]
    //Output PCM Sample: PCMi = (short)(Si * 32768)
}

     算法中第②步是DCT(32→64)运算,将输入序列的32个值X[k]变换为输出序列的64个值V[i],直接运算共64 * 32 = 2048次乘法

 

用32点DCT-II代替DCT(32→64)

    余弦函数具有周期性和对称性,所以由DCT(32→32)得到输出序列的32个值,可以直接得到 DCT(32→64) 的64个输出值,方算法如下:

void in32out64(double in[32], double out[64])
{
	int i;
	for (i = 0; i < 16; i++)
		out[i] = in[i+16];
	out[16] = 0.0;
	for (i = 17; i < 48; i++)
		out[i] = -in[48-i];
	for (i = 48; i < 64; i++)
		out[i] = -in[i-48];
}

 

根据标准中对N[i][k]的规定知DCT(32→32)是32点DCT-II。DCT-II的快速算法采用:

将N点DCT-II表示为两个N/2点DCT-II之和  N点DCT-II可以表示为N/2点DCT-II和N/2点DCT-IV之和,而DCT-IV输出序列中的一个值可以用DCT-II输出序列中两个连续的值之和表示。

将N/2点DCT再表示为两个N/4点DCT之和,一直分解下去

可以将上述两步推导过程一直递归地进行下去,直到N=2。限于博客长度限制,并且输入公式不方便,这儿不写出具体推导过程。

本文的DCT-II(32→32)快速算法的作者是Michael Hipp,开源程序mpg123使用的是该算法。

 

为了方便查表求余弦值,先初始化N[i][k]:

#define PI 3.141592654    // 原文http://lfp001.iteye.com/
void init_N()
{
    int i,k;
    for(i = 0; i < 32; i++)
        for(k = 0; k < 32; k++)
            N[i][k] = cos(i * (2 * k + 1) * PI / 64);
}

 

2点DCT

N'[i][k] = cos(i * (2 * k + 1) * PI / 4);
out[i] += in[k] * N'[i][k];
程序代码为:

void dct2(double in[2], double out[2])
{
    int i,k;
    for(i = 0; i < 2; i++) {
        out[i] = 0;
        for(k = 0; k < 2; k++)
	out[i] += in[k] * N[16 * i][k];
    }
}


2点DCT快速算法直接展开运算,这3项N[i][k]不用查表:
N[0][k]=1,N[16][0]=cos(PI/4), N[16][1]=cos(3PI/4)=-cos(PI/4)
out[0] = in[0] + in[1];
out[1] = in[0] * N[16][0] + in[1] * N[16][1] = (in[0] - in[1]) * 0.7071067811866;

其算法如下:

void fast_dct2(double in[2], double out[2])
{
    out[0] = in[0] + in[1];
    out[1] = (in[0] - in[1]) * 0.7071067811866;
}

 

4点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 8);
out[i] += in[k] * N'[i][k];

void dct4(double in[4], double out[4])
{
	int i,k;
	for(i = 0; i < 4; i++) {
		out[i] = 0;
		for(k = 0; k < 4; k++)
			out[i] += in[k] * N[8 * i][k];
	}
}

 

4点DCT快速算法: 分解为两个2点DCT

void fast_dct4(double in[4], double out[4])
{
	int i;
	double even_in[2], even_out[2];
	double odd_in[2], odd_out[2];

	for(i = 0; i < 2; i++) {
		even_in[i] = in[i] + in[3 - i];
		odd_in[i] = (in[i] - in[3 - i]) / (2 * N[8][i]);
	}

	fast_dct2(even_in, even_out);
	fast_dct2(odd_in, odd_out);

	out[0] = even_out[0];
	out[1] = odd_out[0] + odd_out[1];
	
	out[2] = even_out[1];
	out[3] = odd_out[1];
}

 

8点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 16);
out[i] += in[k] * N'[i][k];

void dct8(double in[8], double out[8])
{
	int i,k;
	for(i = 0; i < 8; i++) {
		out[i] = 0;
		for(k = 0; k < 8; k++)
			out[i] += in[k] * N[4 * i][k];
	}
}

8点DCT快速算法: 分解为两个4点DCT,4点DCT用快速算法

N[4][i]=cos((2*i+1)/16)  i=0,1,2,3

void fast_dct8(double in[8], double out[8])
{
	int i;
	double even_in[4], even_out[4];
	double odd_in[4], odd_out[4];

	for(i = 0; i < 4; i++) {
		even_in[i] = in[i] + in[7 - i];
		odd_in[i] = (in[i] - in[7 - i]) / (2 * N[4][i]);
	}

	fast_dct4(even_in, even_out);	//直接产生out[0..7]的偶数项
	fast_dct4(odd_in, odd_out);		//间接产生out[0..7]的奇数项

	for (i = 0; i < 3; i++) {
		out[2*i] = even_out[i];
		out[2*i+1] = odd_out[i] + odd_out[i+1];	//!
	}
	out[6] = even_out[3];
	out[7] = odd_out[3];
}

 

16点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 32);
out[i] += in[k] * N'[i][k];

void dct16(double in[16], double out[16])
{
	int i,k;
	for(i = 0; i < 16; i++) {
		out[i] = 0;
		for(k = 0; k < 16; k++)
			out[i] += in[k] * N[2 * i][k];
	}
}

16点DCT快速算法: 分解为两个8点DCT,8点DCT用快速算法

N[2][i]=cos((2*i+1)/32)  i=0,1,...7

void fast_dct16(double in[16], double out[16])
{
	int i;
	double even_in[8], even_out[8];
	double odd_in[8], odd_out[8];

	for(i = 0; i < 8; i++) {
		even_in[i] = in[i] + in[15 - i];
		odd_in[i] = (in[i] - in[15 - i]) / (2 * N[2][i]);  // 计算误差来源于此?
	}

	fast_dct8(even_in, even_out);    // 直接产生out[0..15]的偶数项
	fast_dct8(odd_in, odd_out);       // 间接产生out[0..15]的奇数项

	for (i = 0; i < 7; i++) {
		out[2*i] = even_out[i];
		out[2*i+1] = odd_out[i] + odd_out[i+1];	  //!
	}
	out[14] = even_out[7];
	out[15] = odd_out[7];
}

 

32点DCT
N[i][k] = cos(i * (2 * k + 1) * PI / 64);
out[i] += in[k] * N[i][k];

void dct32(double in[32], double out[32])
{
	int i,k;
	for(i = 0; i < 32; i++) {
		out[i] = 0;
		for(k = 0; k < 32; k++)
			out[i] += in[k] * N[i][k];
	}
}

32点DCT快速算法: 分解为两个16点DCT,用16点DCT采用快速算法.共16+2*32=80次乘法

N[1][i]=cos((2*i+1)/64)  i=0,1,...15

void fast_dct32(double in[32], double out[32])
{
	int i;
	double even_in[16], even_out[16];
	double odd_in[16], odd_out[16];

	for(i = 0; i < 16; i++) {
		even_in[i] = in[i] + in[31 - i];
		odd_in[i] = (in[i] - in[31 - i]) / (2 * N[1][i]);
	}

	fast_dct16(even_in, even_out);	//直接产生out[0..31]的偶数项
	fast_dct16(odd_in, odd_out);		//间接产生out[0..31]的奇数项

	for (i = 0; i < 15; i++) {
		out[2*i] = even_out[i];
		out[2*i+1] = odd_out[i] + odd_out[i+1];	//!
	}
	out[30] = even_out[15];
	out[31] = odd_out[15];
}

 

各点DCT快速算法用到的N[i][j]:
DCT32: N[1][i]=cos((2*i+1)/64)  i=0..15
DCT16: N[2][i]=cos((2*i+1)/32)  i=0..7
DCT8:   N[4][i]=cos((2*i+1)/16)  i=0..3
DCT4:   N[8][i]=cos((2*i+1)/8)    i=0,1
DCT2:   cos(PI/4) = 0.7071067811866
快速算法只需事先存储16+8+4+2+1=31个值到N[0..30],取代N[32][32]。

 

DCT-II(32→64)的快速算法的展开算法

      解码一帧双声道的MP3,共要调用ISO/IEC 11172-3 ANNEX_B Figure 3-A.2第二步给出的矩阵运算2*2*18=72次,进行一次矩阵运算要进行浮点乘法是2048次,采用快速算法降低到80次,快速算法很完美。为什么还要将快速算法展开呢?由于矩阵运算调用频度极高,是影响解码速度的关键模块。分析以上快速算法函数可以看出两个特点:使用迭代和函数内大量的循环语句,应用迭代这种规律使展开成为可能,使用展开方法可以去掉中间各点DCT快速算法用到的循环语句,所以可以使矩阵运算的速度进一步提高。通过我对比实测用展开方法解码速度提升10%以上。下面给出的DCT-II(32->64)展开的快速算法是从我前几年写的MP3解码程序中直接COPY过来的,有比较详细的注解,对比上文的各点DCT的快速算法函数,很容易看懂。    【原文http://lfp001.iteye.com/

void dct32to64(double in32[32], double out64[64])
{
	double in0,in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15;
	double out0,out1,out2,out3,out4,out5,out6,out7,out8,out9,out10,out11,out12,out13,out14,out15;
	double d8_0,d8_1,d8_2,d8_3,d8_4,d8_5,d8_6,d8_7;
	double ein0, ein1, oin0, oin1;

	//>>>>>>>>>>>>>>>> [http://lfp001.iteye.com/admin/blogs/737777]
	// 用DCT16计算DCT32输出[0..31]的偶数下标元素
	in0 = in32[0] + in32[31];
	in1 = in32[1] + in32[30];
	in2 = in32[2] + in32[29];
	in3 = in32[3] + in32[28];
	in4 = in32[4] + in32[27];
	in5 = in32[5] + in32[26];
	in6 = in32[6] + in32[25];
	in7 = in32[7] + in32[24];
	in8 = in32[8] + in32[23];
	in9 = in32[9] + in32[22];
	in10 = in32[10] + in32[21];
	in11 = in32[11] + in32[20];
	in12 = in32[12] + in32[19];
	in13 = in32[13] + in32[18];
	in14 = in32[14] + in32[17];
	in15 = in32[15] + in32[16];

	//DCT16
	{
		//>>>>>>>> 用DCT8计算DCT16输出[0..15]的偶数下标元素
		d8_0 = in0 + in15;
		d8_1 = in1 + in14;
		d8_2 = in2 + in13;
		d8_3 = in3 + in12;
		d8_4 = in4 + in11;
		d8_5 = in5 + in10;
		d8_6 = in6 + in9;
		d8_7 = in7 + in8;

		//DCT8. 加(减)法29,乘法12次
		{
			//>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素
			out1 = d8_0 + d8_7;
			out3 = d8_1 + d8_6;
			out5 = d8_2 + d8_5;
			out7 = d8_3 + d8_4;

			//>>e DCT2
			ein0 = out1 + out7;
			ein1 = out3 + out5;
			out64[48] =  -ein0 - ein1;
			out64[0] = (ein0 - ein1) * 0.70710678118654752;	// 0.5/cos(PI/4)

			//>>o DCT2
			oin0 = (out1 - out7) * 0.54119610014619698;		// 0.5/cos( PI/8)
			oin1 = (out3 - out5) * 1.30656296487637653;		// 0.5/cos(3PI/8)

			out2 =  oin0 + oin1;
			out12 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

			out64[40] = out64[56] = -out2 - out12;
			out64[8] = out12;
			//<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素

			//>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素
			//o DCT4 part1
			out1 = (d8_0 - d8_7) * 0.50979557910415917;		// 0.5/cos( PI/16)
			out3 = (d8_1 - d8_6) * 0.60134488693504528;		// 0.5/cos(3PI/16)
			out5 = (d8_2 - d8_5) * 0.89997622313641570;		// 0.5/cos(5PI/16)
			out7 = (d8_3 - d8_4) * 2.56291544774150618;		// 0.5/cos(7PI/16)

			//o DCT4 part2

			//e DCT2 part1
			ein0 = out1 + out7;
			ein1 = out3 + out5;

			//o DCT2 part1
			oin0 = (out1 - out7) * 0.54119610014619698;	// 0.5/cos(PI/8)
			oin1 = (out3 - out5) * 1.30656296487637653;	// 0.5/cos(3PI/8)

			//e DCT2 part2
			out1 =  ein0 + ein1;
			out5 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

			//o DCT2 part2
			out3 = oin0 + oin1;
			out7 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)
			out3 += out7;

			//o DCT4 part3
			out64[44] = out64[52] = -out1 - out3;	//out1+=out3
			out64[36] = out64[60] = -out3 - out5;	//out3+=out5
			out64[4] = out5 + out7;					//out5+=out7
			out64[12] = out7;
			//<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素
		}
		//<<<<<<<< 完成计算DCT16输出[0..15]的偶数下标元素

		//-------------------------------------------------------------------------

		//>>>>>>>> 用DCT8计算DCT16输出[0..15]的奇数下标元素
		d8_0 = (in0 - in15) * 0.50241928618815571;	// 0.5/cos( 1 * PI/32)
		d8_1 = (in1 - in14) * 0.52249861493968888;	// 0.5/cos( 3 * PI/32)
		d8_2 = (in2 - in13) * 0.56694403481635770;	// 0.5/cos( 5 * PI/32)
		d8_3 = (in3 - in12) * 0.64682178335999013;	// 0.5/cos( 7 * PI/32)
		d8_4 = (in4 - in11) * 0.78815462345125022;	// 0.5/cos( 9 * PI/32)
		d8_5 = (in5 - in10) * 1.06067768599034747;	// 0.5/cos(11 * PI/32)
		d8_6 = (in6 - in9) * 1.72244709823833393;	// 0.5/cos(13 * PI/32)
		d8_7 = (in7 - in8) * 5.10114861868916386;	// 0.5/cos(15 * PI/32)

		//DCT8
		{
			//>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素.
			out3 = d8_0 + d8_7;
			out7 = d8_1 + d8_6;
			out11 = d8_2 + d8_5;
			out15 = d8_3 + d8_4;

			//>>e DCT2
			ein0 = out3 + out15;
			ein1 = out7 + out11;
			out1 =  ein0 + ein1;
			out9 = (ein0 - ein1) * 0.70710678118654752;		// 0.5/cos(PI/4)

			//>>o DCT2
			oin0 = (out3 - out15) * 0.54119610014619698;	// 0.5/cos( PI/8)
			oin1 = (out7 - out11) * 1.30656296487637653;	// 0.5/cos(3PI/8)

			out5 =  oin0 + oin1;
			out13 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

			out5 += out13;
			//<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素

			//>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素
			//o DCT4 part1
			out3 = (d8_0 - d8_7) * 0.50979557910415917;		// 0.5/cos( PI/16)
			out7 = (d8_1 - d8_6) * 0.60134488693504528;		// 0.5/cos(3PI/16)
			out11 = (d8_2 - d8_5) * 0.89997622313641570;	// 0.5/cos(5PI/16)
			out15 = (d8_3 - d8_4) * 2.56291544774150618;	// 0.5/cos(7PI/16)

			//o DCT4 part2

			//e DCT2 part1
			ein0 = out3 + out15;
			ein1 = out7 + out11;

			//o DCT2 part1
			oin0 = (out3 - out15) * 0.54119610014619698;	// 0.5/cos(PI/8)
			oin1 = (out7 - out11) * 1.30656296487637653;	// 0.5/cos(3PI/8)

			//e DCT2 part2
			out3 =  ein0 + ein1;
			out11 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

			//o DCT2 part2
			out7 = oin0 + oin1;
			out15 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)
			out7 += out15;

			//o DCT4 part3
			out3 += out7;
			out7 += out11;
			out11 += out15;
			//<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素
		}

		out64[46] = out64[50] = -out1 - out3;	//out1 += out3
		out64[42] = out64[54] = -out3 - out5;	//out3 += out5
		out64[38] = out64[58] = -out5 - out7;	//out5 += out7
		out64[34] = out64[62] = -out7 - out9;	//out7 += out9
		out64[2] = out9 + out11;				//out9 += out11
		out64[6] = out11 + out13;				//out11 += out13
		out64[10] = out13 + out15;				//out13 += out15
		//<<<<<<<< 完成计算DCT16输出[0..15]的奇数下标元素
	}
	out64[14] = out15;	//out64[14]=out32[30]
	//<<<<<<<<<<<<<<<<
	// 完成计算DCT32输出[0..31]的偶数下标元素

//=============================================================================

	//>>>>>>>>>>>>>>>> [http://lfp001.iteye.com/admin/blogs/737777]
	// 用DCT16计算计算DCT32输出[0..31]的奇数下标元素
	in0 = (in32[0] - in32[31]) * 0.50060299823519630;	// 0.5/cos( 1 * PI/64)
	in1 = (in32[1] - in32[30]) * 0.50547095989754366;	// 0.5/cos( 3 * PI/64)
	in2 = (in32[2] - in32[29]) * 0.51544730992262455;	// 0.5/cos( 5 * PI/64)
	in3 = (in32[3] - in32[28]) * 0.53104259108978417;	// 0.5/cos( 7 * PI/64)
	in4 = (in32[4] - in32[27]) * 0.55310389603444453;	// 0.5/cos( 9 * PI/64)
	in5 = (in32[5] - in32[26]) * 0.58293496820613387;	// 0.5/cos(11 * PI/64)
	in6 = (in32[6] - in32[25]) * 0.62250412303566482;	// 0.5/cos(13 * PI/64)
	in7 = (in32[7] - in32[24]) * 0.67480834145500575;	// 0.5/cos(15 * PI/64)
	in8 = (in32[8] - in32[23]) * 0.74453627100229845;	// 0.5/cos(17 * PI/64)
	in9 = (in32[9] - in32[22]) * 0.83934964541552704;	// 0.5/cos(19 * PI/64)
	in10 = (in32[10] - in32[21]) * 0.97256823786196069;	// 0.5/cos(21 * PI/64)
	in11 = (in32[11] - in32[20]) * 1.16943993343288495;	// 0.5/cos(23 * PI/64)
	in12 = (in32[12] - in32[19]) * 1.48416461631416628;	// 0.5/cos(25 * PI/64)
	in13 = (in32[13] - in32[18]) * 2.05778100995341155;	// 0.5/cos(27 * PI/64)
	in14 = (in32[14] - in32[17]) * 3.40760841846871879;	// 0.5/cos(29 * PI/64)
	in15 = (in32[15] - in32[16]) * 10.1900081235480568;	// 0.5/cos(31 * PI/64)

	//DCT16
	{
		//>>>>>>>> 用DCT8计算DCT16输出[0..15]的偶数下标元素:
		d8_0 = in0 + in15;
		d8_1 = in1 + in14;
		d8_2 = in2 + in13;
		d8_3 = in3 + in12;
		d8_4 = in4 + in11;
		d8_5 = in5 + in10;
		d8_6 = in6 + in9;
		d8_7 = in7 + in8;

		//DCT8
		{
			//>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素
			out1 = d8_0 + d8_7;
			out3 = d8_1 + d8_6;
			out5 = d8_2 + d8_5;
			out7 = d8_3 + d8_4;

			//>>e DCT2
			ein0 = out1 + out7;
			ein1 = out3 + out5;
			out0 =  ein0 + ein1;
			out8 = (ein0 - ein1) * 0.70710678118654752;		// 0.5/cos(PI/4)

			//>>o DCT2
			oin0 = (out1 - out7) * 0.54119610014619698;		// 0.5/cos( PI/8)
			oin1 = (out3 - out5) * 1.30656296487637653;		// 0.5/cos(3PI/8)

			out4 =  oin0 + oin1;
			out12 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

			out4 += out12;
			//<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素

			//>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素
			//o DCT4 part1
			out1 = (d8_0 - d8_7) * 0.50979557910415917;		// 0.5/cos( PI/16)
			out3 = (d8_1 - d8_6) * 0.60134488693504528;		// 0.5/cos(3PI/16)
			out5 = (d8_2 - d8_5) * 0.89997622313641570;		// 0.5/cos(5PI/16)
			out7 = (d8_3 - d8_4) * 2.56291544774150618;		// 0.5/cos(7PI/16)

			//o DCT4 part2

			//e DCT2 part1
			ein0 = out1 + out7;
			ein1 = out3 + out5;

			//o DCT2 part1
			oin0 = (out1 - out7) * 0.54119610014619698;		// 0.5/cos(PI/8)
			oin1 = (out3 - out5) * 1.30656296487637653;		// 0.5/cos(3PI/8)

			//e DCT2 part2
			out2 =  ein0 + ein1;
			out10 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

			//o DCT2 part2
			out6 = oin0 + oin1;
			out14 = (oin0 - oin1) * 0.70710678118654752;
			out6 += out14;

			//o DCT4 part3
			out2 += out6;
			out6 += out10;
			out10 += out14;
			//<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素
		}
		//<<<<<<<< 完成计算DCT16输出[0..15]的偶数下标元素

		//-------------------------------------------------------------------------

		//>>>>>>>> 用DCT8计算DCT16输出[0..15]的奇数下标元素
		d8_0 = (in0 - in15) * 0.50241928618815571;	// 0.5/cos( 1 * PI/32)
		d8_1 = (in1 - in14) * 0.52249861493968888;	// 0.5/cos( 3 * PI/32)
		d8_2 = (in2 - in13) * 0.56694403481635770;	// 0.5/cos( 5 * PI/32)
		d8_3 = (in3 - in12) * 0.64682178335999013;	// 0.5/cos( 7 * PI/32)
		d8_4 = (in4 - in11) * 0.78815462345125022;	// 0.5/cos( 9 * PI/32)
		d8_5 = (in5 - in10) * 1.06067768599034747;	// 0.5/cos(11 * PI/32)
		d8_6 = (in6 - in9) * 1.72244709823833393;	// 0.5/cos(13 * PI/32)
		d8_7 = (in7 - in8) * 5.10114861868916386;	// 0.5/cos(15 * PI/32)

		//DCT8
		{
			//>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素.
			out1 = d8_0 + d8_7;
			out3 = d8_1 + d8_6;
			out5 = d8_2 + d8_5;
			out7 = d8_3 + d8_4;

			//>>e DCT2
			ein0 = out1 + out7;
			ein1 = out3 + out5;
			in0 =  ein0 + ein1;	//out0->in0,out4->in4
			in4 = (ein0 - ein1) * 0.70710678118654752;	// 0.5/cos(PI/4)

			//>>o DCT2
			oin0 = (out1 - out7) * 0.54119610014619698;	// 0.5/cos( PI/8)
			oin1 = (out3 - out5) * 1.30656296487637653;	// 0.5/cos(3PI/8)

			in2 =  oin0 + oin1;	//out2->in2,out6->in6
			in6 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

			in2 += in6;
			//<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素

			//>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素
			//o DCT4 part1
			out1 = (d8_0 - d8_7) * 0.50979557910415917;	// 0.5/cos( PI/16)
			out3 = (d8_1 - d8_6) * 0.60134488693504528;	// 0.5/cos(3PI/16)
			out5 = (d8_2 - d8_5) * 0.89997622313641570;	// 0.5/cos(5PI/16)
			out7 = (d8_3 - d8_4) * 2.56291544774150618;	// 0.5/cos(7PI/16)

			//o DCT4 part2

			//e DCT2 part1
			ein0 = out1 + out7;
			ein1 = out3 + out5;

			//o DCT2 part1
			oin0 = (out1 - out7) * 0.54119610014619698;	// 0.5/cos(PI/8)
			oin1 = (out3 - out5) * 1.30656296487637653;	// 0.5/cos(3PI/8)

			//e DCT2 part2
			out1 =  ein0 + ein1;
			out5 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

			//o DCT2 part2
			out3 = oin0 + oin1;
			out15 = (oin0 - oin1) * 0.70710678118654752;
			out3 += out15;

			//o DCT4 part3
			out1 += out3;
			out3 += out5;
			out5 += out15;
			//<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素
		}
								//out15=out7
		out13 = in6 + out15;	//out13=out6+ou7
		out11 = out5 + in6;		//out11=out5+ou6
		out9 = in4 + out5;		//out9 =out4+ou5
		out7 = out3 + in4;		//out7 =out3+ou4
		out5 = in2 + out3;		//out5 =out2+ou3
		out3 = out1 + in2;		//out3 =out1+ou2
		out1 += in0;			//out1 =out0+ou1
		//<<<<<<<< 完成计算DCT16输出[0..15]的奇数下标元素
	}

	//out32[i]=out[i]+out[i+1]; out32[31]=out[15]
	out64[47] = out64[49] = -out0 - out1;
	out64[45] = out64[51] = -out1 - out2;
	out64[43] = out64[53] = -out2 - out3;
	out64[41] = out64[55] = -out3 - out4;
	out64[39] = out64[57] = -out4 - out5;
	out64[37] = out64[59] = -out5 - out6;
	out64[35] = out64[61] = -out6 - out7;
	out64[33] = out64[63] = -out7 - out8;
	out64[1] = out8 + out9;
	out64[3] = out9 + out10;
	out64[5] = out10 + out11;
	out64[7] = out11 + out12;
	out64[9] = out12 + out13;
	out64[11] = out13 + out14;
	out64[13] = out14 + out15;
	out64[15] = out15;
	//<<<<<<<<<<<<<<<<

	out64[16] = 0;

	out64[17] = -out64[15];
	out64[18] = -out64[14];
	out64[19] = -out64[13];
	out64[20] = -out64[12];
	out64[21] = -out64[11];
	out64[22] = -out64[10];
	out64[23] = -out64[9];
	out64[24] = -out64[8];
	out64[25] = -out64[7];
	out64[26] = -out64[6];
	out64[27] = -out64[5];
	out64[28] = -out64[4];
	out64[29] = -out64[3];
	out64[30] = -out64[2];
	out64[31] = -out64[1];
	out64[32] = -out64[0];
}

 

未经许可,请勿转载。http://lfp001.iteye.com/

本文转载自:http://lfp001.iteye.com/blog/737777

暗之幻影
粉丝 20
博文 377
码字总数 71245
作品 0
南京
高级程序员
私信 提问
x264源代码简单分析:宏块编码(Encode)部分

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/leixiaohua1020/article/details/45938927 ===================================================== H.264源代......

雷霄骅
2015/05/24
0
0
SSE图像算法优化系列二十一:基于DCT变换图像去噪算法的进一步优化(100W像素30ms)。

  在优化IPOL网站中基于DCT(离散余弦变换)的图像去噪算法(附源代码) 一文中,我们曾经优化过基于DCT变换的图像去噪算法,在那文所提供的Demo中,处理一副1000*1000左右的灰度噪音图像耗...

Imageshop
2018/09/04
0
0
数据压缩实验五 JPEG原理分析及JPEG解码器的调试

一、实验原理 1、JPEG图像压缩标准基本介绍 JPEG 是Joint Photographic Experts Group(联合图像专家小组)的缩写,是第一个国际图像压缩标准。JPEG图像压缩算法能够在提供良好的压缩性能的同...

zgyggy
2017/05/23
0
0
开源mp3解码库--libmad

libmad是一个开源mp3解码库,其对mp3解码算法做了很多优化,性能较好,很多播放器如mplayer、xmms等都是使用这个开源库进行解码的;如果要设计mp3播放器而又不想研究mp3解码算法的话,libma...

匿名
2011/10/23
8.9K
0
从零开始手敲次世代游戏引擎(JPEG特别篇)-2

接从零开始手敲次世代游戏引擎(JPEG特别篇)-1。 (首先说一下,这篇会非常难看。但是如果动手去做了,会对各项能力有极大的提高) 我们现在有了核心的算法库,接下来就是实际读取JPEG文件并...

陈文礼
2017/12/09
0
0

没有更多内容

加载失败,请刷新页面

加载更多

SpringBoot2单元测试

引入maven依赖 引入powermock是为了解决静态方法mock的问题。 <dependency> <groupId>org.powermock</groupId> <artifactId>powermock-module-junit4</artifactId> <version>2.......

一刀
8分钟前
0
0
webpack优化公司内部分享ppt小记

Webpack整个的构建过程是基于事件流 Webpack 就像一条生产线,要经过一系列处理流程后才能将源文件转换成输出结果。 这条生产线上的每个处理流程的职责都是单一的,多个流程之间有存在依赖关...

莫西摩西
9分钟前
0
0
DevOps落地实践,BAT系列,敏捷看板

DevOps 自 2009 年诞生以来,至今整整过去了十年,从最初的摸索,逐步变成一种主流的软件开发交付模式。BAT在2014年左右,甚至更早的时候,内部的DevOps系统就已经差不多成型了,比如腾讯的织...

cs平台
13分钟前
0
0
Stylus 简单应用

1、安装 cnpm install stylus 2、基本语法 stylus中文版参考文档 3、命令行下编译 styl 文件,生成压缩的css文件 编译src目录下styl文件 stylus --compress /src 更多命令可以执行查找 styl...

微笑吔彷徨
15分钟前
0
0
Java正则表达式大全

1、匹配中文:[\u4e00-\u9fa5] 2、英文字母:[a-zA-Z] 3、数字:[0-9] 4、匹配中文,英文字母和数字及下划线:^[\u4e00-\u9fa5_a-zA-Z0-9]+$ 同时判断输入长度: [\u4e00-\u9fa5_a-zA-Z0-9_]{4...

叶湘伦
23分钟前
1
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部