## MP3解码之DCT快速算法的展开(旧) 转

暗之幻影

MP3解码的最后一步是“多相合成滤波”，多相合成滤波算法见ISO/IEC 11172-3 ANNEX_B Figure 3-A.2，经过5个步骤，将输入序列X[0..31]的32个采样值，变换为32个PCM样本输出：

``````// Polyphase synthesis filter (ISO/IEC 11172-3, Annex B, Figure 3-A.2).
// Input : X[0..31], the 32 new subband samples.
// State : V[0..1023], a FIFO that persists between calls.
// Output: 32 PCM samples.

// ① Shift the 1024-entry FIFO V up by 64 positions.
//    The copy must run from the top down; an ascending copy would overwrite
//    V[64..] with V[0..63] before the old values have been moved.
for i = 1023 down to 64
    V[i] = V[i-64]

// ② Calculate 64 new values V[0..63] by matrixing
for i = 0 to 63 {
    V[i] = 0                      // start from zero, not from the stale FIFO value
    for k = 0 to 31
        V[i] += N[i][k] * X[k]
}
// where N[i][k] = cos((16+i)*(2*k+1)*PI/64)

// ③ Build the 512-value vector U (plain copy from V, no accumulation)
for i = 0 to 7
    for j = 0 to 31 {
        U[64*i+j] = V[128*i+j]
        U[64*i+32+j] = V[128*i+96+j]
    }

// ④ Multiply the U vector by the D window
for i = 0 to 511
    W[i] = U[i] * D[i]
// where D[i] is given in ISO/IEC 11172-3, Table 3-B.3.

// ⑤ Calculate the 32 output samples
for i = 0 to 31 {
    double Si = 0
    for j = 0 to 15
        Si += W[i + 32 * j]
    // Output PCM sample: PCMi = (short)(Si * 32768)
}``````

算法中第②步是DCT(32→64)运算，将输入序列的32个值X[k]变换为输出序列的64个值V[i]，直接运算共64 * 32 = 2048次乘法

余弦函数具有周期性和对称性，所以先由DCT(32→32)得到输出序列的32个值，就可以直接推出DCT(32→64)的64个输出值，算法如下：

``````/*
 * in32out64 - expand the 32 outputs of a 32-point DCT into the 64 values
 * produced by the 32->64 matrixing step, using only copies and sign flips.
 *
 *   in  : 32 values (read only)
 *   out : 64 values, every element is written
 *
 * Layout written to out[]:
 *   out[0..15]  =  in[16..31]
 *   out[16]     =  0
 *   out[17..47] = -in[31..1]
 *   out[48..63] = -in[0..15]
 */
void in32out64(double in[32], double out[64])
{
	int dst = 0;

	/* first quarter: straight copy of the upper half of in[] */
	while (dst < 16) {
		out[dst] = in[16 + dst];
		++dst;
	}

	/* the single zero in the middle of the anti-symmetric part */
	out[dst++] = 0.0;

	/* mirrored and negated: in[31] down to in[1] */
	for (; dst < 48; ++dst) {
		out[dst] = -in[48 - dst];
	}

	/* negated lower half of in[] */
	for (; dst < 64; ++dst) {
		out[dst] = -in[dst - 48];
	}
}``````

``````#define PI 3.141592654    // 原文http://lfp001.iteye.com/
void init_N()
{
int i,k;
for(i = 0; i < 32; i++)
for(k = 0; k < 32; k++)
N[i][k] = cos(i * (2 * k + 1) * PI / 64);
}``````

2点DCT

N'[i][k] = cos(i * (2 * k + 1) * PI / 4);
out[i] += in[k] * N'[i][k];

``````/*
 * dct2 - reference 2-point DCT computed directly from the cosine table.
 *
 *   out[r] = sum over c of in[c] * cos(r * (2*c + 1) * PI / 4)
 *
 * The coefficient is read as N[16*r][c], i.e. every 16th row of the table N.
 */
void dct2(double in[2], double out[2])
{
	int row, col;

	for (row = 0; row < 2; ++row) {
		out[row] = 0;
		for (col = 0; col < 2; ++col) {
			out[row] += N[16 * row][col] * in[col];
		}
	}
}``````

2点DCT快速算法直接展开运算，这3项N[i][k]不用查表：
N[0][k]=1,N[16][0]=cos(PI/4), N[16][1]=cos(3PI/4)=-cos(PI/4)
out[0] = in[0] + in[1];
out[1] = in[0] * N[16][0] + in[1] * N[16][1] = (in[0] - in[1]) * 0.7071067811866;

``````/*
 * fast_dct2 - 2-point DCT written out by hand, no table lookup.
 *
 *   out[0] = in[0] + in[1]
 *   out[1] = (in[0] - in[1]) * cos(PI/4)
 *
 * Both inputs are read before out[1] is stored only if in and out do not
 * overlap; callers in this file always pass distinct arrays.
 *
 * cos(PI/4) is given to full double precision, matching the constant used
 * by the unrolled dct32to64() so both paths produce the same values.
 */
void fast_dct2(double in[2], double out[2])
{
	out[0] = in[0] + in[1];
	out[1] = (in[0] - in[1]) * 0.70710678118654752;	/* cos(PI/4) */
}``````

4点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 8);
out[i] += in[k] * N'[i][k];

``````/*
 * dct4 - reference 4-point DCT computed directly from the cosine table.
 *
 *   out[r] = sum over c of in[c] * cos(r * (2*c + 1) * PI / 8)
 *
 * The coefficient is read as N[8*r][c], i.e. every 8th row of the table N.
 */
void dct4(double in[4], double out[4])
{
	int row, col;

	for (row = 0; row < 4; ++row) {
		out[row] = 0;
		for (col = 0; col < 4; ++col) {
			out[row] += N[8 * row][col] * in[col];
		}
	}
}``````

4点DCT快速算法: 分解为两个2点DCT

``````/*
 * fast_dct4 - 4-point DCT built from two 2-point DCTs.
 *
 * The input is folded into a "sum" half, which yields the even-indexed
 * outputs directly, and a scaled "difference" half, whose 2-point DCT
 * yields the odd-indexed outputs after adding neighbouring terms.
 * The scale factor is 1 / (2 * N[8][k]).
 */
void fast_dct4(double in[4], double out[4])
{
	double sum_in[2], sum_out[2];
	double diff_in[2], diff_out[2];
	int k;

	for (k = 0; k < 2; ++k) {
		const double head = in[k];
		const double tail = in[3 - k];

		sum_in[k] = head + tail;
		diff_in[k] = (head - tail) / (2 * N[8][k]);
	}

	fast_dct2(sum_in, sum_out);
	fast_dct2(diff_in, diff_out);

	/* even-indexed outputs come straight from the sum half */
	out[0] = sum_out[0];
	out[2] = sum_out[1];

	/* odd-indexed outputs: adjacent terms of the difference half */
	out[1] = diff_out[0] + diff_out[1];
	out[3] = diff_out[1];
}``````

8点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 16);
out[i] += in[k] * N'[i][k];

``````/*
 * dct8 - reference 8-point DCT computed directly from the cosine table.
 *
 *   out[r] = sum over c of in[c] * cos(r * (2*c + 1) * PI / 16)
 *
 * The coefficient is read as N[4*r][c], i.e. every 4th row of the table N.
 */
void dct8(double in[8], double out[8])
{
	int row, col;

	for (row = 0; row < 8; ++row) {
		out[row] = 0;
		for (col = 0; col < 8; ++col) {
			out[row] += N[4 * row][col] * in[col];
		}
	}
}``````

8点DCT快速算法: 分解为两个4点DCT,4点DCT用快速算法

N[4][i]=cos((2*i+1)*PI/16)  i=0,1,2,3

``````/*
 * fast_dct8 - 8-point DCT built from two fast 4-point DCTs.
 *
 * The "sum" half gives the even-indexed outputs directly; the scaled
 * "difference" half gives the odd-indexed outputs as sums of adjacent
 * terms. The scale factor is 1 / (2 * N[4][k]).
 */
void fast_dct8(double in[8], double out[8])
{
	double sum_in[4], sum_out[4];
	double diff_in[4], diff_out[4];
	int k;

	for (k = 0; k < 4; ++k) {
		const double head = in[k];
		const double tail = in[7 - k];

		sum_in[k] = head + tail;
		diff_in[k] = (head - tail) / (2 * N[4][k]);
	}

	fast_dct4(sum_in, sum_out);	/* yields the even-indexed out[] directly */
	fast_dct4(diff_in, diff_out);	/* yields the odd-indexed out[] indirectly */

	for (k = 0; k < 4; ++k) {
		out[2 * k] = sum_out[k];
	}
	for (k = 0; k < 3; ++k) {
		out[2 * k + 1] = diff_out[k] + diff_out[k + 1];
	}
	out[7] = diff_out[3];
}``````

16点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 32);
out[i] += in[k] * N'[i][k];

``````/*
 * dct16 - reference 16-point DCT computed directly from the cosine table.
 *
 *   out[r] = sum over c of in[c] * cos(r * (2*c + 1) * PI / 32)
 *
 * The coefficient is read as N[2*r][c], i.e. every 2nd row of the table N.
 */
void dct16(double in[16], double out[16])
{
	int row, col;

	for (row = 0; row < 16; ++row) {
		out[row] = 0;
		for (col = 0; col < 16; ++col) {
			out[row] += N[2 * row][col] * in[col];
		}
	}
}``````

16点DCT快速算法: 分解为两个8点DCT,8点DCT用快速算法

N[2][i]=cos((2*i+1)*PI/32)  i=0,1,...7

``````/*
 * fast_dct16 - 16-point DCT built from two fast 8-point DCTs.
 *
 * The "sum" half gives the even-indexed outputs directly; the scaled
 * "difference" half gives the odd-indexed outputs as sums of adjacent
 * terms. The scale factor is 1 / (2 * N[2][k]).
 */
void fast_dct16(double in[16], double out[16])
{
	double sum_in[8], sum_out[8];
	double diff_in[8], diff_out[8];
	int k;

	for (k = 0; k < 8; ++k) {
		const double head = in[k];
		const double tail = in[15 - k];

		sum_in[k] = head + tail;
		/* author's note: is this division where the rounding error comes from? */
		diff_in[k] = (head - tail) / (2 * N[2][k]);
	}

	fast_dct8(sum_in, sum_out);	/* yields the even-indexed out[] directly */
	fast_dct8(diff_in, diff_out);	/* yields the odd-indexed out[] indirectly */

	for (k = 0; k < 8; ++k) {
		out[2 * k] = sum_out[k];
	}
	for (k = 0; k < 7; ++k) {
		out[2 * k + 1] = diff_out[k] + diff_out[k + 1];
	}
	out[15] = diff_out[7];
}``````

32点DCT
N[i][k] = cos(i * (2 * k + 1) * PI / 64);
out[i] += in[k] * N[i][k];

``````/*
 * dct32 - reference 32-point DCT computed directly from the cosine table.
 *
 *   out[r] = sum over c of in[c] * cos(r * (2*c + 1) * PI / 64)
 *
 * The coefficient is read as N[r][c]; this transform uses every row of N.
 */
void dct32(double in[32], double out[32])
{
	int row, col;

	for (row = 0; row < 32; ++row) {
		out[row] = 0;
		for (col = 0; col < 32; ++col) {
			out[row] += N[row][col] * in[col];
		}
	}
}``````

32点DCT快速算法: 分解为两个16点DCT，16点DCT采用快速算法。共16+2*32=80次乘法

N[1][i]=cos((2*i+1)*PI/64)  i=0,1,...15

``````/*
 * fast_dct32 - 32-point DCT built from two fast 16-point DCTs.
 *
 * The "sum" half gives the even-indexed outputs directly; the scaled
 * "difference" half gives the odd-indexed outputs as sums of adjacent
 * terms. The scale factor is 1 / (2 * N[1][k]).
 */
void fast_dct32(double in[32], double out[32])
{
	double sum_in[16], sum_out[16];
	double diff_in[16], diff_out[16];
	int k;

	for (k = 0; k < 16; ++k) {
		const double head = in[k];
		const double tail = in[31 - k];

		sum_in[k] = head + tail;
		diff_in[k] = (head - tail) / (2 * N[1][k]);
	}

	fast_dct16(sum_in, sum_out);	/* yields the even-indexed out[] directly */
	fast_dct16(diff_in, diff_out);	/* yields the odd-indexed out[] indirectly */

	for (k = 0; k < 16; ++k) {
		out[2 * k] = sum_out[k];
	}
	for (k = 0; k < 15; ++k) {
		out[2 * k + 1] = diff_out[k] + diff_out[k + 1];
	}
	out[31] = diff_out[15];
}``````

DCT32: N[1][i]=cos((2*i+1)*PI/64)  i=0..15
DCT16: N[2][i]=cos((2*i+1)*PI/32)  i=0..7
DCT8:   N[4][i]=cos((2*i+1)*PI/16)  i=0..3
DCT4:   N[8][i]=cos((2*i+1)*PI/8)    i=0,1
DCT2:   cos(PI/4) ≈ 0.7071067811865

DCT-II(32→64)的快速算法的展开算法

解码一帧双声道的MP3，共要调用ISO/IEC 11172-3 ANNEX_B Figure 3-A.2第二步给出的矩阵运算2*2*18=72次。直接计算时，每次矩阵运算需要2048次浮点乘法，采用快速算法后降低到80次，快速算法已经很完美。为什么还要将快速算法展开呢？因为矩阵运算的调用频度极高，是影响解码速度的关键模块。分析以上快速算法函数可以看出两个特点：使用迭代，以及函数内有大量的循环语句。迭代这种规律使展开成为可能，而展开之后可以去掉中间各点DCT快速算法用到的循环语句，所以矩阵运算的速度可以进一步提高。经我对比实测，用展开方法后解码速度提升10%以上。下面给出的DCT-II(32→64)展开的快速算法是从我前几年写的MP3解码程序中直接COPY过来的，有比较详细的注解，对比上文各点DCT的快速算法函数，很容易看懂。    【原文：http://lfp001.iteye.com/】

``````/*
 * dct32to64 - fully unrolled fast DCT taking 32 inputs to 64 outputs.
 *
 *   in32  : 32 input values (only read)
 *   out64 : 64 output values; every element out64[0..63] is written
 *
 * The 32-point DCT is split into two 16-point DCTs (one for its even-indexed
 * outputs, one for its odd-indexed outputs); each of those is split the same
 * way into 8-, 4- and 2-point DCTs. Instead of producing an intermediate
 * out32[] array, each result is stored straight into its final out64[] slot:
 *   out64[i]      =  out32[i + 16]   for i = 0..15
 *   out64[16]     =  0
 *   out64[17..32] = -out64[15..0]
 *   out64[33..63] = negated out32 values, written in mirrored pairs
 *
 * The locals in0..in15, out0..out15 and d8_0..d8_7 are scratch values that
 * are reused for different purposes in different stages, so the statement
 * order below matters.
 *
 * Note: the constant 0.70710678118654752 is annotated both as cos(PI/4) and
 * as 0.5/cos(PI/4); the two are numerically equal.
 */
void dct32to64(double in32[32], double out64[64])
{
double in0,in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15;
double out0,out1,out2,out3,out4,out5,out6,out7,out8,out9,out10,out11,out12,out13,out14,out15;
double d8_0,d8_1,d8_2,d8_3,d8_4,d8_5,d8_6,d8_7;
double ein0, ein1, oin0, oin1;

// Use a DCT16 to compute the even-indexed elements of the DCT32 output [0..31]
in0 = in32[0] + in32[31];
in1 = in32[1] + in32[30];
in2 = in32[2] + in32[29];
in3 = in32[3] + in32[28];
in4 = in32[4] + in32[27];
in5 = in32[5] + in32[26];
in6 = in32[6] + in32[25];
in7 = in32[7] + in32[24];
in8 = in32[8] + in32[23];
in9 = in32[9] + in32[22];
in10 = in32[10] + in32[21];
in11 = in32[11] + in32[20];
in12 = in32[12] + in32[19];
in13 = in32[13] + in32[18];
in14 = in32[14] + in32[17];
in15 = in32[15] + in32[16];

//DCT16
{
//>>>>>>>> Use a DCT8 to compute the even-indexed elements of the DCT16 output [0..15]
d8_0 = in0 + in15;
d8_1 = in1 + in14;
d8_2 = in2 + in13;
d8_3 = in3 + in12;
d8_4 = in4 + in11;
d8_5 = in5 + in10;
d8_6 = in6 + in9;
d8_7 = in7 + in8;

//DCT8. 29 additions/subtractions, 12 multiplications
{
//>>>>e Use a DCT4 to compute the even-indexed elements of the DCT8 output [0..7]
out1 = d8_0 + d8_7;
out3 = d8_1 + d8_6;
out5 = d8_2 + d8_5;
out7 = d8_3 + d8_4;

//>>e DCT2
ein0 = out1 + out7;
ein1 = out3 + out5;
out64[48] =  -ein0 - ein1;
out64[0] = (ein0 - ein1) * 0.70710678118654752;	// 0.5/cos(PI/4)

//>>o DCT2
oin0 = (out1 - out7) * 0.54119610014619698;		// 0.5/cos( PI/8)
oin1 = (out3 - out5) * 1.30656296487637653;		// 0.5/cos(3PI/8)

out2 =  oin0 + oin1;
out12 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

out64[40] = out64[56] = -out2 - out12;
out64[8] = out12;
//<<<<e Done: even-indexed elements of the DCT8 output [0..7]

//>>>>o Use a DCT4 to compute the odd-indexed elements of the DCT8 output [0..7]
//o DCT4 part1
out1 = (d8_0 - d8_7) * 0.50979557910415917;		// 0.5/cos( PI/16)
out3 = (d8_1 - d8_6) * 0.60134488693504528;		// 0.5/cos(3PI/16)
out5 = (d8_2 - d8_5) * 0.89997622313641570;		// 0.5/cos(5PI/16)
out7 = (d8_3 - d8_4) * 2.56291544774150618;		// 0.5/cos(7PI/16)

//o DCT4 part2

//e DCT2 part1
ein0 = out1 + out7;
ein1 = out3 + out5;

//o DCT2 part1
oin0 = (out1 - out7) * 0.54119610014619698;	// 0.5/cos(PI/8)
oin1 = (out3 - out5) * 1.30656296487637653;	// 0.5/cos(3PI/8)

//e DCT2 part2
out1 =  ein0 + ein1;
out5 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

//o DCT2 part2
out3 = oin0 + oin1;
out7 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)
out3 += out7;

//o DCT4 part3
out64[44] = out64[52] = -out1 - out3;	//out1+=out3
out64[36] = out64[60] = -out3 - out5;	//out3+=out5
out64[4] = out5 + out7;					//out5+=out7
out64[12] = out7;
//<<<<o Done: odd-indexed elements of the DCT8 output [0..7]
}
//<<<<<<<< Done: even-indexed elements of the DCT16 output [0..15]

//-------------------------------------------------------------------------

//>>>>>>>> Use a DCT8 to compute the odd-indexed elements of the DCT16 output [0..15]
d8_0 = (in0 - in15) * 0.50241928618815571;	// 0.5/cos( 1 * PI/32)
d8_1 = (in1 - in14) * 0.52249861493968888;	// 0.5/cos( 3 * PI/32)
d8_2 = (in2 - in13) * 0.56694403481635770;	// 0.5/cos( 5 * PI/32)
d8_3 = (in3 - in12) * 0.64682178335999013;	// 0.5/cos( 7 * PI/32)
d8_4 = (in4 - in11) * 0.78815462345125022;	// 0.5/cos( 9 * PI/32)
d8_5 = (in5 - in10) * 1.06067768599034747;	// 0.5/cos(11 * PI/32)
d8_6 = (in6 - in9) * 1.72244709823833393;	// 0.5/cos(13 * PI/32)
d8_7 = (in7 - in8) * 5.10114861868916386;	// 0.5/cos(15 * PI/32)

//DCT8
{
//>>>>e Use a DCT4 to compute the even-indexed elements of the DCT8 output [0..7].
out3 = d8_0 + d8_7;
out7 = d8_1 + d8_6;
out11 = d8_2 + d8_5;
out15 = d8_3 + d8_4;

//>>e DCT2
ein0 = out3 + out15;
ein1 = out7 + out11;
out1 =  ein0 + ein1;
out9 = (ein0 - ein1) * 0.70710678118654752;		// 0.5/cos(PI/4)

//>>o DCT2
oin0 = (out3 - out15) * 0.54119610014619698;	// 0.5/cos( PI/8)
oin1 = (out7 - out11) * 1.30656296487637653;	// 0.5/cos(3PI/8)

out5 =  oin0 + oin1;
out13 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

out5 += out13;
//<<<<e Done: even-indexed elements of the DCT8 output [0..7]

//>>>>o Use a DCT4 to compute the odd-indexed elements of the DCT8 output [0..7]
//o DCT4 part1
out3 = (d8_0 - d8_7) * 0.50979557910415917;		// 0.5/cos( PI/16)
out7 = (d8_1 - d8_6) * 0.60134488693504528;		// 0.5/cos(3PI/16)
out11 = (d8_2 - d8_5) * 0.89997622313641570;	// 0.5/cos(5PI/16)
out15 = (d8_3 - d8_4) * 2.56291544774150618;	// 0.5/cos(7PI/16)

//o DCT4 part2

//e DCT2 part1
ein0 = out3 + out15;
ein1 = out7 + out11;

//o DCT2 part1
oin0 = (out3 - out15) * 0.54119610014619698;	// 0.5/cos(PI/8)
oin1 = (out7 - out11) * 1.30656296487637653;	// 0.5/cos(3PI/8)

//e DCT2 part2
out3 =  ein0 + ein1;
out11 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

//o DCT2 part2
out7 = oin0 + oin1;
out15 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)
out7 += out15;

//o DCT4 part3
out3 += out7;
out7 += out11;
out11 += out15;
//<<<<o Done: odd-indexed elements of the DCT8 output [0..7]
}

out64[46] = out64[50] = -out1 - out3;	//out1 += out3
out64[42] = out64[54] = -out3 - out5;	//out3 += out5
out64[38] = out64[58] = -out5 - out7;	//out5 += out7
out64[34] = out64[62] = -out7 - out9;	//out7 += out9
out64[2] = out9 + out11;				//out9 += out11
out64[6] = out11 + out13;				//out11 += out13
out64[10] = out13 + out15;				//out13 += out15
//<<<<<<<< Done: odd-indexed elements of the DCT16 output [0..15]
}
out64[14] = out15;	//out64[14]=out32[30]
//<<<<<<<<<<<<<<<<
// Done: even-indexed elements of the DCT32 output [0..31]

//=============================================================================

// Use a DCT16 to compute the odd-indexed elements of the DCT32 output [0..31]
in0 = (in32[0] - in32[31]) * 0.50060299823519630;	// 0.5/cos( 1 * PI/64)
in1 = (in32[1] - in32[30]) * 0.50547095989754366;	// 0.5/cos( 3 * PI/64)
in2 = (in32[2] - in32[29]) * 0.51544730992262455;	// 0.5/cos( 5 * PI/64)
in3 = (in32[3] - in32[28]) * 0.53104259108978417;	// 0.5/cos( 7 * PI/64)
in4 = (in32[4] - in32[27]) * 0.55310389603444453;	// 0.5/cos( 9 * PI/64)
in5 = (in32[5] - in32[26]) * 0.58293496820613387;	// 0.5/cos(11 * PI/64)
in6 = (in32[6] - in32[25]) * 0.62250412303566482;	// 0.5/cos(13 * PI/64)
in7 = (in32[7] - in32[24]) * 0.67480834145500575;	// 0.5/cos(15 * PI/64)
in8 = (in32[8] - in32[23]) * 0.74453627100229845;	// 0.5/cos(17 * PI/64)
in9 = (in32[9] - in32[22]) * 0.83934964541552704;	// 0.5/cos(19 * PI/64)
in10 = (in32[10] - in32[21]) * 0.97256823786196069;	// 0.5/cos(21 * PI/64)
in11 = (in32[11] - in32[20]) * 1.16943993343288495;	// 0.5/cos(23 * PI/64)
in12 = (in32[12] - in32[19]) * 1.48416461631416628;	// 0.5/cos(25 * PI/64)
in13 = (in32[13] - in32[18]) * 2.05778100995341155;	// 0.5/cos(27 * PI/64)
in14 = (in32[14] - in32[17]) * 3.40760841846871879;	// 0.5/cos(29 * PI/64)
in15 = (in32[15] - in32[16]) * 10.1900081235480568;	// 0.5/cos(31 * PI/64)

//DCT16
{
//>>>>>>>> Use a DCT8 to compute the even-indexed elements of the DCT16 output [0..15]:
d8_0 = in0 + in15;
d8_1 = in1 + in14;
d8_2 = in2 + in13;
d8_3 = in3 + in12;
d8_4 = in4 + in11;
d8_5 = in5 + in10;
d8_6 = in6 + in9;
d8_7 = in7 + in8;

//DCT8
{
//>>>>e Use a DCT4 to compute the even-indexed elements of the DCT8 output [0..7]
out1 = d8_0 + d8_7;
out3 = d8_1 + d8_6;
out5 = d8_2 + d8_5;
out7 = d8_3 + d8_4;

//>>e DCT2
ein0 = out1 + out7;
ein1 = out3 + out5;
out0 =  ein0 + ein1;
out8 = (ein0 - ein1) * 0.70710678118654752;		// 0.5/cos(PI/4)

//>>o DCT2
oin0 = (out1 - out7) * 0.54119610014619698;		// 0.5/cos( PI/8)
oin1 = (out3 - out5) * 1.30656296487637653;		// 0.5/cos(3PI/8)

out4 =  oin0 + oin1;
out12 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

out4 += out12;
//<<<<e Done: even-indexed elements of the DCT8 output [0..7]

//>>>>o Use a DCT4 to compute the odd-indexed elements of the DCT8 output [0..7]
//o DCT4 part1
out1 = (d8_0 - d8_7) * 0.50979557910415917;		// 0.5/cos( PI/16)
out3 = (d8_1 - d8_6) * 0.60134488693504528;		// 0.5/cos(3PI/16)
out5 = (d8_2 - d8_5) * 0.89997622313641570;		// 0.5/cos(5PI/16)
out7 = (d8_3 - d8_4) * 2.56291544774150618;		// 0.5/cos(7PI/16)

//o DCT4 part2

//e DCT2 part1
ein0 = out1 + out7;
ein1 = out3 + out5;

//o DCT2 part1
oin0 = (out1 - out7) * 0.54119610014619698;		// 0.5/cos(PI/8)
oin1 = (out3 - out5) * 1.30656296487637653;		// 0.5/cos(3PI/8)

//e DCT2 part2
out2 =  ein0 + ein1;
out10 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

//o DCT2 part2
out6 = oin0 + oin1;
out14 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)
out6 += out14;

//o DCT4 part3
out2 += out6;
out6 += out10;
out10 += out14;
//<<<<o Done: odd-indexed elements of the DCT8 output [0..7]
}
//<<<<<<<< Done: even-indexed elements of the DCT16 output [0..15]

//-------------------------------------------------------------------------

//>>>>>>>> Use a DCT8 to compute the odd-indexed elements of the DCT16 output [0..15]
d8_0 = (in0 - in15) * 0.50241928618815571;	// 0.5/cos( 1 * PI/32)
d8_1 = (in1 - in14) * 0.52249861493968888;	// 0.5/cos( 3 * PI/32)
d8_2 = (in2 - in13) * 0.56694403481635770;	// 0.5/cos( 5 * PI/32)
d8_3 = (in3 - in12) * 0.64682178335999013;	// 0.5/cos( 7 * PI/32)
d8_4 = (in4 - in11) * 0.78815462345125022;	// 0.5/cos( 9 * PI/32)
d8_5 = (in5 - in10) * 1.06067768599034747;	// 0.5/cos(11 * PI/32)
d8_6 = (in6 - in9) * 1.72244709823833393;	// 0.5/cos(13 * PI/32)
d8_7 = (in7 - in8) * 5.10114861868916386;	// 0.5/cos(15 * PI/32)

//DCT8
{
//>>>>e Use a DCT4 to compute the even-indexed elements of the DCT8 output [0..7].
out1 = d8_0 + d8_7;
out3 = d8_1 + d8_6;
out5 = d8_2 + d8_5;
out7 = d8_3 + d8_4;

//>>e DCT2
ein0 = out1 + out7;
ein1 = out3 + out5;
in0 =  ein0 + ein1;	// DCT8 out0 is kept in in0, out4 in in4 (the in* inputs are no longer needed)
in4 = (ein0 - ein1) * 0.70710678118654752;	// 0.5/cos(PI/4)

//>>o DCT2
oin0 = (out1 - out7) * 0.54119610014619698;	// 0.5/cos( PI/8)
oin1 = (out3 - out5) * 1.30656296487637653;	// 0.5/cos(3PI/8)

in2 =  oin0 + oin1;	// DCT8 out2 is kept in in2, out6 in in6
in6 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)

in2 += in6;
//<<<<e Done: even-indexed elements of the DCT8 output [0..7]

//>>>>o Use a DCT4 to compute the odd-indexed elements of the DCT8 output [0..7]
//o DCT4 part1
out1 = (d8_0 - d8_7) * 0.50979557910415917;	// 0.5/cos( PI/16)
out3 = (d8_1 - d8_6) * 0.60134488693504528;	// 0.5/cos(3PI/16)
out5 = (d8_2 - d8_5) * 0.89997622313641570;	// 0.5/cos(5PI/16)
out7 = (d8_3 - d8_4) * 2.56291544774150618;	// 0.5/cos(7PI/16)

//o DCT4 part2

//e DCT2 part1
ein0 = out1 + out7;
ein1 = out3 + out5;

//o DCT2 part1
oin0 = (out1 - out7) * 0.54119610014619698;	// 0.5/cos(PI/8)
oin1 = (out3 - out5) * 1.30656296487637653;	// 0.5/cos(3PI/8)

//e DCT2 part2
out1 =  ein0 + ein1;
out5 = (ein0 - ein1) * 0.70710678118654752;	// cos(PI/4)

//o DCT2 part2
out3 = oin0 + oin1;
out15 = (oin0 - oin1) * 0.70710678118654752;	// cos(PI/4)
out3 += out15;

//o DCT4 part3
out1 += out3;
out3 += out5;
out5 += out15;
//<<<<o Done: odd-indexed elements of the DCT8 output [0..7]
}
// At this point the eight DCT8 outputs o0..o7 live in:
//   o0=in0, o1=out1, o2=in2, o3=out3, o4=in4, o5=out5, o6=in6, o7=out15
// The sums below run from high index to low so that out3/out5 are read
// before they are overwritten.
//out15=o7
out13 = in6 + out15;	//out13=o6+o7
out11 = out5 + in6;		//out11=o5+o6
out9 = in4 + out5;		//out9 =o4+o5
out7 = out3 + in4;		//out7 =o3+o4
out5 = in2 + out3;		//out5 =o2+o3
out3 = out1 + in2;		//out3 =o1+o2
out1 += in0;			//out1 =o0+o1
//<<<<<<<< Done: odd-indexed elements of the DCT16 output [0..15]
}

// Odd-indexed DCT32 outputs: out32[2*i+1] = out[i] + out[i+1]; out32[31] = out[15]
out64[47] = out64[49] = -out0 - out1;
out64[45] = out64[51] = -out1 - out2;
out64[43] = out64[53] = -out2 - out3;
out64[41] = out64[55] = -out3 - out4;
out64[39] = out64[57] = -out4 - out5;
out64[37] = out64[59] = -out5 - out6;
out64[35] = out64[61] = -out6 - out7;
out64[33] = out64[63] = -out7 - out8;
out64[1] = out8 + out9;
out64[3] = out9 + out10;
out64[5] = out10 + out11;
out64[7] = out11 + out12;
out64[9] = out12 + out13;
out64[11] = out13 + out14;
out64[13] = out14 + out15;
out64[15] = out15;
//<<<<<<<<<<<<<<<<

// Middle element of the anti-symmetric part is always zero
out64[16] = 0;

// out64[17..32] is the negated mirror image of out64[15..0]
out64[17] = -out64[15];
out64[18] = -out64[14];
out64[19] = -out64[13];
out64[20] = -out64[12];
out64[21] = -out64[11];
out64[22] = -out64[10];
out64[23] = -out64[9];
out64[24] = -out64[8];
out64[25] = -out64[7];
out64[26] = -out64[6];
out64[27] = -out64[5];
out64[28] = -out64[4];
out64[29] = -out64[3];
out64[30] = -out64[2];
out64[31] = -out64[1];
out64[32] = -out64[0];
}``````

### 暗之幻影

x264源代码简单分析：宏块编码（Encode）部分

2015/05/24
0
0
SSE图像算法优化系列二十一：基于DCT变换图像去噪算法的进一步优化（100W像素30ms）。

在优化IPOL网站中基于DCT（离散余弦变换）的图像去噪算法（附源代码） 一文中，我们曾经优化过基于DCT变换的图像去噪算法，在那文所提供的Demo中，处理一副1000*1000左右的灰度噪音图像耗...

Imageshop
2018/09/04
0
0

zgyggy
2017/05/23
0
0

2011/10/23
8.9K
0

2017/12/09
0
0

SpringBoot2单元测试

8分钟前
0
0
webpack优化公司内部分享ppt小记

Webpack整个的构建过程是基于事件流 Webpack 就像一条生产线，要经过一系列处理流程后才能将源文件转换成输出结果。 这条生产线上的每个处理流程的职责都是单一的，多个流程之间有存在依赖关...

9分钟前
0
0
DevOps落地实践，BAT系列，敏捷看板

DevOps 自 2009 年诞生以来，至今整整过去了十年，从最初的摸索，逐步变成一种主流的软件开发交付模式。BAT在2014年左右，甚至更早的时候，内部的DevOps系统就已经差不多成型了，比如腾讯的织...

cs平台
13分钟前
0
0
Stylus 简单应用

1、安装 cnpm install stylus 2、基本语法 stylus中文版参考文档 3、命令行下编译 styl 文件，生成压缩的css文件 编译src目录下styl文件 stylus --compress /src 更多命令可以执行查找 styl...

15分钟前
0
0
Java正则表达式大全

1、匹配中文:[\u4e00-\u9fa5] 2、英文字母:[a-zA-Z] 3、数字:[0-9] 4、匹配中文，英文字母和数字及下划线：^[\u4e00-\u9fa5_a-zA-Z0-9]+\$ 同时判断输入长度： [\u4e00-\u9fa5_a-zA-Z0-9_]{4...

23分钟前
1
0