H.264完整的C语言代码和DCT的代码

源代码在线查看: dct2d-old.i

软件大小: 4747 K
上传用户: aoaoaoao
关键词: 264 DCT C语言代码 代码
下载地址: 免注册下载 普通下载 VIP

相关代码

				// dct.i
				// Ujval Kapasi
				// 1/22/97
				// 3/28/97
				//
				// 8x8 DCT (for JPEG and MPEG)
				// 
				// Test out a fast 1-d dct algorithm for the imagine chip implementation
				//   From Pennebaker/Mitchell, pg. 50-52.  See also Arai, Agui, Nakajima.
				// This algorithm is based on the 16-pt DFT.  Basically, the 8-pt DCT can
				//   be calculated by scaling the real parts of the output of the 16-pt DFT.
				
				
				// STUFF TO DO ONLY ONCE -- I.E., OUTSIDE OF LOOP
				
				// DEBUG : ISTREAM 2 : constants stored in VRF until ability to load constants
				// -----		onto imagine is implemented in simulator
				
				// Small integer "constants".  They exist only because constants are not
				// yet handled: `one` is read from input stream 1 and every other value is
				// built from it by arithmetic.  How each one is used further down:
				//   zero        : added to index1 in the column pass (register allocation trick)
				//   one         : step for the buf1 indices
				//   two         : shift count applied before every fixed-point multiply
				//   four        : step for the buf2 index in the column pass
				//   seven       : buf1 index step from one row pair to the next (column pass)
				//   twentythree : rewinds index1 at the end of a column-pass iteration
				//   twentyseven : rewinds index2 at the end of a column-pass iteration
				//   three, five : intermediates, only used to build the values above
				// NOTE(review): the names assume stream 1 supplies the value 1 here; the
				// commented-out reference value below says 2 -- confirm against the
				// stream-1 data.
				
				int zero, one, two, three, four, five, seven;
				int twentythree, twentyseven;
				
				//one = 0x00000002;
				
				zero = 0;
				one = istream(1);		// first word consumed from stream 1
				two = one + one;
				three = two + one;
				four = two + two;
				five = three + two;
				seven = three + four;
				// lo() presumably yields the low word of the product -- TODO confirm
				twentythree = three + lo(four * five);			// 3 + 20 when one == 1
				twentyseven = lo(three * four) + lo(three * five);	// 12 + 15 when one == 1
				
				// Shuffle descriptors passed as the second argument of shuffle().
				// shuf_func1..8 are loaded here and used by the row pass; shuf_func9..12
				// are only declared here and are loaded from stream 1 after the row pass.
				byte4 shuf_func1, shuf_func2, shuf_func3, shuf_func4;
				byte4 shuf_func5, shuf_func6, shuf_func7, shuf_func8;
				byte4 shuf_func9, shuf_func10, shuf_func11, shuf_func12;
				
				// Reference values that the stream is expected to supply.  Judging by the
				// annotations, each descriptor byte picks one source byte (00..03) and 04
				// produces a zero byte -- TODO confirm against the shuffle() definition.
				//shuf_func1  = 0x04040400;	// zero | zero || zero |  1st
				//shuf_func2  = 0x04040401;	// zero | zero || zero |  2nd
				//shuf_func3  = 0x04040402;	// zero | zero || zero |  3rd
				//shuf_func4  = 0x04040403;	// zero | zero || zero |  4th
				//shuf_func5  = 0x04000404;	// zero |  1st || zero | zero
				//shuf_func6  = 0x04010404;	// zero |  2nd || zero | zero
				//shuf_func7  = 0x04020404;	// zero |  3rd || zero | zero
				//shuf_func8  = 0x04030404;	// zero |  4th || zero | zero
				//shuf_func9  = 0x04040100;	//        zero || 2nd
				//shuf_func10 = 0x04040302;	//        zero || 1st
				//shuf_func11 = 0x03020404;	//         1st || zero
				//shuf_func12 = 0x01000404;	//         2nd || zero
				
				// Words 2..9 of stream 1 (word 1 was `one`).  The assignments are in
				// stream order, so they must not be reordered.
				shuf_func1 = istream(1);
				shuf_func2 = istream(1);
				shuf_func3 = istream(1);
				shuf_func4 = istream(1);
				shuf_func5 = istream(1);
				shuf_func6 = istream(1);
				shuf_func7 = istream(1);
				shuf_func8 = istream(1);
				
				
				// Multiplier constants for the 1-D DCT butterflies (m5..m9 in both passes).
				// Per the reference values below, each half2 carries the same 16-bit
				// constant in both halves.
				half2 COS_2, COS_3, COS_1_plus_COS_3, COS_1_minus_COS_3;
				
				// Stored in 2.14 format
				//COS_2             = 0x2d412d41;    // cos(2*pi/8) || cos(2*pi/8);
				//COS_3             = 0x187e187e;    // cos(3*pi/8) || cos(3*pi/8);
				//COS_1_plus_COS_3  = 0x539f539f;    // cos(pi/8) + cos(3*pi/8) || same
				//COS_1_minus_COS_3 = 0x22a322a3;    // cos(pi/8) - cos(3*pi/8) || same
				
				// Next four words of stream 1, read in this order.
				COS_2 = istream(1);
				COS_3 = istream(1);
				COS_1_plus_COS_3 = istream(1);
				COS_1_minus_COS_3 = istream(1);
				
				
				// Per-coefficient output scale factors: Kn multiplies the value written
				// for output coefficient n in both the row pass and the column pass.
				half2 K0, K1, K2, K3, K4, K5, K6, K7;
				
				// Stored in 2.14 format
				//K0 = 0x16a116a1           // 0.25 * sqrt(2)       || 0.25 * sqrt(2);
				//K1 = 0x10501050           // 0.25 * sec(pi/16)    || 0.25 * sec(pi/16);
				//K2 = 0x11511151           // 0.25 * sec(2*pi/16)  || 0.25 * sec(2*pi/16);
				//K3 = 0x133e133e           // 0.25 * sec(3*pi/16)  || 0.25 * sec(3*pi/16);
				//K4 = 0x16a116a1           // 0.25 * sec(4*pi/16)  || 0.25 * sec(4*pi/16);
				//K5 = 0x1ccd1ccd           // 0.25 * sec(5*pi/16)  || 0.25 * sec(5*pi/16);
				//K6 = 0x29cf29cf           // 0.25 * sec(6*pi/16)  || 0.25 * sec(6*pi/16);
				//K7 = 0x52035203           // 0.25 * sec(7*pi/16)  || 0.25 * sec(7*pi/16);
				
				// Next eight words of stream 1, read in this order.
				K0 = istream(1);
				K1 = istream(1);
				K2 = istream(1);
				K3 = istream(1);
				K4 = istream(1);
				K5 = istream(1);
				K6 = istream(1);
				K7 = istream(1);
				
				
				// buf1 receives the row-pass results and is read back by the column pass;
				// buf2 receives the column-pass results and is what gets written to the
				// output stream.  Each pass runs 4 iterations x 8 coefficient words = 32.
				array[32] half2 buf1, buf2;	// intermediate dct output.  ie, do rows then
								//   store here.  Then index into this
								//   differently to get the columns
				
				// NOTE(review): `i` is not referenced anywhere in the visible code.
				persistent uc int i = 2;
				persistent uc int j;		// trip count for both `loop count j` loops
				
				int index;			// write cursor into buf1 for the row pass
				
				index = 0;
				j = 4;				// 4 iterations, two rows handled per iteration
				
				// Row pass: 1-D 8-point DCT on two rows at a time.
				// Each iteration --> 2 rows
				loop count j unroll 1 {
				
				  // Input: four consecutive words of stream 0 per iteration.
				  // let n = 2 * (iteration number)
				  // i0, i1 : row n
				  // i2, i3 : row n+1
				
				  byte4 i0, i1, i2, i3;
					                        // notation : (row, column)
				  // NOTE(review): a4..a7 below take the 1st..4th byte of i1 and are labelled
				  // (n,4)..(n,7), exactly as a0..a3 do with i0.  The lane order written for
				  // i1 and i3 here is the reverse of the one written for i0 -- confirm which
				  // byte order the input stream really uses.
				  i0 = istream(0);		// i0 = (n,3)   | (n,2)   | (n,1)   | (n,0)
				  i1 = istream(0);		// i1 = (n,4)   | (n,5)   | (n,6)   | (n,7)
				  i2 = istream(0);		// i2 = (n+1,3) | (n+1,2) | (n+1,1) | (n+1,0)
				  i3 = istream(0);		// i3 = (n+1,4) | (n+1,5) | (n+1,6) | (n+1,7)
				
				  // These temporaries (and the s*/d*/m* ones declared below) are also used
				  // by the column-pass loop, which does not declare its own.
				  half2 a0, a1, a2, a3, a4, a5, a6, a7, c0, c1, c2, c3, c4, c5, c6, c7;
				
				  // Row n: one pixel per a-register, placed in the low half.
				  a0 = half2(shuffle(i0,shuf_func1));	// a0 = ---- || (n,0)
				  a1 = half2(shuffle(i0,shuf_func2));	// a1 = ---- || (n,1)
				  a2 = half2(shuffle(i0,shuf_func3));	// a2 = ---- || (n,2)
				  a3 = half2(shuffle(i0,shuf_func4));	// a3 = ---- || (n,3)
				  a4 = half2(shuffle(i1,shuf_func1));	// a4 = ---- || (n,4)
				  a5 = half2(shuffle(i1,shuf_func2));	// a5 = ---- || (n,5)
				  a6 = half2(shuffle(i1,shuf_func3));	// a6 = ---- || (n,6)
				  a7 = half2(shuffle(i1,shuf_func4));	// a7 = ---- || (n,7)
				
				  // Row n+1: one pixel per c-register, placed in the high half.
				  c0 = half2(shuffle(i2,shuf_func5));	// c0 = (n+1,0) || ----
				  c1 = half2(shuffle(i2,shuf_func6));	// c1 = (n+1,1) || ----
				  c2 = half2(shuffle(i2,shuf_func7));	// c2 = (n+1,2) || ----
				  c3 = half2(shuffle(i2,shuf_func8));	// c3 = (n+1,3) || ----
				  c4 = half2(shuffle(i3,shuf_func5));	// c4 = (n+1,4) || ----
				  c5 = half2(shuffle(i3,shuf_func6));	// c5 = (n+1,5) || ----
				  c6 = half2(shuffle(i3,shuf_func7));	// c6 = (n+1,6) || ----
				  c7 = half2(shuffle(i3,shuf_func8));	// c7 = (n+1,7) || ----
				
				  // combine a's and c's -- In 16.0 format
				  // From here on every half2 operation works on row n+1 (high half) and
				  // row n (low half) together.
				  a0 = a0 | c0;	                        // a0 = (n+1,0) || (n,0)
				  a1 = a1 | c1;	                        // a1 = (n+1,1) || (n,1)
				  a2 = a2 | c2;	                        // a2 = (n+1,2) || (n,2)
				  a3 = a3 | c3;	                        // a3 = (n+1,3) || (n,3)
				  a4 = a4 | c4;	                        // a4 = (n+1,4) || (n,4)
				  a5 = a5 | c5;	                        // a5 = (n+1,5) || (n,5)
				  a6 = a6 | c6;	                        // a6 = (n+1,6) || (n,6)
				  a7 = a7 | c7;	                        // a7 = (n+1,7) || (n,7)
				
				  // Sums of the mirrored input pairs (k, 7-k), then sums of those sums.
				  half2 s16, s07, s25, s34, s1625, s0734;
				
				  s07 = a0 + a7;
				  s16 = a1 + a6;
				  s25 = a2 + a5;
				  s34 = a3 + a4;
				  s1625 = s16 + s25;
				  s0734 = s07 + s34;
				
				  // Differences of the mirrored pairs; d1625 and d0734 are differences of
				  // the pair SUMS (s16 - s25, s07 - s34), not of the pair differences.
				  half2 d16, d07, d25, d34, d1625, d0734;
				
				  d07 = a0 - a7;
				  d16 = a1 - a6;
				  d25 = a2 - a5;
				  d34 = a3 - a4;
				  d1625 = s16 - s25;
				  d0734 = s07 - s34;
				
				  half2 sd16d07, sd25d34;
				
				  sd16d07 = d07 + d16;
				  sd25d34 = d25 + d34;
				
				  half2 m1_over_2, m2, m5, m6, m7, m8, m9;
				
				  // The five multiplies.  Every product has the form hi(c * shift(x, two))
				  // with c in 2.14 format; presumably the shift pre-scales x by 4 so that the
				  // upper half of the product is c*x in 16.0 -- TODO confirm shift()/hi().
				  // All results in 16.0
				  m1_over_2 = s0734 + s1625;
				  m2 = s0734 - s1625;
				  m5 = hi(COS_2 * shift(d1625 + d0734, two));
				  m6 = hi(COS_2 * shift(d25 + d16, two));
				  m7 = hi(COS_3 * shift(sd16d07 - sd25d34, two));
				  m8 = hi((COS_1_plus_COS_3) * shift(sd16d07, two));
				  m9 = hi((COS_1_minus_COS_3) * shift(sd25d34, two));
				
				  half2 s5, s6, s7, s8;
				
				  s5 = d07 + m6;
				  s6 = d07 - m6;
				  s7 = m8 - m7;
				  s8 = m9 - m7;
				
				  // Scale by K0..K7 and store the eight coefficients of this row pair in
				  // coefficient order at buf1[index .. index+7]; index keeps counting across
				  // iterations, so the next row pair lands in the following eight entries.
				  // All results in 16.0
				  buf1[index] = hi(K0 * shift(m1_over_2, two));
				  index = index + one;
				  buf1[index] = hi(K1 * shift(s5 + s7, two));
				  index = index + one;
				  buf1[index] = hi(K2 * shift(d0734 + m5, two));
				  index = index + one;
				  buf1[index] = hi(K3 * shift(s6 - s8, two));
				  index = index + one;
				  buf1[index] = hi(K4 * shift(m2, two));
				  index = index + one;
				  buf1[index] = hi(K5 * shift(s6 + s8, two));
				  index = index + one;
				  buf1[index] = hi(K6 * shift(d0734 - m5, two));
				  index = index + one;
				  buf1[index] = hi(K7 * shift(s5 - s7, two));
				  index = index + one;
				}
				
				// Shuffle descriptors used only by the column pass.  They are the four
				// stream-1 words that follow K0..K7, read in this order.
				shuf_func9 = istream(1);
				shuf_func10 = istream(1);
				shuf_func11 = istream(1);
				shuf_func12 = istream(1);
				
				int index1, index2;	// index1: read cursor into buf1, index2: write cursor into buf2
				
				index1 = 0;
				index2 = 0;
				
				j = 4;			// column pass: 4 iterations, two columns per iteration
				
				// Column pass: the same 1-D DCT, applied to the row-pass results in buf1.
				// It reuses the a*/c*/s*/d*/m* temporaries declared inside the row-pass loop.
				//
				// buf1 layout left by the row pass: entry 8*p + k holds coefficient k of
				// row pair p, one row in each half.  For every row pair this loop reads two
				// adjacent entries (columns k and k+1) and re-pairs the halves so that each
				// register holds ONE row with TWO columns.
				//
				// index1 walk per iteration: +one, +seven (same columns, next row pair),
				// repeated for the four row pairs, then -twentythree, which leaves index1
				// two entries past where the iteration started (the next column pair).
				//
				// NOTE(review): with the commented-out reference values for shuf_func9
				// (0x04040100) and shuf_func10 (0x04040302), a0/a2/a4/a6 would receive the
				// HIGH half of the buf1 word (row n+1 per the row-pass comments) and
				// a1/a3/a5/a7 the LOW half (row n), i.e. the rows of each pair would enter
				// the transform swapped.  Confirm against the actual stream-1 data.
				// Each iteration --> 2 columns
				loop count j unroll 1 {
				
				  // Row pair 0 -> a0, a1
				  a0 = buf1[index1];
				  index1 = one + index1;
				  c1 = buf1[index1];
				  index1 = seven + index1;
				  index1 = zero + index1;                    // register allocation trick
				  a1 = half2(shuffle(byte4(a0), shuf_func9));
				  a0 = half2(shuffle(byte4(a0), shuf_func10));
				  c0 = half2(shuffle(byte4(c1), shuf_func11));
				  c1 = half2(shuffle(byte4(c1), shuf_func12));
				  a0 = a0 | c0;
				  a1 = c1 | a1;
				
				  // Row pair 1 -> a2, a3
				  a2 = buf1[index1];
				  index1 = one + index1;
				  c3 = buf1[index1];
				  index1 = seven + index1;
				  index1 = zero + index1;                    // register allocation trick
				  a3 = half2(shuffle(byte4(a2), shuf_func9));
				  a2 = half2(shuffle(byte4(a2), shuf_func10));
				  c2 = half2(shuffle(byte4(c3), shuf_func11));
				  c3 = half2(shuffle(byte4(c3), shuf_func12));
				  a2 = a2 | c2;
				  a3 = c3 | a3;
				
				  // Row pair 2 -> a4, a5
				  a4 = buf1[index1];
				  index1 = one + index1;
				  c5 = buf1[index1];
				  index1 = seven + index1;
				  index1 = zero + index1;                    // register allocation trick
				  a5 = half2(shuffle(byte4(a4), shuf_func9));
				  a4 = half2(shuffle(byte4(a4), shuf_func10));
				  c4 = half2(shuffle(byte4(c5), shuf_func11));
				  c5 = half2(shuffle(byte4(c5), shuf_func12));
				  a4 = a4 | c4;
				  a5 = c5 | a5;
				
				  // Row pair 3 -> a6, a7; afterwards index1 is rewound to the next column pair
				  a6 = buf1[index1];
				  index1 = one + index1;
				  c7 = buf1[index1];
				  index1 = index1 - twentythree;
				  index1 = zero + index1;                    // register allocation trick
				  a7 = half2(shuffle(byte4(a6), shuf_func9));
				  a6 = half2(shuffle(byte4(a6), shuf_func10));
				  c6 = half2(shuffle(byte4(c7), shuf_func11));
				  c7 = half2(shuffle(byte4(c7), shuf_func12));
				  a6 = a6 | c6;
				  a7 = c7 | a7;
				
				
				  // From here to the stores the arithmetic is statement-for-statement the
				  // same as in the row pass.
				  s07 = a0 + a7;
				  s16 = a1 + a6;
				  s25 = a2 + a5;
				  s34 = a3 + a4;
				  s1625 = s16 + s25;
				  s0734 = s07 + s34;
				
				  d07 = a0 - a7;
				  d16 = a1 - a6;
				  d25 = a2 - a5;
				  d34 = a3 - a4;
				  d1625 = s16 - s25;
				  d0734 = s07 - s34;
				
				  sd16d07 = d07 + d16;
				  sd25d34 = d25 + d34;
				
				  m1_over_2 = s0734 + s1625;
				  m2 = s0734 - s1625;
				  m5 = hi(COS_2 * shift(d1625 + d0734, two));
				  m6 = hi(COS_2 * shift(d25 + d16, two));
				  m7 = hi(COS_3 * shift(sd16d07 - sd25d34, two));
				  m8 = hi((COS_1_plus_COS_3) * shift(sd16d07, two));
				  m9 = hi((COS_1_minus_COS_3) * shift(sd25d34, two));
				
				  s5 = d07 + m6;
				  s6 = d07 - m6;
				  s7 = m8 - m7;
				  s8 = m9 - m7;
				
				  // Store with a stride of four: coefficient k of column pair p goes to
				  // buf2[4*k + p].  The final -twentyseven undoes the seven +four steps and
				  // leaves index2 one past where the iteration started (next column pair).
				  buf2[index2] = hi(K0 * shift(m1_over_2, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K1 * shift(s5 + s7, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K2 * shift(d0734 + m5, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K3 * shift(s6 - s8, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K4 * shift(m2, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K5 * shift(s6 + s8, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K6 * shift(d0734 - m5, two));
				  index2 = index2 + four;
				  buf2[index2] = hi(K7 * shift(s5 - s7, two));
				  index2 = index2 - twentyseven;
				}
				
				
				// Drain the padding at the end of stream 1.  The code above consumes 25
				// words from it (one, 12 shuffle descriptors, 4 cosine constants, 8 scale
				// factors); these 7 discarded reads bring the total to 32.
				int junk;
				junk = istream(1);    // pad out stream length to a multiple of 8 words
				junk = istream(1);
				junk = istream(1);
				junk = istream(1);    // pad out stream length to a multiple of 8 words
				junk = istream(1);
				junk = istream(1);
				junk = istream(1);    // pad out stream length to a multiple of 8 words
				
				// Emit the finished block: all 32 words of buf2, in ascending index
				// order, on output stream 0.
				ostream(0) = buf2[0];
				ostream(0) = buf2[1];
				ostream(0) = buf2[2];
				ostream(0) = buf2[3];
				ostream(0) = buf2[4];
				ostream(0) = buf2[5];
				ostream(0) = buf2[6];
				ostream(0) = buf2[7];
				ostream(0) = buf2[8];
				ostream(0) = buf2[9];
				ostream(0) = buf2[10];
				ostream(0) = buf2[11];
				ostream(0) = buf2[12];
				ostream(0) = buf2[13];
				ostream(0) = buf2[14];
				ostream(0) = buf2[15];
				ostream(0) = buf2[16];
				ostream(0) = buf2[17];
				ostream(0) = buf2[18];
				ostream(0) = buf2[19];
				ostream(0) = buf2[20];
				ostream(0) = buf2[21];
				ostream(0) = buf2[22];
				ostream(0) = buf2[23];
				ostream(0) = buf2[24];
				ostream(0) = buf2[25];
				ostream(0) = buf2[26];
				ostream(0) = buf2[27];
				ostream(0) = buf2[28];
				ostream(0) = buf2[29];
				ostream(0) = buf2[30];
				ostream(0) = buf2[31];
							

相关资源