视频压缩标准MPEG4的视频参考代码xvid9.1

源代码在线查看: quant_h263_ia64.s

软件大小: 2657 K
上传用户: jjkk778
关键词: MPEG4 xvid 9.1 视频压缩
下载地址: 免注册下载 普通下载 VIP

相关代码

				//*******************************************************************************
//*
//*	H.263 quantisation / dequantisation for IA-64 (Itanium), GNU as syntax.
//*
//*	quant_inter_ia64 and dequant_inter_ia64 are software pipelined
//*	(rotating registers + br.ctop) and use the pmpyshr2 instruction.
//*	quant_intra_ia64 and dequant_intra_ia64 are compiler-style code.
//*
//*	by Christian Engel and Hans-Joachim Daniels
//*	christian.engel@ira.uka.de hans-joachim.daniels@ira.uka.de
//*
//*	This was made for the ia64 DivX laboratory (yes, it was really called
//*	this way; originally OpenDivX was intended, but it died shortly before
//*	our work started) at the Universitaet Karlsruhe (TH), held between
//*	April and July 2002.
//*	http://www.info.uni-karlsruhe.de/~rubino/ia64p/
//*
//*	Argument registers used below: in0 = r32, in1 = r33, in2 = r34, in3 = r35.
//*
//*******************************************************************************

	.file	"quant_h263_ia64.s"
	.pred.safe_across_calls p1-p5,p16-p63

//-------------------------------------------------------------------------------
// multipliers[q]: 32 x 32-bit fixed-point reciprocals, indexed by quant.
// The quantisers multiply by multipliers[quant] and shift right by 16
// instead of dividing by 2*quant. Entry 0 is unused (0).
//-------------------------------------------------------------------------------
	.section	.rodata
	.align 4
	.type	 multipliers#,@object
	.size	 multipliers#,128
multipliers:
	data4	0
	data4	32769
	data4	16385
	data4	10923
	data4	8193
	data4	6554
	data4	5462
	data4	4682
	data4	4097
	data4	3641
	data4	3277
	data4	2979
	data4	2731
	data4	2521
	data4	2341
	data4	2185
	data4	2049
	data4	1928
	data4	1821
	data4	1725
	data4	1639
	data4	1561
	data4	1490
	data4	1425
	data4	1366
	data4	1311
	data4	1261
	data4	1214
	data4	1171
	data4	1130
	data4	1093
	data4	1058

	.global __divdi3#

//-------------------------------------------------------------------------------
// quant_intra_ia64
//   in0 (r32) = destination int16 array (written)
//   in1 (r33) = source int16 array (read)
//   in2 (r34) = quant (index into multipliers)
//   in3 (r35) = divisor applied to element 0 (presumably the DC scaler)
//
//   Element 0:      dst[0] = (src[0] +/- (in3 >> 1)) / in3   via __divdi3,
//                   the rounding term is added for src[0] > 0, subtracted
//                   otherwise.
//   Elements 1..63: level = |src[i]|;
//                   dst[i] = 0                                if level < 2*quant
//                   dst[i] = sign * ((level * mult) >> 16)    otherwise
//
//   Saves/restores ar.pfs (r38), b0 (r37), ar.lc and f2 (on the stack).
//-------------------------------------------------------------------------------
	.text
	.align 16
	.global quant_intra_ia64#
	.proc quant_intra_ia64#
quant_intra_ia64:
	.prologue
	.save ar.pfs, r38
	alloc r38 = ar.pfs, 4, 3, 2, 0
	adds r16 = -8, r12			// r16 = old sp - 8 (= new sp + 24)
	.fframe 32
	adds r12 = -32, r12
	mov r17 = ar.lc
	addl r14 = @ltoff(multipliers#), gp
	ld2 r15 = [r33]				// r15 = src[0]
	;;
	.savesp ar.lc, 24
	st8 [r16] = r17, 8			// save ar.lc at sp+24, r16 -> sp+32
	ld8 r14 = [r14]				// r14 = &multipliers
	sxt2 r15 = r15
	;;
	.save.f 0x1
	stf.spill [r16] = f2			// save f2 at sp+32
	.save rp, r37
	mov r37 = b0
	.body
	dep.z r36 = r34, 1, 15			// r36 = quant_m_2 = quant << 1
	dep.z r16 = r34, 2, 32			// r16 = quant * 4 (byte offset)
	cmp4.ge p6, p7 = 0, r15			// p6 = (src[0] <= 0)
	;;
	add r16 = r16, r14
	;;
	ld4 r16 = [r16]				// r16 = multipliers[quant]
	;;
	setf.sig f2 = r16			// f2 = mult (kept for the whole loop)
	(p6) br.cond.dptk .L8
	extr r39 = r35, 1, 31			// out0 = in3 >> 1
	sxt4 r40 = r35				// out1 = in3
	;;
	add r39 = r39, r15			// out0 = src[0] + (in3 >> 1)
	br .L21
	;;
.L8:
	extr r39 = r35, 1, 31			// out0 = in3 >> 1
	sxt4 r40 = r35				// out1 = in3
	;;
	sub r39 = r15, r39			// out0 = src[0] - (in3 >> 1)
	;;
.L21:
	sxt4 r39 = r39
	br.call.sptk.many b0 = __divdi3#	// r8 = out0 / out1
	;;
	addl r14 = 62, r0			// 63 iterations: i = 1..63
	st2 [r32] = r8				// dst[0] = quotient
	addl r19 = 1, r0			// r19 = i
	;;
	mov ar.lc = r14
	;;
.L20:
	dep.z r17 = r19, 1, 32			// r17 = i * 2 (byte offset)
	;;
	add r15 = r17, r33
	adds r19 = 1, r19
	;;
	ld2 r14 = [r15]
	;;
	sxt2 r14 = r14				// r14 = src[i]
	;;
	mov r16 = r14
	mov r18 = r14
	;;
	sub r15 = r0, r16			// r15 = -src[i]
	cmp4.le p8, p9 = r36, r16		// p8 = (quant_m_2 <= src[i])
	cmp4.le p6, p7 = r0, r16		// p6 = (src[i] >= 0)
	;;
	sxt2 r14 = r15
	(p6) br.cond.dptk .L14
	;;
	// negative coefficient: work on the magnitude, negate the result
	mov r16 = r14
	add r18 = r17, r32			// r18 = &dst[i]
	;;
	setf.sig f6 = r16
	cmp4.le p6, p7 = r36, r16		// p6 = (quant_m_2 <= |src[i]|)
	mov r15 = r18
	;;
	xma.l f6 = f6, f2, f0			// |src[i]| * mult
	(p7) st2 [r18] = r0			// below threshold -> 0
	;;
	getf.sig r14 = f6
	;;
	extr r14 = r14, 16, 16			// >> 16
	;;
	sub r14 = r0, r14
	;;
	(p6) st2 [r15] = r14
	br .L12
.L14:
	// non-negative coefficient
	.pred.rel "mutex", p8, p9
	setf.sig f6 = r18
	add r16 = r17, r32			// r16 = &dst[i]
	;;
	xma.l f6 = f6, f2, f0			// src[i] * mult
	mov r15 = r16
	(p9) st2 [r16] = r0			// below threshold -> 0
	;;
	getf.sig r14 = f6
	;;
	extr r14 = r14, 16, 16			// >> 16
	;;
	(p8) st2 [r15] = r14
.L12:
	br.cloop.sptk.few .L20
	adds r18 = 24, r12
	;;
	ld8 r19 = [r18], 8			// reload saved ar.lc, r18 -> sp+32
	mov ar.pfs = r38
	mov b0 = r37
	;;
	mov ar.lc = r19
	ldf.fill f2 = [r18]
	.restore sp
	adds r12 = 32, r12
	br.ret.sptk.many b0
	.endp quant_intra_ia64#

//-------------------------------------------------------------------------------
// dequant_intra_ia64
//   in0 (r32) = destination int16 array (written)
//   in1 (r33) = source int16 array (read)
//   in2 (r34) = quant
//   in3 (r35) = multiplier applied to element 0 (presumably the DC scaler)
//
//   Element 0:      dst[0] = clamp(src[0] * in3, -2048, 2047)
//   Elements 1..63: quant_m_2 = quant << 1
//                   quant_add = quant - (quant even ? 1 : 0)
//                   0          -> 0
//                   level < 0  -> t = -level * quant_m_2 + quant_add;
//                                 dst[i] = (t <= 2048) ? -t : -2048
//                   level > 0  -> t =  level * quant_m_2 + quant_add;
//                                 dst[i] = (t <  2047) ?  t :  2047
//
//   Leaf routine; ar.lc is kept in r2 and restored before returning.
//-------------------------------------------------------------------------------
	.common	quant_intra#,8,8
	.common	dequant_intra#,8,8
	.align 16
	.global dequant_intra_ia64#
	.proc dequant_intra_ia64#
dequant_intra_ia64:
	.prologue
	ld2 r14 = [r33]				// r14 = src[0]
	andcm r15 = 1, r34			// r15 = 1 if quant is even, else 0
	setf.sig f8 = r35
	;;
	sxt2 r14 = r14
	sub r15 = r34, r15			// r15 = quant_add
	addl r16 = -2048, r0
	;;
	setf.sig f6 = r14
	setf.sig f7 = r15			// f7 = quant_add
	shladd r34 = r34, 1, r0			// r34 = quant_m_2
	;;
	xma.l f8 = f6, f8, f0			// src[0] * in3
	.save ar.lc, r2
	mov r2 = ar.lc
	;;
	.body
	getf.sig r14 = f8
	setf.sig f6 = r34			// f6 = quant_m_2
	;;
	sxt2 r15 = r14
	st2 [r32] = r14				// provisional dst[0]
	;;
	cmp4.le p6, p7 = r16, r15		// p7 = (value < -2048)
	;;
	(p7) st2 [r32] = r16			// clamp low
	(p7) br.cond.dptk .L32
	addl r14 = 2047, r0
	;;
	cmp4.ge p6, p7 = r14, r15		// p7 = (value > 2047)
	;;
	(p7) st2 [r32] = r14			// clamp high
.L32:
	addl r14 = 62, r0			// 63 iterations: i = 1..63
	addl r19 = 1, r0			// r19 = i
	addl r22 = 2048, r0
	addl r21 = -2048, r0
	addl r20 = 2047, r0
	;;
	mov ar.lc = r14
	;;
.L56:
	dep.z r16 = r19, 1, 32			// r16 = i * 2 (byte offset)
	;;
	add r14 = r16, r33
	add r17 = r16, r32			// r17 = &dst[i]
	adds r19 = 1, r19
	;;
	ld2 r15 = [r14]
	;;
	sxt2 r15 = r15				// r15 = src[i]
	;;
	cmp4.ne p6, p7 = 0, r15			// p7 = (src[i] == 0)
	cmp4.le p8, p9 = r0, r15		// p8 = (src[i] >= 0)
	;;
	(p7) st2 [r17] = r0
	(p7) br.cond.dpnt .L36
	add r18 = r16, r32			// r18 = &dst[i]
	sub r17 = r0, r15			// r17 = -src[i]
	;;
	mov r14 = r18
	(p8) br.cond.dptk .L40
	// negative level
	setf.sig f8 = r17
	;;
	xma.l f8 = f6, f8, f7			// quant_m_2 * -level + quant_add
	;;
	getf.sig r15 = f8
	;;
	cmp4.lt p6, p7 = r22, r15		// p6 = (2048 < t)
	sub r16 = r0, r15
	;;
	(p7) st2 [r14] = r16			// -t
	(p6) st2 [r14] = r21			// -2048
	br .L36
.L40:
	// positive level
	setf.sig f8 = r15
	;;
	xma.l f8 = f6, f8, f7			// quant_m_2 * level + quant_add
	;;
	getf.sig r15 = f8
	;;
	cmp4.le p6, p7 = r20, r15		// p6 = (2047 <= t)
	;;
	(p6) mov r14 = r20
	(p7) mov r14 = r15
	;;
	st2 [r18] = r14
.L36:
	br.cloop.sptk.few .L56
	;;
	mov ar.lc = r2
	br.ret.sptk.many b0
	.endp dequant_intra_ia64#

//-------------------------------------------------------------------------------
// uint32_t quant_inter_ia64(int16_t *coeff, const int16_t *data, const uint32_t quant)
//
//   in0 (r32) = coeff (written), in1 (r33) = data (read), in2 (r34) = quant
//   returns r8 = sum of the quantised magnitudes
//
//   Software-pipelined, 12 stages (LL+9 with LL = 3), 64 elements.
//   Saves/restores ar.pfs (r9), pr (r10), ar.ec (r17) and ar.lc (r2).
//-------------------------------------------------------------------------------
	.common	quant_inter#,8,8
	.align 16
	.global quant_inter_ia64#
	.proc quant_inter_ia64#
quant_inter_ia64:

//*******************************************************
//*
//*	const uint32_t mult = multipliers[quant];
//*	const uint16_t quant_m_2 = quant << 1;
//*	const uint16_t quant_d_2 = quant >> 1;
//*	int sum = 0;
//*	uint32_t i;
//*	int16_t acLevel,acL;
//*
//*******************************************************

	LL=3		// LL = load latency
			// if LL is changed, you'll also have to change the .pred.rel... parts below!

	.prologue
	addl r14 = @ltoff(multipliers#), gp
	dep.z r15 = r34, 2, 32			// r15 = quant * 4 (byte offset)
	.save ar.lc, r2
	mov r2 = ar.lc
	;;
	.body
	alloc r9=ar.pfs,0,24,0,24
	mov r17 = ar.ec
	mov r10 = pr
	ld8 r14 = [r14]				// r14 = &multipliers
	extr.u r16 = r34, 1, 16			// r16 = quant_d_2
	dep.z r20 = r34, 1, 15			// r20 = quant_m_2
	;;
	add r15 = r15, r14
	mov r21 = r16				// r21 = quant_d_2
	mov r8 = r0				// r8  = sum = 0
	mov pr.rot    = 0			// p16-p63 = 0
	;;
	ld4 r15 = [r15]				// r15 = mult
	addl r14 = 63, r0
	// The operand of "mov pr.rot" is a mask over p16-p63, so the stage-0
	// predicate p16 is bit 16. A plain "1" would leave p16 clear and the
	// first pass through the kernel would load nothing.
	mov pr.rot = 1 << 16			// p16 = 1, p17-p63 = 0
	;;
	mov ar.lc = r14				// 64 kernel iterations
	mov ar.ec = LL+9			// epilogue count = number of stages
	mov r29 = r15				// r29 = mult
	;;
	mov r15 = r33				// r15 = data
	mov r18 = r32				// r18 = coeff
	;;

	// With LL = 3 the rotating predicates are laid out as
	//   p[0..11] = p16-p27, cmp1[0..7] = p28-p35, cmp1neg[0..7] = p36-p43,
	//   cmp2[0..4] = p44-p48, cmp2neg[0..1] = p49-p50
	// which is what the literal p29/p37 and p34/p42 below refer to.
	.rotr ac1[LL+3], ac2[8], ac3[2]
	.rotp p[LL+9], cmp1[8], cmp1neg[8],cmp2[5], cmp2neg[2]

//*******************************************************************************
//*
//*	for (i = 0; i < 64; i++) {
//*		acL=acLevel = data[i];
//*		acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2;
//*		if (acLevel < quant_m_2){
//*			acLevel = 0;
//*		}
//*		acLevel = (acLevel * mult) >> SCALEBITS;
//*		sum += acLevel;
//*		coeff[i] = ((acL < 0)?-acLevel:acLevel);
//*	}
//*
//*******************************************************************************

	.explicit
.L58:
	.pred.rel "clear", p29, p37
	.pred.rel "mutex", p29, p37
									//pipeline stage
{.mmi
	(p[0])		ld2 ac1[0]   = [r15],2				//   0		acL=acLevel = data[i];
	(p[LL+1])	sub ac2[0]   = r0, ac1[LL+1]			//   LL+1	ac2=-acLevel
	(p[LL])		sxt2 ac1[LL] = ac1[LL]				//   LL
}
{.mmi
	(p[LL+1])	cmp4.le cmp1[0], cmp1neg[0] = r0, ac1[LL+1]	//   LL+1	cmp1 = (0 <= acLevel) ; cmp1neg = !(0 <= acLevel)
	(p[LL+4])	cmp4.le cmp2[0], cmp2neg[0] = r20, ac2[3]	//   LL+4	cmp2 = (quant_m_2 <= acLevel) ; cmp2neg = !(quant_m_2 <= acLevel)
	(cmp1[1])	sub ac2[1]   = ac1[LL+2], r21			//   LL+2	acLevel = acLevel - quant_d_2;
}
{.mmi
	(cmp2neg[1])	mov ac2[4] = r0					//   LL+5	if (acLevel < quant_m_2) acLevel=0;
	(cmp1neg[1])	sub ac2[1]   = ac2[1], r21			//   LL+2	acLevel = ac2 - quant_d_2;
	(p[LL+3])	sxt2 ac2[2]   = ac2[2]				//   LL+3
}
{.mmi
	.pred.rel "mutex", p34, p42
	(cmp1[6])	mov ac3[0] = ac2[6]				//   LL+7	ac3 = acLevel;
	(cmp1neg[6])	sub ac3[0] = r0, ac2[6]				//   LL+7	ac3 = -acLevel;
	(p[LL+6])	pmpyshr2.u ac2[5] = r29, ac2[5], 16		//   LL+6	acLevel = (acLevel * mult) >> SCALEBITS;
}
{.mib
	(p[LL+8])	st2 [r18] = ac3[1] , 2				//   LL+8	coeff[i] = ac3;
	(cmp2[4])	add r8 = r8, ac2[7]				//   LL+8	sum += acLevel;
	br.ctop.sptk.few .L58
	;;
}

	.pred.rel "clear", p29, p37
	.default
	mov ar.ec = r17
	;;
	mov ar.lc = r2
	mov pr = r10, -1
	mov ar.pfs = r9
	br.ret.sptk.many b0
	.endp quant_inter_ia64#

//-------------------------------------------------------------------------------
// void dequant_inter_ia64(int16_t *data, const int16_t *coeff, const uint32_t quant)
//
//   in0 (r32) = data (written), in1 (r33) = coeff (read), in2 (r34) = quant
//
//   Software-pipelined, 12 stages (LL+10 with LL = 2), 64 elements.
//   Saves/restores ar.pfs (r9), ar.ec (r16), pr (r17) and ar.lc (r2).
//-------------------------------------------------------------------------------
	.common	dequant_inter#,8,8
	.align 16
	.global dequant_inter_ia64#
	.proc dequant_inter_ia64#
dequant_inter_ia64:

//***********************************************************************
//*
//*	const uint16_t quant_m_2 = quant << 1;
//*	const uint16_t quant_add = (quant & 1 ? quant : quant - 1);
//*	uint32_t i;
//*
//***********************************************************************

	// LL must be set before its first use below: an expression such as
	// "LL+10" may be folded with the value LL has at that point, and the
	// value left over from quant_inter_ia64 (3) would make ar.ec one
	// larger than the 12 stages this loop actually has.
	LL=2		// LL := load latency
			// if LL is changed, you'll also have to change the .pred.rel... parts below!

	.prologue
	andcm r14 = 1, r34			// r14 = 1 if quant is even, else 0
	dep.z r29 = r34, 1, 15			// r29 = quant_m_2
	alloc r9=ar.pfs,0,32,0,32
	.save ar.lc, r2
	mov r2 = ar.lc
	;;
	.body
	sub r15 = r34, r14			// r15 = quant_add
	addl r14 = 63, r0
	addl r21 = -2048, r0
	addl r20 = 2047, r0
	mov r16 = ar.ec
	mov r17 = pr
	;;
	zxt2 r15 = r15
	mov ar.lc = r14				// 64 kernel iterations
	mov pr.rot = 0				// p16-p63 = 0
	;;
	adds r14 = 0, r33			// r14 = coeff
	mov r18 = r32				// r18 = data
	mov ar.ec = LL+10			// epilogue count = number of stages
	// p16 is bit 16 of the pr.rot mask; see quant_inter_ia64.
	mov pr.rot = 1 << 16			// p16 = 1, p17-p63 = 0
	;;

//*******************************************************************************
//*
//*	for (i = 0; i < 64; i++) {
//*		int16_t acLevel = coeff[i];
//*
//*		if (acLevel == 0)
//*		{
//*			data[i] = 0;
//*		}
//*		else if (acLevel < 0)
//*		{
//*			acLevel = acLevel * quant_m_2 - quant_add;
//*			data[i] = (acLevel >= -2048 ? acLevel : -2048);
//*		}
//*		else // if (acLevel > 0)
//*		{
//*			acLevel = acLevel * quant_m_2 + quant_add;
//*			data[i] = (acLevel <= 2047 ? acLevel : 2047);
//*		}
//*	}
//*
//*******************************************************************************

	// With LL = 2 the rotating predicates are laid out as
	//   p[0..11] = p16-p27, cmp1neg[0..7] = p28-p35, cmp2[0..4] = p36-p40,
	//   cmp2neg[0..4] = p41-p45, cmp3[0..1] = p46-p47, cmp3neg[0..1] = p48-p49
	// which is what the literal predicate numbers below refer to.
	.rotr ac1[LL+10], x[5], y1[3], y2[3]
	.rotp p[LL+10] , cmp1neg[8], cmp2[5], cmp2neg[5],cmp3[2], cmp3neg[2]

	.explicit
									//pipeline stage
.L60:
	.pred.rel "clear", p36
	.pred.rel "mutex", p47, p49
	.pred.rel "mutex", p46, p48
	.pred.rel "mutex", p40, p45
	.pred.rel "mutex", p39, p44
	.pred.rel "mutex", p38, p43
	.pred.rel "mutex", p37, p42
	.pred.rel "mutex", p36, p41
{.mmi
	(p[0])ld2 ac1[0] = [r14] ,2				//	0	acLevel = coeff[i];
	(p[LL+1])cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1]		//	LL+1	cmp1neg = (acLevel == 0)
	(p[LL])sxt2 ac1[LL] = ac1[LL]				//	LL
}
{.mmi
	(p[LL+1])cmp4.le cmp2[0], cmp2neg[0] = r0, ac1[LL+1]	//	LL+1	cmp2 = (0 <= acLevel)
	(cmp2[1]) mov x[0] = r20				//	LL+2	x = 2047
	(p[LL+2])pmpyshr2.u ac1[LL+2] = r29, ac1[LL+2], 0	//	LL+2	acLevel * quant_m_2
}
{.mmi
	(cmp2neg[1]) mov x[0] = r21				//	LL+2	x = -2048
	(cmp2[2]) add ac1[LL+3] = ac1[LL+3], r15		//	LL+3	+ quant_add
	(cmp2neg[2]) sub ac1[LL+3] = ac1[LL+3], r15		//	LL+3	- quant_add
}
{.mmi
	(cmp2neg[4]) mov y1[0] = ac1[LL+5]			//	LL+5
	(cmp2neg[4]) mov y2[0] = x[3]				//	LL+5
	(p[LL+4])sxt2 ac1[LL+4] = ac1[LL+4]			//	LL+4
}
{.mmi
	(cmp2[4]) mov y1[0] = x[3]				//	LL+5
	(cmp2[4]) mov y2[0] = ac1[LL+5]				//	LL+5
	(p[LL+6])cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6]	//	LL+6	cmp3 = (x <= acLevel)
}
{.mmi
	(cmp3[1]) mov ac1[LL+7] = y1[2]				//	LL+7
	(cmp3neg[1]) mov ac1[LL+7] = y2[2]			//	LL+7
	(cmp1neg[7])  mov ac1[LL+8] = r0			//	LL+8	zero input -> zero output
}
{.mbb
	(p[LL+9])st2 [r18] = ac1[LL+9] ,2			//	LL+9	data[i] = acLevel;
	nop.b 0x0
	br.ctop.sptk.few .L60
	;;
}
	.pred.rel "clear", p36
	.default
	mov ar.lc = r2
	mov ar.pfs = r9
	mov ar.ec  = r16
	mov pr = r17, -1
	;;
	mov ar.lc = r2
	br.ret.sptk.many b0
	.endp dequant_inter_ia64#
	.ident	"GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"

相关资源