《基于TI DSP的通用算法实现》程序代码

源代码在线查看: example 2-4.asm

软件大小: 223.37K
上传用户: yueyan51
关键词: DSP 算法 程序 代码
下载地址: 免注册下载 普通下载 VIP

相关代码

				
				;Example 2 - 4. Unsigned Fixed-Point Division ASM Subroutine for C6x
				
				*==============================================================================
				*
				* TEXAS INSTRUMENTS, INC.
				* DIVMODU32 (32 bits unsigned division and modulo)
				* Revision Date: 07/15/97
				*
				* USAGE
				* 	This routine is C Callable and can be called as:
				*
				* 	struct divmodu divmodu32(unsigned int a, unsigned int b);
				*
				* 	a --- unsigned numerator
				* 	b --- unsigned denominator
				*
				* 	If routine is not to be used as a C callable function then
				* 	you need to initialize values for all of the values passed
				* 	as these are assumed to be in registers as defined by the
				* 	calling convention of the compiler, (refer to the C compiler
				* 	reference guide).
				*
				* C CODE
				* 	This is the C equivalent of the assembly code. Note that
				* 	the assembly code is hand optimized and restrictions may
				* 	apply.
				**
				* 	struct divmodu {
				* 		unsigned int div;
				* 		unsigned int mod;
				* 	};
				**
				* 	struct divmodu divmodu32(unsigned int a, unsigned int b)
				* 	{
				* 		struct divmodu tmp;
				*
				* 		tmp.div = a / b;
				* 		tmp.mod = a % b;
				*
				* 		return tmp;
				* 	}
				*
				* DESCRIPTION
				* 	This routine divides two unsigned 32-bit values and returns
				* 	their quotient and remainder. The inputs are unsigned 32-bit
				* 	numbers, and each result is an unsigned 32-bit number.
				*
				* TECHNIQUE
				* 	The loop is executed at least 6 times. In the loop, the
				* 	conditional subtract divide step (SUBC) is blocked from doing
				* 	extraneous executions. In short, the SUBC instruction
				* 	is conditional and will not necessarily be executed.
				*
				* MEMORY NOTE
				* 	No memory bank hits under any conditions.
				*
				* CYCLES
				* 	Minimum execution time -> 18 cycles
				* 	Maximum execution time -> 42 cycles
				*
				*==============================================================================
				
					.global _divmodu32
					.text
				
				;------------------------------------------------------------------------------
				; _divmodu32 -- 32-bit unsigned divide and modulo using SUBC steps.
				;
				; Registers as used by the code below:
				;   In:      A4 = a (numerator), B4 = b (denominator)
				;            A3 = address the two result words are stored to
				;            B3 = return address (target of the final branch)
				;   Out:     word at *A3       = quotient  (first struct member, div)
				;            word at *(A3 + 4) = remainder (second struct member, mod)
				;   Written: A0 A1 A2 A3 A4 A5 A6 A8  B0 B1 B2 B4 B5 B7 B8
				;
				; Loop entry: five unconditional "B LOOP" instructions are issued in the five
				; execute packets ahead of LOOP.  With the C6x five-delay-slot branch this
				; gives one fall-through pass plus five branch-driven passes through the
				; single-packet LOOP (the "at least 6 times" of the header); B1 = i - 6 then
				; counts any additional passes.  Do not add or remove packets or branches
				; in the prologue without re-deriving that count.
				;
				; NOTE(review): b == 0 is not checked anywhere in this routine.
				;------------------------------------------------------------------------------
				_divmodu32:
				
				*** BEGIN Benchmark Timing ***
				B_START:
					LMBD 	.L2X 	1, A4, B1 			; mag_num = lmbd(1, num)
				|| 	LMBD 	.L1X 	1, B4, A1 			; mag_den = lmbd(1, den)
				|| 	MVK 	.S1 	32, A0 				; const 32
				|| 	ZERO 	.D1 	A8 				; first_div = 0
					CMPGTU 	.L1X 	B4, A4, A1 		; zero = (den > num)
				|| 	SUB 	.L2X 	A1, B1, B0 		; i = mag_den - mag_num
				|| 	MV 	.D1 	A4, A5 			; save num
				||[!B1] MVK 	.S1 	1, A8 			; if (num32) first_div = 1
					SHL 	.S2 	B4, B0, B4 		; den <<= i
				||[B1] 	ADD 	.D2 	B0, 1, B0 		; if (!num32) i++
				|| 	MV 		B0, A6 			; A6 = i (value before the i++)
					CMPGTU 	.L2X 	B4, A4, B2 		; gt = den > num
				|| 	SUB 	.L1X 	A0, B0, A0 		; qs = 32 - i
				|| 	SHL 	.S1 	A8, A6, A8 		; first_div <<= i
				|| 	B 	.S2 	LOOP 			; prologue branch 1 of 5
				||[B1] 	MPY 	.M2 	B2, 0, B2 		; num32 && gt (gt cleared when B1 != 0)
					ADD 	.L1X 	0, B0, A2 		; A2 = i (used as ms and as SUBC guard)
				||[B2] 	MV 	.D2 	B2, B1 			; !(num32 && !gt)
				||[B2] 	SHRU 	.S1 	A8, 1, A8 		; first_div >>= 1
				|| 	B 	.S2 	LOOP 			; prologue branch 2 of 5
				  [B2] 	SHRU 	.S2 	B4, 1, B4 		; if (num32 && gt) den >> 1
				||[!B1] SUB 	.L1X 	A4, B4, A4 		; if (num32 && !gt) num -= den
				||[B0] 	SUB 	.D2 	B0, 1, B0 		; i--
				|| 	B 	.S1 	LOOP 			; prologue branch 3 of 5
				  [!B1] SHRU 	.S2 	B4, 1, B4 		; if (num32 && !gt) den >> 1
				||[B2] 	SUB 	.L1X 	A4, B4, A4 		; if (num32 && gt) num -= den
				|| 	CMPLT 	.L2 	B0, 6, B2 		; check for neg. loop counter
				|| 	SUB 	.D2 	B0, 6, B1 		; generate loop counter
				|| 	B 	.S1 	LOOP 			; prologue branch 4 of 5
				  [B2] 	ZERO 	.L2 	B1 			; zero negative loop counter
				||[A2] 	SUBC 	.L1X 	A4, B4, A4 		; num = subc(num, den)
				|| 	B 	.S2 	LOOP 			; prologue branch 5 of 5
				
				; Single-packet loop body: one conditional divide step per pass.
				LOOP:
				  [B0] 	SUBC 	.L1X 	A4, B4, A4 		; num = subc(num, den)
				||[B0] 	SUB 	.L2 	B0, 1, B0 		; i-- (SUBC steps still to do)
				||[B1] 	SUB 	.D2 	B1, 1, B1 		; loop counter--
				||[B1] 	B 	.S1 	LOOP			; for
				;end of LOOP
				
				; Split the SUBC result in A4 into quotient and remainder, store both, return.
					ADD 	.L2X 	A3, 4, B7 		; address for mod result
				||[!A1] SHL 	.S1 	A4, A0, A6 		; q = num << qs
				||[A1] 	MPY 	.M1 	0, A6, A6 		; if (zero) q = zero
				|| 	B 	.S2 	B3 			; return to caller
				  [!A1] SHRU 	.S1	A6, A0, A6 		; q = q >> qs
				||[A1] 	MV 	.L1 	A5, A2 			; if (zero) mod = num
				|| 	MV 		A8, B5 			; B5 = first_div
					ADD 	.L2X 	A6, B5, B8 		; quotient = q + first_div
				||[!A1] SHRU 	.S1 	A4, A2, A2 		; mod = n >> ms
					STW 	.D1 	B8, *A3++ 		; store quotient at *A3
				|| 	STW 	.D2 	A2, *B7++ 		; store remainder at *(A3 + 4)
				B_END:
				*** END Benchmark Timing ***
					NOP 2					; pad out the remaining delay slots of B B3

相关资源