Попытка написать свой фильтр. Проверьте пожалуйста! //r(n)= b0*x(n) + b1*x(n-1) + b2*x(n-2) - a1*y(n-1) - a2*y(n-2) void DSP_iir51 ( register short *Input, register const short *restrict Coefs, register short *Output, register short *restrict State, register int nCoefs, register short nx ) { register int j, x, nx_cnt, p_0, p_1, y;
register int s_32, s_10, x0b0;
register long long c_4321; register long long s_3210;
/* -------------------------------------------------------------------- */ /* Iterate over the biquads, processing two per iteration. */ /* -------------------------------------------------------------------- */
for(nx_cnt=0; nx_cnt<nx; nx_cnt++) { x = (int) *Input++;
for (j = 0; j < nCoefs; j += 8) { // s_3210 = _amem8(&State[j]); //dbuffer 64bit 4*16 bit y(n-2) y(n-1) x(n-2) x(n-1) c_4321 = _mem8_const(&Coefs[j + 1]); //Coefs 64bit 4*16 bit a2 a1 b2 b1
x0b0 = x * Coefs[j]; // x * b0
s_10 = _loll(s_3210); // low 32 bit 2*16 bit x(n-2) x(n-1) s_32 = _hill(s_3210); // hi 32 bit 2*16 bit y(n-2) y(n-1)
p_0 = _dotp2(_hill(c_4321), s_32); // a2*y(n-2)+a1*y(n-1) p_1 = _dotp2(_loll(c_4321), s_10); // b2*x(n-2)+b1*x(n-1)
x0b0 = _sadd(x0b0, p_0); //x0b0 = b0*x(n) + b1*x(n-1) + b2*x(n-2) x0b0 = _ssub(x0b0, p_1); //x0b0 = x0b0 - ( a1*y(n-1) + a2*y(n-2) ) y = _sshvr(x0b0, 15); //32 -> 16 bit // in ram // x-1 x-2 y-1 y-2 // x s_10h y s_32l // x0b0 // in reg // y-2 y-1 x-2 x-1 _mem8(&State[j]) = _itoll(_pack2(s_32, y), _packhl2(s_10, x)); } *Output++=y; } }
Код на асме выглядит так: _DSP_iir51__FPsPCsN21is: MV .L1X B6,A5 ; |40| MV .L1X B4,A3 ; |40|
MV .L1X B8,A20 ; |40| || MV .L2X A6,B4 ; |40|
ZERO .L1 A19 ; |52| CMPLT .L1 A19,A20,A0 ; |52| [!A0] BNOP .S1 $C$L4,5 ; |52| $C$L1: $C$DW$L$_DSP_iir51__FPsPCsN21is$2$B: LDH .D1T1 *A4++,A18 ; |55| NOP 4 ZERO .L1 A9 ; |57| CMPLT .L1 A9,A8,A0 ; |57| [!A0] BNOP .S1 $C$L3,5 ; |57| $C$DW$L$_DSP_iir51__FPsPCsN21is$2$E: $C$L2: $C$DW$L$_DSP_iir51__FPsPCsN21is$3$B: ADDAH .D1 A5,A9,A6 ; |58| LDDW .D1T1 *A6,A7:A6 ; |58| NOP 4 ADD .L1 1,A9,A16 ; |59| ADD .L1 A16,A16,A16 ; |59| LDNDW .D1T1 *+A16(A3),A17:A16 ; |59| NOP 4 LDH .D1T1 *+A3[A9],A21 ; |61| NOP 4 MPY32 .M1 A21,A18,A21 ; |61| NOP 3 MV .L2X A6,B5 ; |63| DOTP2 .M1 A17,A7,A17 ; |66| NOP 3 DOTP2 .M2X A16,B5,B5 ; |67| NOP 3 SADD .L1 A21,A17,A16 ; |69| NOP 1 SSUB .L2X A16,B5,B5 ; |70| SHR .S2 B5,15,B5 ; |72| NOP 1
PACK2 .L1X A7,B5,A7 ; |79| || PACKHL2 .S1 A6,A18,A6 ; |79| || ADD .D1 A9,A9,A16 ; |79|
STNDW .D1T1 A7:A6,*+A16(A5) ; |79| NOP 2 ADD .L1 8,A9,A9 ; |57| CMPLT .L1 A9,A8,A0 ; |57| [ A0] BNOP .S1 $C$L2,5 ; |57| $C$DW$L$_DSP_iir51__FPsPCsN21is$3$E: ;** --------------------------------------------------------------------------* $C$L3: $C$DW$L$_DSP_iir51__FPsPCsN21is$4$B: STH .D2T2 B5,*B4++ ; |81| NOP 2 ADD .L1 1,A19,A19 ; |52| CMPLT .L1 A19,A20,A0 ; |52| [ A0] BNOP .S1 $C$L1,5 ; |52| $C$DW$L$_DSP_iir51__FPsPCsN21is$4$E: RETNOP .S2 B3,5 ; |83|
что можно изменить - подправить?
|