Прокоментируйте пожалуйста,
16bit fixed point FFT performance with megaAVR @16MHz
;
; Points: Input, FFT, Output, Total: Throughput
; 64pts: .17ms, 1.9ms, 1.4ms, 3.5ms: 18.3kpps (expected)
; 128pts: .34ms, 4.4ms, 2.6ms, 7.3ms: 17.5kpps (measured)
; 256pts: .68ms, 10.1ms, 5.2ms, 16.0ms: 16.0kpps (expected)
; 512pts: 1.4ms, 22.6ms, 10.4ms, 34.4ms: 14.8kpps (expected)
;
; Input: Input waveform into butterfly table with applying window
; FFT: Execute butterfly operations
; Output: Descramble and output the spectrum as scalar values
do_window: ; Fill butterfly table with applying window function
ldiw X, CaptBuf ;Source
ldiw Y, BflyBuf ;Destination
ldiw Z, t_hamming*2 ;Window table
ldi CL, FFT_N
ms_l1: lpmw A, Z+ ;Get a window attenuation ratio
ldw B, X+ ;Get an input value
FMULS16 A, B, T4, T2 ;Apply window
stw Y+, T4 ;Store real and image
stw Y+, T4 ;/
dec CL
brne ms_l1
ret
do_fft: ; Execute butterfly operations (not optimized)
ldi XH, 1 ;u
ldi EL, FFT_N/2 ;l
df_l1: ldiw Z, BflyBuf ;Z = BflyBuf
ldiw Y, BflyBuf ;Y = BflyBuf + l * 4
muli EL, 4 ;
addw Y, T0 ;/
mul AL, XH ;T10L = u * 4
mov T10L, T0L ;/
mov XL, XH ;w = u
df_l2: clr T14L ;p=0
df_l3: lddw A, Z+0 ;A = [Z+0] - [Y+0], [Z+0] += [Y+0]
asrw A ;
movw CL, AL ;B = [Z+2] - [Y+2], [Z+2] += [Y+2]
lddw D, Y+0 ;
asrw D ;
subw A, D ;
addw C, D ;
stw Z+, C ;
lddw B, Z+0 ;
asrw B ;
movw CL, BL ;
lddw D, Y+2 ;
asrw D ;
subw B, D ;
addw C, D ;
stw Z+, C ;/
movw T0L, ZL
movw ZL, T14L ;C=cos(p), D=sin(p)
addiw Z, t_cos_sin*2 ;
lpmw C, Z+ ;
lpmw D, Z+ ;/
movw ZL, T0L
FMULS16 A, C, T4, T2 ;[Y+0] = A * C + B * D
FMULS16 B, D, T8, T6 ;[Y+2] = B * C - A * D
addw T2, T6 ;
adcw T4, T8 ;
stw Y+, T4 ;
FMULS16 B, C, T4, T2 ;
FMULS16 A, D, T8, T6 ;
subw T2, T6 ;
sbcw T4, T8 ;
stw Y+, T4 ;/
add T14L, T10L ;p += u
rjne df_l3
muli EL, 4 ;Y += l * 4, Z += l * 4; (skip split segment)
addw Y, T0 ;
addw Z, T0 ;/
dec XL ;--w
rjne df_l2
lsl XH ;u *= 2
lsr EL ;l /= 2
rjne df_l1
ret
make_bars: ;Get scalar spectrum and update spectrum bar length
ldiw Z, t_desc*2 ;Descramble table
ldiw Y, LvlBuf ;Bar length buffer
ldi CH, FFT_N/2 ;Number of elements
ub_l1: lpmw A, Z+ ;Get a vector value (A = r, B = i)
ldiw X, BflyBuf ;
addw X, A ;
ldw A, X+ ;
ldw B, X+ ;/
FMULS16 A, A, T4, T2 ;A = sqrt(A * A + B *

; 0..32767 (scalar)
FMULS16 B, B, T8, T6 ;
addw T2, T6 ;
adcw T4, T8 ;
SQRT32 ;/
pushw Z ;CL = log(A)
ldi CL, 0 ;
ldiw Z, logs*2 ;
lpmw B, Z+ ;
cpw A, B ;
brcs PC+3 ;
inc CL ;
rjmp PC-6 ;
popw Z ;/
ld AH, Y ;Update bar length with peak holding
subi AH, 2 ; <-peak decay rate
brcs PC+3 ;
cp AH, CL ;
brcc PC+2 ;
mov AH, CL ;
st Y+, AH ;/
dec CH
rjne ub_l1
ret
rfsh_bars: ;Draw spectrum bars
sbrc _Flags, 1 ;Skip refresh if in pause.
ret ;/
ldi DL, LCD_H-8+1 ; (This routine refreshes LCD with bar length
ldiw B, 0 ; list on the LvlBuf[64]. 1st value is DC
rcall lcd_setadr ; component, 2nd value is fundamental freq
rcall ds_hl ; and last value is highest freq)
clr BL
inc BH
subi DL, 8
brcc PC-5
ret
ds_hl: ldiw Y, LvlBuf
ldi CL, FFT_N/2
ld AL, Y+
sub AL, DL
brcc PC+3
ldi AH, 0
rjmp PC+9
ldi AH, 0xFF
cpi AL, 8
brcc PC+6
ldi AH, bit7
subi AL, 1
brcs PC+3
asr AH
rjmp PC-3
mov AL, AH
rcall lcd_put
dec CL
brne PC-16
ret
; These tables must be rebuilt when change FFT_N
t_cos_sin: ; {cos(x),sin(x)} table (0 <= x < pi, in 64 steps)
.dw 32767, 0, 32727, 1607, 32609, 3211, 32412, 4807
.dw 32137, 6392, 31785, 7961, 31356, 9511, 30851, 11038
.dw 30272, 12539, 29621, 14009, 28897, 15446, 28105, 16845
.dw 27244, 18204, 26318, 19519, 25329, 20787, 24278, 22004
.dw 23169, 23169, 22004, 24278, 20787, 25329, 19519, 26318
.dw 18204, 27244, 16845, 28105, 15446, 28897, 14009, 29621
.dw 12539, 30272, 11038, 30851, 9511, 31356, 7961, 31785
.dw 6392, 32137, 4807, 32412, 3211, 32609, 1607, 32727
.dw 0, 32767, -1607, 32727, -3211, 32609, -4807, 32412
.dw -6392, 32137, -7961, 31785, -9511, 31356, -11038, 30851
.dw -12539, 30272, -14009, 29621, -15446, 28897, -16845, 28105
.dw -18204, 27244, -19519, 26318, -20787, 25329, -22004, 24278
.dw -23169, 23169, -24278, 22005, -25329, 20787, -26318, 19519
.dw -27244, 18204, -28105, 16845, -28897, 15446, -29620, 14009
.dw -30272, 12539, -30851, 11038, -31356, 9511, -31784, 7961
.dw -32137, 6392, -32412, 4807, -32609, 3211, -32727, 1607
t_hamming: ; Hamming window (for 128 samples)
.dw 2621, 2639, 2693, 2784, 2910, 3073, 3270, 3502
.dw 3768, 4068, 4401, 4765, 5161, 5587, 6042, 6525
.dw 7036, 7571, 8132, 8715, 9320, 9945, 10588, 11249
.dw 11926, 12616, 13318, 14031, 14753, 15482, 16216, 16954
.dw 17694, 18433, 19171, 19905, 20634, 21356, 22069, 22772
.dw 23462, 24138, 24799, 25443, 26068, 26673, 27256, 27816
.dw 28352, 28862, 29345, 29800, 30226, 30622, 30987, 31319
.dw 31619, 31885, 32117, 32315, 32477, 32603, 32694, 32748
.dw 32767, 32748, 32694, 32603, 32477, 32315, 32117, 31885
.dw 31619, 31319, 30987, 30622, 30226, 29800, 29345, 28862
.dw 28352, 27816, 27256, 26673, 26068, 25443, 24799, 24138
.dw 23462, 22772, 22069, 21356, 20634, 19905, 19171, 18433
.dw 17694, 16954, 16216, 15482, 14753, 14031, 13318, 12616
.dw 11926, 11249, 10588, 9945, 9320, 8715, 8132, 7571
.dw 7036, 6526, 6042, 5587, 5161, 4765, 4401, 4068
.dw 3768, 3502, 3270, 3073, 2910, 2784, 2693, 2639
t_desc: ; Descramble table (for 128 point FFT)
.dw 0*4, 64*4, 32*4, 96*4, 16*4, 80*4, 48*4, 112*4
.dw 8*4, 72*4, 40*4, 104*4, 24*4, 88*4, 56*4, 120*4
.dw 4*4, 68*4, 36*4, 100*4, 20*4, 84*4, 52*4, 116*4
.dw 12*4, 76*4, 44*4, 108*4, 28*4, 92*4, 60*4, 124*4
.dw 2*4, 66*4, 34*4, 98*4, 18*4, 82*4, 50*4, 114*4
.dw 10*4, 74*4, 42*4, 106*4, 26*4, 90*4, 58*4, 122*4
.dw 6*4, 70*4, 38*4, 102*4, 22*4, 86*4, 54*4, 118*4
.dw 14*4, 78*4, 46*4, 110*4, 30*4, 94*4, 62*4, 126*4
Как просчитать єти таблички (в конце), и зачем они?