; Source path: cog/Libraries/MAC/Files/Source/MACLib/Assembly/Assembly.nas
;
; Repository listing metadata (not part of the program):
;   182 lines, 3.9 KiB, plain text, executable file

%include "Tools.inc"
segment_code
;-----------------------------------------------------------------------
; void Adapt ( short* pM, const short* pAdapt, int nDirection, int nOrder )
;
; Updates pM in place, element by element, with 16-bit wrap-around
; arithmetic (paddw / psubw):
;       nDirection > 0 :  pM[i] += pAdapt[i]
;       nDirection < 0 :  pM[i] -= pAdapt[i]
;       nDirection = 0 :  pM is left untouched
;
; Elements are processed in groups of 16 (32 bytes per loop pass), so
; only the first (nOrder / 16) * 16 elements are touched; any remainder
; is ignored.  When nOrder < 16 (including negative values) the function
; returns without touching memory.  The previous code let the group
; counter wrap around in that case and ran the loop 2^32 times.
;
; Stack on entry:
;   [esp+16] nOrder
;   [esp+12] nDirection
;   [esp+ 8] pAdapt
;   [esp+ 4] pM
;   [esp+ 0] Return Address
;
; Out:   nothing
; Clobb: eax, ecx, edx, mm0-mm3, flags
;        (emms is executed on every path that used an MMX register)
;
; Note:  'proc' / 'endproc' come from Tools.inc, which is not visible
;        here; as in the original code, 'endproc' is relied upon to emit
;        the final return of the subtract path.
;-----------------------------------------------------------------------

; Argument offsets relative to esp at entry.
%define ADAPT_ARG_PM            4
%define ADAPT_ARG_PADAPT        8
%define ADAPT_ARG_DIRECTION     12
%define ADAPT_ARG_ORDER         16
; log2 of the number of elements handled per loop pass (16).
%define ADAPT_GROUP_SHIFT       4
; Bytes consumed per loop pass: 16 elements * 2 bytes.
%define ADAPT_GROUP_BYTES       32

        align   16
proc Adapt
        mov     eax, [esp + ADAPT_ARG_PM]           ; eax = pM     (read/write cursor)
        mov     ecx, [esp + ADAPT_ARG_PADAPT]       ; ecx = pAdapt (read cursor)
        mov     edx, [esp + ADAPT_ARG_ORDER]
        sar     edx, ADAPT_GROUP_SHIFT              ; edx = nOrder / 16, sign kept
        test    edx, edx                            ; sar leaves OF undefined, so re-test
        jle     short .nothing                      ; no complete group: nothing to do

        cmp     dword [esp + ADAPT_ARG_DIRECTION], byte 0
        jg      short .add_loop
        jl      short .sub_loop
.nothing:                                           ; nDirection == 0 or no work
        ret                                         ; no MMX register used yet: no emms

        ; Padding sits behind the 'ret' above, so it is never executed.
        align   16
.add_loop:                                          ; invariant: edx > 0 groups remain
        movq    mm0, [eax]
        paddw   mm0, [ecx]
        movq    [eax], mm0
        movq    mm1, [eax + 8]
        paddw   mm1, [ecx + 8]
        movq    [eax + 8], mm1
        movq    mm2, [eax + 16]
        paddw   mm2, [ecx + 16]
        movq    [eax + 16], mm2
        movq    mm3, [eax + 24]
        paddw   mm3, [ecx + 24]
        movq    [eax + 24], mm3
        add     eax, byte ADAPT_GROUP_BYTES
        add     ecx, byte ADAPT_GROUP_BYTES
        dec     edx
        jnz     .add_loop
        emms                                        ; hand the FPU/MMX state back
        ret

        ; Padding sits behind the 'ret' above, so it is never executed.
        align   16
.sub_loop:                                          ; invariant: edx > 0 groups remain
        movq    mm0, [eax]
        psubw   mm0, [ecx]
        movq    [eax], mm0
        movq    mm1, [eax + 8]
        psubw   mm1, [ecx + 8]
        movq    [eax + 8], mm1
        movq    mm2, [eax + 16]
        psubw   mm2, [ecx + 16]
        movq    [eax + 16], mm2
        movq    mm3, [eax + 24]
        psubw   mm3, [ecx + 24]
        movq    [eax + 24], mm3
        add     eax, byte ADAPT_GROUP_BYTES
        add     ecx, byte ADAPT_GROUP_BYTES
        dec     edx
        jnz     .sub_loop
        emms                                        ; hand the FPU/MMX state back
endproc
;-----------------------------------------------------------------------
; int CalculateDotProduct ( const short* pA, const short* pB, int nOrder )
;
; Returns the sum of pA[i] * pB[i] over the first (nOrder / 16) * 16
; elements.  Products are formed with pmaddwd (signed 16 x 16 -> 32 bit,
; adjacent pairs summed) and accumulated with paddd, i.e. in 32-bit
; lanes with wrap-around on overflow.
;
; Elements are processed in groups of 16 (32 bytes per loop pass); any
; remainder of nOrder is ignored.  When nOrder < 16 (including negative
; values) the result is 0.  The previous code let the group counter wrap
; around in that case and ran the loop 2^32 times.
;
; The result is moved straight from the MMX register into eax; the
; argument slot at [esp+4] is no longer used as scratch space.
;
; Stack on entry:
;   [esp+12] nOrder
;   [esp+ 8] pB
;   [esp+ 4] pA
;   [esp+ 0] Return Address
;
; Out:   eax = dot product
; Clobb: ecx, edx, mm0-mm3, mm6, mm7, flags
;        (emms is executed before every return)
;
; Note:  'proc' / 'endproc' come from Tools.inc, which is not visible
;        here; as in the original code, 'endproc' is relied upon to emit
;        the final return.
;-----------------------------------------------------------------------

; Argument offsets relative to esp at entry.
%define DOT_ARG_PA              4
%define DOT_ARG_PB              8
%define DOT_ARG_ORDER           12
; log2 of the number of elements handled per loop pass (16).
%define DOT_GROUP_SHIFT         4
; Bytes consumed per loop pass: 16 elements * 2 bytes.
%define DOT_GROUP_BYTES         32

        align   16
proc CalculateDotProduct
        mov     eax, [esp + DOT_ARG_PA]             ; eax = pA (read cursor)
        mov     ecx, [esp + DOT_ARG_PB]             ; ecx = pB (read cursor)
        mov     edx, [esp + DOT_ARG_ORDER]
        pxor    mm7, mm7                            ; mm7 = two 32-bit partial sums = 0
        sar     edx, DOT_GROUP_SHIFT                ; edx = nOrder / 16, sign kept
        test    edx, edx                            ; sar leaves OF undefined, so re-test
        jg      short .dot_loop

        emms                                        ; pxor above touched the MMX state
        xor     eax, eax                            ; no complete group: result is 0
        ret

        ; Padding sits behind the 'ret' above, so it is never executed.
        align   16
.dot_loop:                                          ; invariant: edx > 0 groups remain
        movq    mm0, [eax]
        pmaddwd mm0, [ecx]
        paddd   mm7, mm0
        movq    mm1, [eax + 8]
        pmaddwd mm1, [ecx + 8]
        paddd   mm7, mm1
        movq    mm2, [eax + 16]
        pmaddwd mm2, [ecx + 16]
        paddd   mm7, mm2
        movq    mm3, [eax + 24]
        pmaddwd mm3, [ecx + 24]
        add     eax, byte DOT_GROUP_BYTES
        add     ecx, byte DOT_GROUP_BYTES
        paddd   mm7, mm3
        dec     edx
        jnz     .dot_loop

        ; Fold the two 32-bit partial sums in mm7 into one.
        movq    mm6, mm7
        psrlq   mm7, 32                             ; high lane -> low lane
        paddd   mm6, mm7                            ; low dword of mm6 = total
        movd    eax, mm6                            ; return value
        emms                                        ; hand the FPU/MMX state back
endproc
;-----------------------------------------------------------------------
; BOOL GetMMXAvailable ( void );
;
; Returns 1 when the processor reports MMX support, 0 otherwise.
;
; Method:
;   1. Try to toggle the ID bit (bit 21) of EFLAGS.  If the bit cannot
;      be changed, the CPUID instruction does not exist, so there is no
;      MMX either.
;   2. Otherwise execute CPUID with eax = 1 and test the MMX feature bit
;      (bit 23) of edx.
;
; The caller's EFLAGS value is written back after the probe, so the ID
; bit is not left toggled, and only the ID bit itself is examined when
; deciding whether CPUID exists.
;
; In:    nothing
; Out:   eax = 0 or 1
; Clobb: flags (arithmetic flags only).  All other general registers
;        are preserved by pushad / popad.
;
; Note:  'proc' / 'endproc' come from Tools.inc, which is not visible
;        here; as in the original code, 'endproc' is relied upon to emit
;        the return.
;-----------------------------------------------------------------------

; EFLAGS.ID (bit 21): writable only on processors that implement CPUID.
%define EFLAGS_ID_MASK          0x200000
; CPUID function 1, edx bit 23: MMX technology supported.
%define CPUID_EDX_MMX_MASK      0x800000

proc GetMMXAvailable
        pushad                                      ; CPUID overwrites eax, ebx, ecx, edx

        pushfd
        pop     eax
        mov     ecx, eax                            ; ecx = caller's EFLAGS
        xor     eax, EFLAGS_ID_MASK                 ; flip the ID bit
        push    eax
        popfd                                       ; try to install the flipped value
        pushfd
        pop     eax                                 ; eax = EFLAGS as the CPU kept them
        push    ecx
        popfd                                       ; put the caller's EFLAGS back

        xor     eax, ecx                            ; eax = bits that actually changed
        test    eax, EFLAGS_ID_MASK
        jz      short .done                         ; ZF=1: no CPUID, so no MMX

        mov     eax, 1                              ; CPUID function 1: feature flags
        cpuid
        test    edx, CPUID_EDX_MMX_MASK             ; ZF=0 when MMX is present

.done:
        popad                                       ; restores registers, leaves flags alone
        setnz   al                                  ; al = 1 only if the MMX bit was set
        and     eax, byte 1                         ; drop the restored upper bits of eax
endproc
end