16,748
社区成员
发帖
与我相关
我的任务
分享
SSE优化算法:
function CGPosCharSSE(SubChar: Char ; SrcString: PChar; Len: Integer; Order:Boolean=True): Integer;
// Searches the first Len bytes at SrcString for the byte SubChar, comparing
// 16 bytes per step with SSE2 (pcmpeqb / pmovmskb).
//   Order = True  : returns the 1-based position of the first match.
//   Order = False : returns the 1-based position of the last match.
// Returns 0 when there is no match, when Len <= 0, or when SrcString is nil.
// Only bytes inside [SrcString, SrcString+Len) are ever read: whole 16-byte
// blocks use SSE, the remaining (< 16) bytes are compared one at a time.
// Register convention (32-bit): SubChar -> AL; SrcString -> EDX; Len -> ECX;
// Order -> [ebp+8].
// NOTE(review): the comparison is byte-wise, so this presumably targets a
// build where Char is one byte (ANSI) - confirm before use with a 2-byte Char.
asm
  push esi
  push ebx
  test ecx, ecx
  jle @NotFound                 // Len <= 0: nothing to search
  test edx, edx
  jz @NotFound                  // nil buffer
  mov bl, al                    // keep the needle for the byte-wise tail loops
  mov ah, al
  movd xmm1, eax
  pshuflw xmm1, xmm1, 0
  pshufd xmm1, xmm1, 0          // xmm1 = needle replicated into all 16 bytes
  cmp byte ptr [ebp+8], 0       // Order is a Boolean: only its low byte is defined
  je @Reverse                   // False -> search backwards
  {--------------- forward search ---------------}
  xor esi, esi                  // esi = offset of the next unexamined byte
@OrderBlock:
  mov eax, ecx
  sub eax, esi                  // eax = bytes still unexamined
  cmp eax, $10
  jl @OrderTail                 // fewer than 16 left: do not read past Len
  movups xmm0, [edx+esi]
  pcmpeqb xmm0, xmm1
  pmovmskb eax, xmm0            // bit i set <=> byte i equals the needle
  test eax, eax
  jnz @OrderFound
  add esi, $10
  jmp @OrderBlock
@OrderTail:
  cmp esi, ecx
  jge @NotFound
  cmp bl, byte ptr [edx+esi]
  je @ByteFound
  inc esi
  jmp @OrderTail
  {--------------- backward search --------------}
@Reverse:
  mov esi, ecx                  // esi = number of leading bytes not yet examined
@ReverseBlock:
  cmp esi, $10
  jl @ReverseTail               // fewer than 16 left: do not read before the buffer
  sub esi, $10
  movups xmm0, [edx+esi]
  pcmpeqb xmm0, xmm1
  pmovmskb eax, xmm0
  test eax, eax
  jz @ReverseBlock              // no match in this block: step back another 16 bytes
  bsr eax, eax                  // highest matching byte inside the block
  jmp @BlockFound
@ReverseTail:
  test esi, esi
  jz @NotFound
  dec esi
  cmp bl, byte ptr [edx+esi]
  jne @ReverseTail
@ByteFound:
  lea eax, [esi+1]              // 0-based offset -> 1-based position
  jmp @Exit
@OrderFound:
  bsf eax, eax                  // lowest matching byte inside the block
@BlockFound:
  lea eax, [eax+esi+1]          // block offset + index in block, 1-based
  jmp @Exit
@NotFound:
  xor eax, eax
@Exit:
  pop ebx
  pop esi
end;
常规优化算法:
function CGPosChar(SubChar: Char ; SrcString: PChar; Len: Integer; Order:Boolean=True): Integer;
// Searches the first Len bytes at SrcString for the byte SubChar, testing
// four bytes per step with a "does this dword contain a zero byte" trick
// applied to (data xor needle).
//   Order = True  : returns the 1-based position of the first match.
//   Order = False : returns the 1-based position of the last match.
// Returns 0 when there is no match, when Len <= 0, or when SrcString is nil.
// Only bytes inside [SrcString, SrcString+Len) are ever read: whole dwords
// use the word test, the remaining (< 4) bytes are compared one at a time.
// Register convention (32-bit): SubChar -> AL; SrcString -> EDX; Len -> ECX;
// Order -> [ebp+8].
// NOTE(review): the comparison is byte-wise, so this presumably targets a
// build where Char is one byte (ANSI) - confirm before use with a 2-byte Char.
asm
  push esi
  push ebx
  push edi
  test ecx, ecx
  jle @NotFound                 // Len <= 0: nothing to search
  test edx, edx
  jz @NotFound                  // nil buffer
  movzx eax, al
  imul ebx, eax, $01010101      // ebx = needle in all four bytes (bl = needle)
  cmp byte ptr [ebp+8], 0       // Order is a Boolean: only its low byte is defined
  je @Reverse                   // False -> search backwards
  {--------------- forward search ---------------}
  xor esi, esi                  // esi = offset of the next unexamined byte
@OrderBlock:
  mov eax, ecx
  sub eax, esi                  // eax = bytes still unexamined
  cmp eax, 4
  jl @OrderTail                 // fewer than 4 left: do not read past Len
  mov eax, [edx+esi]
  xor eax, ebx                  // matching bytes become $00
  lea edi, [eax-$01010101]
  not eax
  and eax, edi
  and eax, $80808080            // non-zero <=> some byte was $00
  jnz @OrderFound
  add esi, 4
  jmp @OrderBlock
@OrderTail:
  cmp esi, ecx
  jge @NotFound
  cmp bl, byte ptr [edx+esi]
  je @ByteFound
  inc esi
  jmp @OrderTail
  {--------------- backward search --------------}
@Reverse:
  mov esi, ecx                  // esi = number of leading bytes not yet examined
@ReverseBlock:
  cmp esi, 4
  jl @ReverseTail               // fewer than 4 left: do not read before the buffer
  mov eax, [edx+esi-4]
  xor eax, ebx
  lea edi, [eax-$01010101]
  not eax
  and eax, edi
  and eax, $80808080
  // Flags above the lowest one can be false positives (borrow from the
  // subtraction), so the mask only says "a match exists in these 4 bytes";
  // the exact byte is located by the byte-wise loop below.
  jnz @ReverseTail
  sub esi, 4
  jmp @ReverseBlock
@ReverseTail:
  test esi, esi
  jz @NotFound
  dec esi
  cmp bl, byte ptr [edx+esi]
  jne @ReverseTail
@ByteFound:
  lea eax, [esi+1]              // 0-based offset -> 1-based position
  jmp @Exit
@OrderFound:
  bsf eax, eax                  // lowest flag bit is always exact
  shr eax, 3                    // bit index 7/15/23/31 -> byte index 0..3
  lea eax, [eax+esi+1]
  jmp @Exit
@NotFound:
  xor eax, eax
@Exit:
  pop edi
  pop ebx
  pop esi
end;
CPU SSE支持检测函数:
function CheckSupportSSE(SupportFlag: Byte): Boolean;
// Reports whether the CPU advertises an SSE level through CPUID leaf 1.
// SupportFlag: 1 = SSE, 2 = SSE2, 3 = SSE3, 4 = SSSE3, 5 = SSE4.1, 6 = SSE4.2.
// Returns False for any other SupportFlag value.
// SSE and SSE2 are reported in EDX; SSE3, SSSE3, SSE4.1 and SSE4.2 in ECX.

  // CPUID leaf 1, EDX feature word. CPUID overwrites EBX, which the
  // caller expects to be preserved, so it is saved and restored here.
  function CpuFeaturesEDX: DWORD;
  asm
    push ebx
    mov eax, 1
    cpuid
    mov eax, edx
    pop ebx
  end;

  // CPUID leaf 1, ECX feature word (EBX preserved as above).
  function CpuFeaturesECX: DWORD;
  asm
    push ebx
    mov eax, 1
    cpuid
    mov eax, ecx
    pop ebx
  end;

const
  // Bit masks within the respective feature word, indexed by SupportFlag.
  _SSE_FLAG: array[1..6] of DWORD =
    ($02000000,   // EDX bit 25: SSE
     $04000000,   // EDX bit 26: SSE2
     $00000001,   // ECX bit 0 : SSE3
     $00000200,   // ECX bit 9 : SSSE3
     $00080000,   // ECX bit 19: SSE4.1
     $00100000);  // ECX bit 20: SSE4.2
begin
  case SupportFlag of
    1, 2: Result := (CpuFeaturesEDX and _SSE_FLAG[SupportFlag]) <> 0;
    3..6: Result := (CpuFeaturesECX and _SSE_FLAG[SupportFlag]) <> 0;
  else
    Result := False;   // outside the documented 1..6 range
  end;
end;