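/*
 * Residue from the forum page this listing was copied from (not part of the program):
 * 21,459 community members / Post / Related to me / My tasks / Share
 */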
#include <stdio.h>
#include <windows.h>
#define TestCount 10000
DWORD TestD[655360];
//应该总假定是双字串
void AsmMemZero0(unsigned long * p, unsigned long t)
{
__asm
{
xor eax, eax
cld
mov edi, dword ptr [p]
mov edx, t
test edx, 0xFFFFFFFF
je exit0
test edi, 15 //是否16字节对齐
je AsmZero1
mov ecx, edi
and ecx, 0xFFFFFFF0
add ecx, 16
sub ecx, edi //得到需要双字复制部分,要和edx比较谁大
shr ecx, 2 //转换为双字长度
mov ebx, ecx
cmp ecx, edx
ja AsmZero2 //不足,转尾处理
AsmZero01:
mov dword ptr [edi], eax
add edi, 4
sub ecx, 1
jne AsmZero01
sub edx, ebx
je exit0 //是否刚好
AsmZero1: //16字节对齐
mov ecx, edx
shr ecx, 2
je AsmZero2 //是否不足16字节
pxor xmm0, xmm0
AsmZero11:
movdqa [edi], xmm0
add edi, 16
sub ecx, 1
jne AsmZero11
AsmZero2:
mov ecx, edx
and ecx, 15
je exit0
AsmZero3:
mov dword ptr [edi], eax
add edi, 4
sub ecx, 1
jne AsmZero3
exit0:
}
}
//应该总假定是双字串
void AsmMemZero1(unsigned long * p, unsigned long t)
{
__asm
{
xor eax, eax
cld
mov edi, dword ptr [p]
mov edx, t
test edx, 0xFFFFFFFF
je exit0
test edi, 15 //是否16字节对齐
je AsmZero1
mov ecx, edi
and ecx, 0xFFFFFFF0
add ecx, 16
sub ecx, edi //得到需要双字复制部分,要和edx比较谁大
shr ecx, 2 //转换为双字长度
mov ebx, ecx
cmp ecx, edx
ja AsmZero2 //不足,转尾处理
AsmZero01:
mov dword ptr [edi], eax
add edi, 4
sub ecx, 1
jne AsmZero01
sub edx, ebx
je exit0 //是否刚好
AsmZero1: //16字节对齐
mov ecx, edx
pxor xmm0, xmm0
shr ecx, 2
je AsmZero2 //是否不足16字节
AsmZero11:
movntdq [edi], xmm0
add edi, 16
sub ecx, 1
jne AsmZero11
AsmZero2:
mov ecx, edx
and ecx, 15
je exit0
AsmZero3:
mov dword ptr [edi], eax
add edi, 4
sub ecx, 1
jne AsmZero3
exit0:
sfence
}
}
//应该总假定是双字串
void AsmMemZeroALU(unsigned long * p, unsigned long t)
{
__asm
{
xor eax, eax
cld
mov edi, dword ptr [p]
mov ecx, t
test ecx, 0xFFFFFFFF
je exit0
rep stosd
exit0:
}
}
/*
 * Benchmark driver: reads a dword count from stdin, then times TestCount
 * calls of AsmMemZero0 and of AsmMemZero1 over the first TestSize dwords of
 * TestD, printing each elapsed time in milliseconds.
 *
 * Returns 0 on success, 1 if the input is not a number or exceeds the size
 * of TestD.
 */
int main(void)
{
    /* Capacity of the global test buffer, in dwords. */
    const unsigned long maxDwords = (unsigned long)(sizeof(TestD) / sizeof(TestD[0]));
    LARGE_INTEGER freq, start, stop;
    unsigned long i, TestSize = 0;

    printf("Input Test Size: ");
    /* %lu matches unsigned long; reject unparsable input and any count that
       would make the zeroing routines write past the end of TestD. */
    if (scanf("%lu", &TestSize) != 1 || TestSize > maxDwords)
    {
        fprintf(stderr, "Test size must be a dword count between 0 and %lu\n", maxDwords);
        return 1;
    }

    SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
    /* The counter frequency is fixed at boot, so one query serves both runs. */
    QueryPerformanceFrequency(&freq);

    QueryPerformanceCounter(&start);
    for (i = 0; i < TestCount; i++)
        AsmMemZero0(&TestD[0], TestSize);
    QueryPerformanceCounter(&stop);
    printf("func 0 Elapsed time: %.3f ms\n",
           (double)(stop.QuadPart - start.QuadPart) * 1000.0 / (double)freq.QuadPart);

    QueryPerformanceCounter(&start);
    for (i = 0; i < TestCount; i++)
        AsmMemZero1(&TestD[0], TestSize);
    QueryPerformanceCounter(&stop);
    printf("func 1 Elapsed time: %.3f ms\n",
           (double)(stop.QuadPart - start.QuadPart) * 1000.0 / (double)freq.QuadPart);

    return 0;
}