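// [Forum page chrome captured along with the snippet; not part of the program]
// 64,636 community members | Post | Related to me | My tasks | Share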
#include <Windows.h>
#include <emmintrin.h>
#include <stdio.h>
#include <string.h>
#include <tchar.h>
// add: returns a + b, with the addition carried out by a 32-bit x86
// inline-assembly block (MSVC __asm syntax) rather than by the compiler.
//
//   a, b    - the two addends.
//   returns - the low 32 bits of their sum, as produced by the ADD instruction.
int add(int a,int b)
{
    int result;
    __asm
    {
        push    eax             // EAX is the only register touched; save it
        mov     eax, b
        add     eax, a          // EAX = b + a
        mov     result, eax     // hand the sum back to the C++ local
        pop     eax             // restore the saved EAX
    }
    return result;
}
// mymemcpy: copy nByte bytes from pSrc to pDes using SSE2 registers
// (32-bit x86, MSVC inline assembly).
//
// Parameters:
//   pDes  - destination buffer; receives nByte bytes.
//   pSrc  - source buffer; nByte bytes are read from it.
//   nByte - number of bytes to copy. Nothing is copied when nByte <= 0 or
//           when either pointer is null.
//
// Strategy: full 128-byte blocks are moved through XMM0..XMM7 (eight 16-byte
// loads followed by eight 16-byte stores); the remaining 0..127 bytes are
// finished with REP MOVSB. MOVDQU is used for every access, so neither
// pointer has to be 16-byte aligned.
//
// Like memcpy, partially overlapping buffers are not supported. pDes == pSrc
// is harmless because each block is completely loaded before it is stored.
//
// Registers written inside the __asm block: ECX, ESI, EDI, XMM0..XMM7, flags.
// No manual push/pop is done; this relies on MSVC saving ESI/EDI for a
// function whose __asm block uses them (see the MSVC inline assembler docs).
void mymemcpy(void* pDes, void* pSrc,int nByte)
{
    if (!pDes || !pSrc || nByte <= 0)
        return;

    __asm
    {
        mov     esi, pSrc                       // ESI = read cursor
        mov     edi, pDes                       // EDI = write cursor
        mov     ecx, nByte
        sub     ecx, 80h                        // ECX = bytes left - 128
        jb      copy_tail                       // fewer than 128 bytes in total

    copy_block:
        // Load one 128-byte block ...
        movdqu  xmm0, xmmword ptr [esi]
        movdqu  xmm1, xmmword ptr [esi+10h]
        movdqu  xmm2, xmmword ptr [esi+20h]
        movdqu  xmm3, xmmword ptr [esi+30h]
        movdqu  xmm4, xmmword ptr [esi+40h]
        movdqu  xmm5, xmmword ptr [esi+50h]
        movdqu  xmm6, xmmword ptr [esi+60h]
        movdqu  xmm7, xmmword ptr [esi+70h]
        // ... and store it at the matching destination offsets.
        movdqu  xmmword ptr [edi],     xmm0
        movdqu  xmmword ptr [edi+10h], xmm1
        movdqu  xmmword ptr [edi+20h], xmm2
        movdqu  xmmword ptr [edi+30h], xmm3
        movdqu  xmmword ptr [edi+40h], xmm4
        movdqu  xmmword ptr [edi+50h], xmm5
        movdqu  xmmword ptr [edi+60h], xmm6
        movdqu  xmmword ptr [edi+70h], xmm7
        add     esi, 80h
        add     edi, 80h
        sub     ecx, 80h                        // borrow => no full block remains
        jnb     copy_block

    copy_tail:
        add     ecx, 80h                        // undo the bias: ECX = 0..127 bytes left
        rep movsb                               // copies ECX bytes from [ESI] to [EDI];
                                                // a no-op when ECX is 0
    }
}
// TP_Memcpy: copy nSize bytes from pSrc to pDest using MMX registers and
// non-temporal stores (32-bit x86, MSVC inline assembly).
//
// Parameters:
//   pDest - destination buffer.
//   pSrc  - source buffer.
//   nSize - number of bytes to copy.
//
// Returns pDest after copying, or NULL (and copies nothing) when nSize <= 0
// or either pointer is NULL.
//
// The copy runs in three stages:
//   1. 64-byte blocks: eight MOVQ loads into MM0..MM7, eight MOVNTQ stores;
//   2. 8-byte quadwords: one MOVQ / MOVNTQ pair per iteration;
//   3. the last 0..7 bytes with REP MOVSB.
// EAX carries a biased negative count of the bytes still to copy, so each
// loop can terminate on the carry flag of its own "add eax, N".
//
// MOVNTQ stores are weakly ordered, so an SFENCE is issued before returning.
// EMMS then clears the MMX state so later x87 floating-point code works.
//
// Registers written inside the __asm block: EAX, EBX, ECX, EDX, ESI, EDI,
// MM0..MM7, flags.
void* CALLBACK TP_Memcpy(IN OUT void* pDest,
                         IN const void* pSrc,
                         IN int nSize)
{
    // A negative size would otherwise enter the 64-byte loop and keep copying
    // until the 32-bit counter wrapped, so reject it together with zero.
    if(nSize<=0 || pSrc==NULL || pDest==NULL)
        return NULL;

    __asm
    {
        MOV     EDX, pSrc               // EDX = read cursor
        MOV     EBX, pDest              // EBX = write cursor
        MOV     EAX, nSize
        neg     eax                     // EAX = -nSize
        add     eax, 63                 // EAX = 63 - nSize
        jbe     skipblastloop           // CF or ZF set: nSize <= 63, no full block

    blastloop:
        movq    mm0, [edx]              // 8 x 8 = 64 bytes per iteration
        movq    mm1, [edx+8]
        movq    mm2, [edx+16]
        movq    mm3, [edx+24]
        movq    mm4, [edx+32]
        movq    mm5, [edx+40]
        movq    mm6, [edx+48]
        movq    mm7, [edx+56]
        movntq  [ebx], mm0
        movntq  [ebx+8], mm1
        movntq  [ebx+16], mm2
        movntq  [ebx+24], mm3
        movntq  [ebx+32], mm4
        movntq  [ebx+40], mm5
        movntq  [ebx+48], mm6
        movntq  [ebx+56], mm7
        add     ebx, 64
        add     edx, 64
        add     eax, 64
        jnc     blastloop               // no carry (CF=0): at least 64 bytes remain

    skipblastloop:
        sub     eax, 63-7               // EAX = 7 - (bytes remaining)
        jns     noextras                // SF=0: 7 or fewer bytes remain

    quadloop:
        movq    mm0, [edx]              // 8 bytes per iteration
        movntq  [ebx], mm0
        add     edx, 8
        add     ebx, 8
        add     eax, 8
        jnc     quadloop                // no carry: at least 8 bytes remain

    noextras:
        sub     eax, 7                  // EAX = -(bytes remaining), 0..7 bytes left
        jz      nooddballs
        mov     ecx, eax
        neg     ecx                     // ECX = number of trailing bytes
        mov     esi, edx
        mov     edi, ebx
        rep movsb                       // copy ECX single bytes from [ESI] to [EDI]

    nooddballs:
        sfence                          // order the non-temporal stores before returning
        EMMS                            // leave MMX state so x87 code can run again
    }
    return pDest;
}
// Benchmark driver: times mymemcpy, the CRT memcpy and TP_Memcpy on the same
// data and prints each elapsed time in milliseconds (GetTickCount ticks),
// one value per line, in that order. Waits for a key press before exiting.
//
// One 16-byte-aligned 500 MiB allocation is split into two 250 MiB halves;
// each routine copies the lower half into the upper half. Separate source and
// destination ranges are required because memcpy has undefined behaviour on
// overlapping buffers.
//
// Returns 0 on success, 1 when the buffer cannot be allocated.
int _tmain(int argc, _TCHAR* argv[])
{
    const int nTotalBytes = 1024*1024*500;      // size of the single allocation
    const int nCopyBytes  = nTotalBytes/2;      // bytes moved per measurement

    char* p=(char*)_mm_malloc(nTotalBytes,16);
    if(p==NULL)
    {
        printf("_mm_malloc failed\n");
        return 1;
    }
    char* pSrc = p;
    char* pDst = p + nCopyBytes;                // still 16-byte aligned

    // Touch every page up front so that first-access page faults are not
    // charged to whichever routine happens to be measured first.
    memset(p, 0, nTotalBytes);

    unsigned int t1= GetTickCount();
    mymemcpy(pDst,pSrc,nCopyBytes);
    t1=GetTickCount()-t1;
    printf("%u\n",t1);

    t1= GetTickCount();
    memcpy(pDst,pSrc,nCopyBytes);
    t1=GetTickCount()-t1;
    printf("%u\n",t1);

    t1= GetTickCount();
    TP_Memcpy(pDst,pSrc,nCopyBytes);
    t1=GetTickCount()-t1;
    printf("%u\n",t1);

    _mm_free(p);
    getchar();
    return 0;
}