64,637
社区成员
发帖
与我相关
我的任务
分享
/*
 * HexDump - print a hex/ASCII dump of len bytes starting at buf to stdout.
 *
 * Every output line covers up to 16 bytes and is laid out as
 *   <8-digit hex address> " -" , " xx" per byte , " " , one ASCII char per byte
 * where the address is addr plus the offset of the first byte on that line.
 * Bytes outside the range '!'..'~' (this includes the space character) are
 * shown as '.'.  A short final line is padded with three blanks per missing
 * byte so that its ASCII column lines up with the full lines above it.
 *
 * buf  - bytes to dump; nothing is printed when it is NULL
 * len  - number of bytes to dump; nothing is printed when it is <= 0
 * addr - address label printed for buf[0]
 */
void HexDump(char *buf,int len,int addr) {
    enum { BYTES_PER_LINE = 16 };
    /* Worst case: 10 (address and " -") + 16*3 (hex) + 1 (gap) + 16 (ASCII) + 1 (NUL) = 76. */
    char line[80];
    int base = 0;

    if (buf == NULL) {
        return;
    }

    while (base < len) {
        int count = len - base;
        int pos;
        int j;

        if (count > BYTES_PER_LINE) {
            count = BYTES_PER_LINE;
        }

        /*
         * Append at a running offset instead of passing the buffer to sprintf as
         * both destination and "%s" source: copying between overlapping objects
         * is undefined behaviour.
         */
        pos = sprintf(line, "%08x -", (unsigned)addr + (unsigned)base);
        for (j = 0; j < count; j++) {
            pos += sprintf(line + pos, " %02x", (unsigned char)buf[base + j]);
        }

        /* Pad the hex column of a short line: each missing byte is 3 characters wide. */
        for (; j < BYTES_PER_LINE; j++) {
            line[pos++] = ' ';
            line[pos++] = ' ';
            line[pos++] = ' ';
        }

        line[pos++] = ' ';
        for (j = 0; j < count; j++) {
            char c = buf[base + j];
            line[pos++] = ('!' <= c && c <= '~') ? c : '.';
        }
        line[pos] = '\0';
        printf("%s\n", line);

        /* count <= len - base, so this can never step past len or overflow. */
        base += count;
    }
}
memset(a,1,sizeof(a))
memset(a,1,sizeof(a)
#include <windows.h>
//#include <intrin.h>
#include <stdio.h>
#define COUNT 8000 // number of floats in the test array

// The unrolled movdqa pass writes 80 bytes per iteration and has no tail handling,
// so the array size in bytes (COUNT*4) has to be a multiple of 80.
#if (COUNT * 4) % 80 != 0
#error COUNT*4 bytes must be a multiple of 80
#endif

__declspec(align(16)) float data[COUNT]; // 16-byte aligned: movdqa/movaps fault on unaligned addresses
int i;                                   // only referenced by the commented-out debug loops
unsigned __int64 nCtr2,nCtr1;            // QueryPerformanceCounter readings: end / start of a pass

/*
 * Micro-benchmark (MSVC, 32-bit x86 inline assembly): fill data[] with -1.0f
 * (bit pattern 0xBF800000) in three different ways and print the
 * QueryPerformanceCounter tick delta of each pass:
 *   1. rep stosd
 *   2. movdqa, unrolled to 5 stores (80 bytes) per iteration
 *   3. movaps, one 16-byte store per iteration
 * Every pass writes the whole array, so the three figures are comparable.
 */
int main() {
    // for (i=0;i<COUNT;i++) {data[i]=1.0f;printf("%g\n",data[i]);}

    // Pass 1: rep stosd stores eax to [edi] ecx times, one dword per float.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push ecx
        push edi
        mov ecx,COUNT
        mov eax,0xBF800000 //-1.0f
        lea edi,data
        rep stosd
        pop edi
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 2: broadcast -1.0f into all four lanes of xmm0, then 5 aligned stores per iteration.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push ecx
        mov eax,0xBF800000 //-1.0f
        mov ecx,COUNT*4/80 // iterations = bytes / 80; COUNT/80 covered only a quarter of the array
        movd xmm0,eax
        lea eax,data
        pshufd xmm0,xmm0,0
    __loop:
        movdqa [eax+64],xmm0
        movdqa [eax+48],xmm0
        movdqa [eax+32],xmm0
        movdqa [eax+16],xmm0
        movdqa [eax],xmm0
        add eax,80
        sub ecx,1
        jnz __loop
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 3: one aligned 16-byte store per iteration until eax reaches the end of data[].
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    // data[0]=-1.0f;
    // __m128 m128=_mm_load_ps1((float *)&data[0]);
    // for (i=0;i<COUNT;i+=4) _mm_store_ps1(&data[i],m128);
    __asm {
        push ecx
        mov eax,0xBF800000 //-1.0f
        movd xmm0,eax
        shufps xmm0,xmm0,0
        lea eax,data
        mov ecx,eax
        add ecx,COUNT*4 //COUNT*sizeof(float)
    loop1:
        movaps oword ptr [eax],xmm0
        add eax,16
        cmp eax,ecx
        jb loop1 // unsigned compare: eax and ecx hold addresses
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);
    // for (i=0;i<COUNT;i++) printf("%g\n",data[i]);
    return 0;
}
// Figures posted with the original listing (presumably the three printed tick deltas).
// NOTE(review): they predate the iteration-count fix in pass 2, which at that time wrote
// only COUNT bytes instead of COUNT*4, so the second figure is not comparable - re-measure.
//36679
//2228
//4806
//
#include <stdio.h>
#include <windows.h>
#include <intrin.h>
#define COUNT 5000000 // number of floats in the test array

// The unrolled movdqa pass writes 80 bytes per iteration and has no tail handling,
// so the array size in bytes (COUNT*4) has to be a multiple of 80.
#if (COUNT * 4) % 80 != 0
#error COUNT*4 bytes must be a multiple of 80
#endif

__declspec(align(16)) float data[COUNT]; // 16-byte aligned: movdqa and _mm_store_ps1 need aligned addresses
unsigned __int64 nCtr2,nCtr1;            // QueryPerformanceCounter readings: end / start of a pass

/*
 * Micro-benchmark (MSVC, 32-bit x86): fill data[] with -1.0f (bit pattern
 * 0xBF800000) in three different ways and print the QueryPerformanceCounter
 * tick delta of each pass:
 *   1. rep stosd (inline assembly)
 *   2. movdqa, unrolled to 5 stores (80 bytes) per iteration (inline assembly)
 *   3. _mm_store_ps1 intrinsic, 4 floats per iteration
 * Every pass writes the whole array, so the three figures are comparable.
 */
int main() {
    // for (i=0;i<COUNT;i++) {data[i]=1.0f;printf("%g\n",data[i]);}

    // Pass 1: rep stosd stores eax to [edi] ecx times, one dword per float.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push edi
        mov ecx,COUNT
        mov eax,0xBF800000 //-1.0f
        lea edi,data
        rep stosd
        pop edi
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 2: broadcast -1.0f into all four lanes of xmm0, then 5 aligned stores per iteration.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        mov eax,0xBF800000 //-1.0f
        mov ecx,COUNT*4/80 // iterations = bytes / 80; COUNT/80 covered only a quarter of the array
        movd xmm0,eax
        lea eax,data
        pshufd xmm0,xmm0,0
    __loop:
        movdqa [eax+64],xmm0
        movdqa [eax+48],xmm0
        movdqa [eax+32],xmm0
        movdqa [eax+16],xmm0
        movdqa [eax],xmm0
        add eax,80
        sub ecx,1
        jnz __loop
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 3: load -1.0f replicated into four lanes, then store 4 floats per iteration.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    data[0]=-1.0f;
    __m128 m128=_mm_load_ps1((float *)&data[0]);
    for (int i=0;i<COUNT;i+=4) _mm_store_ps1(&data[i],m128);
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);
    // for (i=0;i<COUNT;i++) printf("%g\n",data[i]);
    return 0;
}
#include <windows.h>
//#include <intrin.h>
#include <stdio.h>
__declspec(align(16)) float data[500]; // 500 floats = 2000 bytes, 16-byte aligned for movaps
int i;                                 // only referenced by the commented-out debug loops
unsigned __int64 nCtr2,nCtr1;          // QueryPerformanceCounter readings: end / start of a pass

/*
 * Micro-benchmark (MSVC, 32-bit x86 inline assembly): fill data[] with -1.0f
 * (bit pattern 0xBF800000) in three different ways and print the
 * QueryPerformanceCounter tick delta of each pass:
 *   1. rep stosd
 *   2. movdqu (unaligned store), unrolled to 5 stores (80 bytes) per iteration
 *   3. movaps (aligned store), one 16-byte store per iteration
 */
int main() {
    // for (i=0;i<500;i++) {data[i]=1.0f;printf("%g\n",data[i]);}

    // Pass 1: rep stosd stores eax to [edi] ecx times, one dword per float.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push ecx
        push edi
        mov ecx,500
        mov eax,0xBF800000 //-1.0f
        lea edi,data
        rep stosd
        pop edi
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 2: 25 iterations * 80 bytes = 2000 bytes, i.e. the whole array.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push ecx
        push edi
        mov eax,0xBF800000 //-1.0f
        mov ecx,25
        movd xmm0,eax
        lea edi,data
        pshufd xmm0,xmm0,0
    __loop:
        movdqu [edi+64],xmm0
        movdqu [edi+48],xmm0
        movdqu [edi+32],xmm0
        movdqu [edi+16],xmm0
        movdqu [edi],xmm0
        add edi,80
        sub ecx,1
        jnz __loop
        pop edi
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 3: one aligned 16-byte store per iteration until eax reaches the end of data[].
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    // data[0]=-1.0f;
    // __m128 m128=_mm_load_ps1((float *)&data[0]);
    // for (i=0;i<500;i+=4) _mm_store_ps1(&data[i],m128);
    __asm {
        push ecx
        mov eax,0xBF800000 //-1.0f
        movd xmm0,eax
        shufps xmm0,xmm0,0
        lea eax,data
        mov ecx,eax
        add ecx,500*4 //500*sizeof(float)
    loop1:
        movaps oword ptr [eax],xmm0
        add eax,16
        cmp eax,ecx
        // eax/ecx hold addresses, so the compare must be unsigned: a signed jl
        // misbehaves when the array lies at or across address 0x80000000.
        jb loop1
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);
    // for (i=0;i<500;i++) printf("%g\n",data[i]);
    return 0;
}
// Figures posted with the original listing (presumably the three printed tick deltas):
//824
//1107
//648
//
#include <windows.h>
//#include <intrin.h>
#include <stdio.h>
__declspec(align(16)) float data[500]; // 500 floats = 2000 bytes, 16-byte aligned for movaps
int i;                                 // only referenced by the commented-out debug loops
unsigned __int64 nCtr2,nCtr1;          // QueryPerformanceCounter readings: end / start of a pass

/*
 * Micro-benchmark (MSVC, 32-bit x86 inline assembly): fill data[] with -1.0f
 * (bit pattern 0xBF800000) in three different ways and print the
 * QueryPerformanceCounter tick delta of each pass:
 *   1. rep stosd
 *   2. movdqu (unaligned store), unrolled to 5 stores (80 bytes) per iteration
 *   3. movaps (aligned store), one 16-byte store per iteration
 */
int main() {
    // for (i=0;i<500;i++) {data[i]=1.0f;printf("%g\n",data[i]);}

    // Pass 1: rep stosd stores eax to [edi] ecx times, one dword per float.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push ecx
        push edi
        mov ecx,500
        mov eax,0xBF800000 //-1.0f
        lea edi,data
        rep stosd
        pop edi
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 2: 25 iterations * 80 bytes = 2000 bytes, i.e. the whole array.
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    __asm {
        push ecx
        push edi
        mov eax,0xBF800000 //-1.0f
        mov ecx,25
        movd xmm0,eax
        lea edi,data
        pshufd xmm0,xmm0,0
    __loop:
        movdqu [edi+64],xmm0
        movdqu [edi+48],xmm0
        movdqu [edi+32],xmm0
        movdqu [edi+16],xmm0
        movdqu [edi],xmm0
        add edi,80
        sub ecx,1
        jnz __loop
        pop edi
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);

    // Pass 3: one aligned 16-byte store per iteration until eax reaches the end of data[].
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
    // data[0]=-1.0f;
    // __m128 m128=_mm_load_ps1((float *)&data[0]);
    // for (i=0;i<500;i+=4) _mm_store_ps1(&data[i],m128);
    __asm {
        push ecx
        mov eax,0xBF800000 //-1.0f
        movd xmm0,eax
        shufps xmm0,xmm0,0
        lea eax,data
        // End pointer = data + 500*sizeof(float).  The listing used to load the
        // global i here, which is never assigned (so 0) and made the loop stop
        // after a single 16-byte store.
        mov ecx,eax
        add ecx,500*4
    loop1:
        movaps oword ptr [eax],xmm0
        add eax,16
        cmp eax,ecx
        jb loop1 // unsigned compare: eax and ecx hold addresses
        pop ecx
    }
    QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
    printf("%I64u\n",nCtr2-nCtr1);
    // for (i=0;i<500;i++) printf("%g\n",data[i]);
    return 0;
}
// Figures posted with the original listing (presumably the three printed tick deltas).
// NOTE(review): the third figure was taken while pass 3 wrote only the first 16 bytes
// of the array, so it is not comparable with the other two - re-measure.
//796
//1107
//499
//
#include <windows.h>
#include <intrin.h>
#include <stdio.h>
// Test array: 500 floats (2000 bytes), declared with 16-byte alignment.
__declspec(align(16)) float data[500];
// Loop index of the intrinsic pass and of the commented-out debug loops.
int i;
// QueryPerformanceCounter readings: nCtr1 is taken before a pass, nCtr2 after it.
unsigned __int64 nCtr2,nCtr1;
// Micro-benchmark (MSVC, 32-bit x86): fills data[] with -1.0f (bit pattern 0xBF800000)
// in three different ways and prints the performance-counter tick delta of each pass:
//   1. rep stosd (inline assembly)
//   2. movdqu, unrolled to 5 stores (80 bytes) per iteration (inline assembly)
//   3. _mm_store_ps1 intrinsic, 4 floats per iteration
int main() {
// for (i=0;i<500;i++) {data[i]=1.0f;printf("%g\n",data[i]);}
// Pass 1: rep stosd stores eax to [edi] ecx (=500) times, one dword per float.
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
__asm {
push ecx
push edi
mov ecx,500
mov eax,0xBF800000 //-1.0f
lea edi,data
rep stosd
pop edi
pop ecx
}
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
printf("%I64u\n",nCtr2-nCtr1);
// Pass 2: movd + pshufd 0 copy the dword in eax into all four lanes of xmm0;
// 25 iterations * 80 bytes = 2000 bytes, i.e. the whole array.
// movdqu is the unaligned-store form even though data[] is declared aligned.
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
__asm {
push ecx
push edi
mov eax,0xBF800000 //-1.0f
mov ecx,25
movd xmm0,eax
lea edi,data
pshufd xmm0,xmm0,0
__loop:
movdqu [edi+64],xmm0
movdqu [edi+48],xmm0
movdqu [edi+32],xmm0
movdqu [edi+16],xmm0
movdqu [edi],xmm0
add edi,80
sub ecx,1
jnz __loop
pop edi
pop ecx
}
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
printf("%I64u\n",nCtr2-nCtr1);
// Pass 3: seed data[0], load it replicated into four lanes, then store 4 floats
// per iteration. The seeding and the load are inside the timed region.
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
data[0]=-1.0f;
__m128 m128=_mm_load_ps1((float *)&data[0]);
for (i=0;i<500;i+=4) _mm_store_ps1(&data[i],m128);
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
printf("%I64u\n",nCtr2-nCtr1);
// for (i=0;i<500;i++) printf("%g\n",data[i]);
return 0;
}
// Figures posted with the listing (presumably the three printed tick deltas):
//837
//1067
//823
//
版主是不是该加精啊!
#include <windows.h>
#include <stdio.h>
// Test array: 500 floats (2000 bytes). Unlike the other listings there is no alignment
// specifier here, so only the unaligned store instruction (movdqu) is used on it.
float data[500];
// Only referenced by the commented-out debug loops.
int i;
// QueryPerformanceCounter readings: nCtr1 is taken before a pass, nCtr2 after it.
unsigned __int64 nCtr2,nCtr1;
// Micro-benchmark (MSVC, 32-bit x86 inline assembly): fills data[] with -1.0f
// (bit pattern 0xBF800000) in two different ways and prints the performance-counter
// tick delta of each pass:
//   1. rep stosd
//   2. movdqu, unrolled to 5 stores (80 bytes) per iteration
int main() {
// for (i=0;i<500;i++) {data[i]=1.0f;printf("%g\n",data[i]);}
// Pass 1: rep stosd stores eax to [edi] ecx (=500) times, one dword per float.
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
__asm {
push ecx
push edi
mov ecx,500
mov eax,0xBF800000 //-1.0f
lea edi,data
rep stosd
pop edi
pop ecx
}
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
printf("%I64u\n",nCtr2-nCtr1);
// Pass 2: movd + pshufd 0 copy the dword in eax into all four lanes of xmm0;
// 25 iterations * 80 bytes = 2000 bytes, i.e. the whole array.
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr1);
__asm {
push ecx
push edi
mov eax,0xBF800000 //-1.0f
mov ecx,25
movd xmm0,eax
lea edi,data
pshufd xmm0,xmm0,0
__loop:
movdqu [edi+64],xmm0
movdqu [edi+48],xmm0
movdqu [edi+32],xmm0
movdqu [edi+16],xmm0
movdqu [edi],xmm0
add edi,80
sub ecx,1
jnz __loop
pop edi
pop ecx
}
QueryPerformanceCounter((LARGE_INTEGER *) &nCtr2);
printf("%I64u\n",nCtr2-nCtr1);
// for (i=0;i<500;i++) printf("%g\n",data[i]);
return 0;
}
// Figures posted with the listing (presumably the two printed tick deltas):
//837
//2592
//
不要迷信书、考题、老师、回帖;
要迷信CPU、编译器、调试器、运行结果。
并请结合“盲人摸太阳”和“驾船出海时一定只带一个指南针。”加以理解。
任何理论、权威、传说、真理、标准、解释、想象、知识……都比不上摆在眼前的事实!