21,458
社区成员
发帖
与我相关
我的任务
分享
// Matrix storage: a flat, contiguous array of 16 single-precision floats.
// NOTE(review): presumably a 4x4 matrix; whether the elements are laid out
// row-major or column-major is not visible here -- confirm against callers.
typedef struct SIMDx86Matrix SIMDx86Matrix;

struct SIMDx86Matrix
{
    float m[16]; // the 16 matrix elements
};
// Adds the matrix *In to *Out element by element and stores the sum in *Out.
//
//   Out - left operand and destination; all 16 floats are overwritten.
//   In  - right operand; only read.
//
// The two SSE paths use movaps/addps with memory operands, which raise a
// fault on addresses that are not 16-byte aligned, so both pointers must be
// 16-byte aligned when those paths are compiled in.
void SIMDx86Matrix_AlignedSum(SIMDx86Matrix* Out, const SIMDx86Matrix* In)
{
#if defined(__GNUC__) && defined(__SSE__)
    // GCC/Clang path, AT&T syntax. __asm__ is spelled out because the plain
    // `asm` keyword is not available in strict ISO modes such as -std=c11.
    // The clobber list tells the compiler that xmm0-xmm3 are overwritten and
    // that memory (*Out) is modified behind its back.
    __asm__ __volatile__(
        "movaps   (%0), %%xmm0\n\t"
        "movaps 16(%0), %%xmm1\n\t"
        "movaps 32(%0), %%xmm2\n\t"
        "movaps 48(%0), %%xmm3\n\t"
        "addps    (%1), %%xmm0\n\t"
        "addps  16(%1), %%xmm1\n\t"
        "addps  32(%1), %%xmm2\n\t"
        "addps  48(%1), %%xmm3\n\t"
        "movaps %%xmm0,   (%0)\n\t"
        "movaps %%xmm1, 16(%0)\n\t"
        "movaps %%xmm2, 32(%0)\n\t"
        "movaps %%xmm3, 48(%0)\n\t"
        :
        : "r" (Out), "r" (In)
        : "xmm0", "xmm1", "xmm2", "xmm3", "memory"
    );
#elif defined(_MSC_VER) && defined(_M_IX86)
    // MSVC 32-bit path, Intel syntax. Out and In are pointer *variables*:
    // writing [Out] would address the storage of the variable itself, not
    // the matrix it points to. Load the pointer values into registers first
    // and dereference those, mirroring the "r" operands of the AT&T version.
    __asm {
        mov    eax, Out
        mov    edx, In
        movaps xmm0, xmmword ptr [eax]
        movaps xmm1, xmmword ptr [eax + 16]
        movaps xmm2, xmmword ptr [eax + 32]
        movaps xmm3, xmmword ptr [eax + 48]
        addps  xmm0, xmmword ptr [edx]
        addps  xmm1, xmmword ptr [edx + 16]
        addps  xmm2, xmmword ptr [edx + 32]
        addps  xmm3, xmmword ptr [edx + 48]
        movaps xmmword ptr [eax], xmm0
        movaps xmmword ptr [eax + 16], xmm1
        movaps xmmword ptr [eax + 32], xmm2
        movaps xmmword ptr [eax + 48], xmm3
    }
#else
    // Portable fallback for targets where neither inline-asm form applies.
    int i;
    for (i = 0; i < 16; ++i) {
        Out->m[i] += In->m[i];
    }
#endif
}
// (remainder omitted)