21,459
社区成员
发帖
与我相关
我的任务
分享
/* Skeleton of a GCC extended-asm statement. The instruction template is
   elided ("****") in this fragment, so it does not compile as written.
   It declares no outputs and passes `this` as the only input, using the
   "X" constraint (per GCC's constraint docs: any operand is allowed). */
__asm__(
****
: /* no output */
: "X"(this) /* input */
);
/*
 * SSE version of the scalar transform
 *     Result[j] = P.X*M[0][j] + P.Y*M[1][j] + P.Z*M[2][j] + P.W*M[3][j]
 *
 * Each component of P is broadcast across a register, multiplied by the
 * matching matrix row, and the four products are summed.
 *
 * Operand contract:
 *   %[res]  out  Result, written with one unaligned 16-byte store
 *   %[vec]  in   address of P
 *   %[mat]  in   address of M[0][0]
 * Both pointers are plain register operands, so the compiler chooses the
 * registers and nothing is read from an uninitialised %ecx or overwritten
 * behind the compiler's back.
 *
 * NOTE(review): assumes P is four contiguous floats (X, Y, Z, W) and M is a
 * contiguous row-major float[4][4] -- confirm against the type definitions.
 */
__asm__ (
    /* Load the four matrix rows (unaligned loads: no alignment assumed). */
    "movups   (%[mat]), %%xmm4 \n\t"
    "movups 16(%[mat]), %%xmm5 \n\t"
    "movups 32(%[mat]), %%xmm6 \n\t"
    "movups 48(%[mat]), %%xmm7 \n\t"

    /* xmm0 = P.X * row 0 */
    "movss    (%[vec]), %%xmm0 \n\t"
    "shufps $0, %%xmm0, %%xmm0 \n\t"
    "mulps  %%xmm4, %%xmm0     \n\t"

    /* xmm1 = P.Y * row 1 */
    "movss   4(%[vec]), %%xmm1 \n\t"
    "shufps $0, %%xmm1, %%xmm1 \n\t"
    "mulps  %%xmm5, %%xmm1     \n\t"

    /* xmm2 = P.Z * row 2 */
    "movss   8(%[vec]), %%xmm2 \n\t"
    "shufps $0, %%xmm2, %%xmm2 \n\t"
    "mulps  %%xmm6, %%xmm2     \n\t"

    /* xmm3 = P.W * row 3 */
    "movss  12(%[vec]), %%xmm3 \n\t"
    "shufps $0, %%xmm3, %%xmm3 \n\t"
    "mulps  %%xmm7, %%xmm3     \n\t"

    /* Sum the partial products pairwise and store the result. */
    "addps  %%xmm1, %%xmm0     \n\t"
    "addps  %%xmm3, %%xmm2     \n\t"
    "addps  %%xmm2, %%xmm0     \n\t"
    "movups %%xmm0, %[res]     \n\t"
    : [res] "=m"(Result)                    /* output */
    : [vec] "r"(&P), [mat] "r"(&M[0][0])    /* inputs */
    : "xmm0", "xmm1", "xmm2", "xmm3",       /* every XMM register touched */
      "xmm4", "xmm5", "xmm6", "xmm7",
      "memory");                            /* P and M are read through pointers */
/*
 * Result = P transformed by the matrix M.
 *
 * With ASM_X86 set and a GNU-compatible compiler, the SSE path below is used;
 * otherwise the scalar expressions at the bottom compute the same sums.
 *
 * NOTE(review): the SSE path assumes P is four contiguous floats (X, Y, Z, W)
 * and M is a contiguous row-major float[4][4] -- confirm against the type
 * definitions.
 */
#if ASM_X86
#ifdef __GNUC__
/*
 * Operand contract:
 *   %[res]  out  Result, written with one unaligned 16-byte store
 *   %[vec]  in   address of P
 *   %[mat]  in   address of M[0][0]
 * Both pointers are plain register operands chosen by the compiler. Every XMM
 * register used is listed as clobbered, and "memory" tells the compiler that
 * P and M are read through those pointers.
 */
__asm__ (
    /* Load the four matrix rows (unaligned loads: no alignment assumed). */
    "movups   (%[mat]), %%xmm4 \n\t"
    "movups 16(%[mat]), %%xmm5 \n\t"
    "movups 32(%[mat]), %%xmm6 \n\t"
    "movups 48(%[mat]), %%xmm7 \n\t"

    /* xmm0 = P.X * row 0 */
    "movss    (%[vec]), %%xmm0 \n\t"
    "shufps $0, %%xmm0, %%xmm0 \n\t"
    "mulps  %%xmm4, %%xmm0     \n\t"

    /* xmm1 = P.Y * row 1 */
    "movss   4(%[vec]), %%xmm1 \n\t"
    "shufps $0, %%xmm1, %%xmm1 \n\t"
    "mulps  %%xmm5, %%xmm1     \n\t"

    /* xmm2 = P.Z * row 2 */
    "movss   8(%[vec]), %%xmm2 \n\t"
    "shufps $0, %%xmm2, %%xmm2 \n\t"
    "mulps  %%xmm6, %%xmm2     \n\t"

    /* xmm3 = P.W * row 3 */
    "movss  12(%[vec]), %%xmm3 \n\t"
    "shufps $0, %%xmm3, %%xmm3 \n\t"
    "mulps  %%xmm7, %%xmm3     \n\t"

    /* Sum the partial products pairwise and store the result. */
    "addps  %%xmm1, %%xmm0     \n\t"
    "addps  %%xmm3, %%xmm2     \n\t"
    "addps  %%xmm2, %%xmm0     \n\t"
    "movups %%xmm0, %[res]     \n\t"
    : [res] "=m"(Result)                    /* output */
    : [vec] "r"(&P), [mat] "r"(&M[0][0])    /* inputs */
    : "xmm0", "xmm1", "xmm2", "xmm3",
      "xmm4", "xmm5", "xmm6", "xmm7",
      "memory");
#else
#error Please implement for your compiler.
#endif
#else
/* Portable scalar fallback: one dot product per output component. */
Result.X = P.X * M[0][0] + P.Y * M[1][0] + P.Z * M[2][0] + P.W * M[3][0];
Result.Y = P.X * M[0][1] + P.Y * M[1][1] + P.Z * M[2][1] + P.W * M[3][1];
Result.Z = P.X * M[0][2] + P.Y * M[1][2] + P.Z * M[2][2] + P.W * M[3][2];
Result.W = P.X * M[0][3] + P.Y * M[1][3] + P.Z * M[2][3] + P.W * M[3][3];
#endif