69,373
社区成员
发帖
与我相关
我的任务
分享
/* Four-component vector with single-precision (float) fields x, y, z, w,
 * stored in that order. */
typedef struct {
    float x, y, z, w;
} vec4f;
/* Four-component vector with double-precision (double) fields x, y, z, w,
 * stored in that order. */
typedef struct {
    double x, y, z, w;
} vec4d;
/**
 * Component-wise in-place addition of two float vectors: a += b.
 *
 * @param a  Accumulator. Each of its x, y, z, w fields is incremented by the
 *           corresponding field of b.
 * @param b  Addend. Only read, never written (hence pointer-to-const).
 *
 * Both pointers are restrict-qualified: the caller must pass two distinct,
 * non-overlapping objects. Calling vec4f_add(&v, &v) is undefined behavior.
 */
void vec4f_add(vec4f *restrict a, const vec4f *restrict b) {
    a->x += b->x;
    a->y += b->y;
    a->z += b->z;
    a->w += b->w;
}
/**
 * Component-wise in-place addition of two double vectors: a += b.
 *
 * @param a  Accumulator. Each of its x, y, z, w fields is incremented by the
 *           corresponding field of b.
 * @param b  Addend. Only read, never written (hence pointer-to-const).
 *
 * Both pointers are restrict-qualified: the caller must pass two distinct,
 * non-overlapping objects. Calling vec4d_add(&v, &v) is undefined behavior.
 */
void vec4d_add(vec4d *restrict a, const vec4d *restrict b) {
    a->x += b->x;
    a->y += b->y;
    a->z += b->z;
    a->w += b->w;
}
vec.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <vec4f_add>:
0: c5 f8 10 0f vmovups (%rdi),%xmm1
4: c5 f8 10 06 vmovups (%rsi),%xmm0
8: c5 f0 58 c0 vaddps %xmm0,%xmm1,%xmm0
c: c5 f8 11 07 vmovups %xmm0,(%rdi)
10: c3 retq
11: 66 66 66 66 66 66 2e data32 data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)
18: 0f 1f 84 00 00 00 00
1f: 00
0000000000000020 <vec4d_add>:
20: c5 f9 10 0f vmovupd (%rdi),%xmm1
24: c5 f9 10 06 vmovupd (%rsi),%xmm0
28: c4 e3 75 18 4f 10 01 vinsertf128 $0x1,0x10(%rdi),%ymm1,%ymm1
2f: c4 e3 7d 18 46 10 01 vinsertf128 $0x1,0x10(%rsi),%ymm0,%ymm0
36: c5 f5 58 c0 vaddpd %ymm0,%ymm1,%ymm0
3a: c5 f9 11 07 vmovupd %xmm0,(%rdi)
3e: c4 e3 7d 19 47 10 01 vextractf128 $0x1,%ymm0,0x10(%rdi)
45: c5 f8 77 vzeroupper
48: c3 retq