inline函数竟然比堆栈函数跑得慢

hallowwar 2010-11-10 06:33:39
#include <stdio.h>
#include<sys/time.h>

/* Macro form of the square. It expands textually at every use site, so the
 * argument is evaluated twice -- do not pass an expression with side effects. */
#define ppp_hong(n) ((n)*(n))

/* Both functions are defined in a different source file; only their
 * prototypes are visible in this one. */
extern int ppp(int n);
/* No `inline` on this prototype: C requires a function with external linkage
 * that is declared `inline` to be defined in the same translation unit, and
 * with no body visible here the compiler has nothing to expand -- every call
 * made through this declaration is an ordinary call. */
extern int ppp_inline(int n);

/* Microseconds elapsed from *from to *to (two gettimeofday() samples). */
static unsigned
elapsed_usec(const struct timeval *from, const struct timeval *to)
{
    return (unsigned)(1000000 * (to->tv_sec - from->tv_sec)
                      + to->tv_usec - from->tv_usec);
}

/*
 * Times ten calls of ppp_inline() and ten calls of ppp(), each wrapped in a
 * printf(), and reports both elapsed times in microseconds.
 *
 * Each timed region includes its printf() calls, and the two loops print
 * labels of different length, so the figures do not measure call overhead
 * alone.
 *
 * Returns 0 on completion.
 */
int
main(int argc, char *argv[])
{
    struct timeval start, end, ss, ee;
    unsigned run1, run2;

    (void)argc;
    (void)argv;

    /* First timed region: ppp_inline(). The label text says "ppp_hong" even
     * though the call is ppp_inline(); the text is kept unchanged. */
    gettimeofday(&start, NULL);
    for (int i = 0; i < 10; i++) {
        printf("use ppp_hong :the square of %d is %d \n", i, ppp_inline(i));
    }
    gettimeofday(&end, NULL);

    run1 = elapsed_usec(&start, &end);
    /* run1/run2 are unsigned, so they are printed with %u. */
    printf("the inline has runned %u \n", run1);

    /* Second timed region: ppp(). */
    gettimeofday(&ss, NULL);
    for (int ii = 0; ii < 10; ii++) {
        printf("the square of %d is %d \n", ii, ppp(ii));
    }
    gettimeofday(&ee, NULL);

    run2 = elapsed_usec(&ss, &ee);
    printf("the inline has runned %u \n", run2);

    printf("the diff is :\n");
    printf("\t\tthe inline 's usec is %u \n", run1);
    printf("\t\tthe stock 's usec is %u \n", run2);

    /* 0 is the conventional "success" exit status. */
    return 0;
}


输出如下:
[root@localhost contrast]# ./main
use ppp_hong :the square of 0 is 0
use ppp_hong :the square of 1 is 1
use ppp_hong :the square of 2 is 4
use ppp_hong :the square of 3 is 9
use ppp_hong :the square of 4 is 16
use ppp_hong :the square of 5 is 25
use ppp_hong :the square of 6 is 36
use ppp_hong :the square of 7 is 49
use ppp_hong :the square of 8 is 64
use ppp_hong :the square of 9 is 81
the inline has runned 1520
the square of 0 is 0
the square of 1 is 1
the square of 2 is 4
the square of 3 is 9
the square of 4 is 16
the square of 5 is 25
the square of 6 is 36
the square of 7 is 49
the square of 8 is 64
the square of 9 is 81
the inline has runned 1200
the diff is :
the inline 's usec is 1520
the stock 's usec is 1200

inline函数竟然比堆栈函数跑的时间还长,很是疑惑。函数体长的、短的我都试过了,结果一样。

后来我进行调试。结果让人惊讶:ppp_inline也在堆栈里面了:
(gdb) info stack
#0 ppp_inline (n=1) at /home/hh/work/makefile/tiaoshi/contrast/head.c:9
#1 0x080483be in main (argc=1, argv=0xbfff7fc4) at /home/hh/work/makefile/tiaoshi/contrast/main.c:14
(gdb) step
10 }

大家帮忙。
...全文
1085 20 打赏 收藏 转发到动态 举报
写回复
用AI写文章
20 条回复
切换为时间正序
请发表友善的回复…
发表回复
bluesea87 2010-11-18
  • 打赏
  • 举报
回复
楼主,你要看main函数的汇编。inline函数的意思是在调用它的地方直接展开,以省去指令跳转、压栈出栈的时间;同时在有cache的CPU下,也更容易命中cache中的数据。
手机写程序 2010-11-12
  • 打赏
  • 举报
回复
x86 linux下用gcc -O1编译,两个都做了inline的优化.



.text:08048424 public ppp
.text:08048424 ppp proc near ; CODE XREF: main+B0p
.text:08048424
.text:08048424 arg_0 = dword ptr 8
.text:08048424
.text:08048424 push ebp
.text:08048425 mov ebp, esp
.text:08048427 mov eax, [ebp+arg_0]
.text:0804842A imul eax, [ebp+arg_0]
.text:0804842E pop ebp
.text:0804842F retn
.text:0804842F ppp endp
.text:0804842F
.text:08048430
.text:08048430 ; ███████████████ S U B R O U T I N E ███████████████████████████████████████
.text:08048430
.text:08048430 ; Attributes: bp-based frame
.text:08048430
.text:08048430 public ppp_inline
.text:08048430 ppp_inline proc near ; CODE XREF: main+33p
.text:08048430
.text:08048430 arg_0 = dword ptr 8
.text:08048430
.text:08048430 push ebp
.text:08048431 mov ebp, esp
.text:08048433 mov eax, [ebp+arg_0]
.text:08048436 imul eax, [ebp+arg_0]
.text:0804843A pop ebp
.text:0804843B retn
.text:0804843B ppp_inline endp
.text:0804843B
.text:0804843C
.text:0804843C ; ███████████████ S U B R O U T I N E ███████████████████████████████████████
.text:0804843C
.text:0804843C ; Attributes: bp-based frame
.text:0804843C
.text:0804843C public main
.text:0804843C main proc near ; DATA XREF: _start+17o
.text:0804843C
.text:0804843C var_60 = dword ptr -60h
.text:0804843C var_5C = dword ptr -5Ch
.text:0804843C var_38 = dword ptr -38h
.text:0804843C var_34 = dword ptr -34h
.text:0804843C var_30 = dword ptr -30h
.text:0804843C var_2C = dword ptr -2Ch
.text:0804843C var_28 = dword ptr -28h
.text:0804843C var_24 = dword ptr -24h
.text:0804843C var_20 = dword ptr -20h
.text:0804843C var_1C = dword ptr -1Ch
.text:0804843C var_18 = dword ptr -18h
.text:0804843C var_14 = dword ptr -14h
.text:0804843C var_C = dword ptr -0Ch
.text:0804843C var_8 = dword ptr -8
.text:0804843C arg_0 = dword ptr 4
.text:0804843C
.text:0804843C lea ecx, [esp+arg_0]
.text:08048440 and esp, 0FFFFFFF0h
.text:08048443 push dword ptr [ecx-4]
.text:08048446 push ebp
.text:08048447 mov ebp, esp
.text:08048449 push ecx
.text:0804844A sub esp, 54h ; char *
.text:0804844D mov [esp+60h+var_5C], 0
.text:08048455 lea eax, [ebp+var_20]
.text:08048458 mov [esp+60h+var_60], eax
.text:0804845B call _gettimeofday
.text:08048460 mov [ebp+var_C], 0
.text:08048467 jmp short loc_8048478
.text:08048469 ; ───────────────────────────────────────────────────────────────────────────
.text:08048469
.text:08048469 loc_8048469: ; CODE XREF: main+43j
.text:08048469 mov eax, [ebp+var_C]
.text:0804846C mov [esp+60h+var_60], eax
.text:0804846F call ppp_inline
.text:08048474 add [ebp+var_C], 1
.text:08048478
.text:08048478 loc_8048478: ; CODE XREF: main+2Bj
.text:08048478 cmp [ebp+var_C], 0F423Fh
.text:0804847F jle short loc_8048469
.text:08048481 mov [esp+60h+var_5C], 0
.text:08048489 lea eax, [ebp+var_28]
.text:0804848C mov [esp+60h+var_60], eax
.text:0804848F call _gettimeofday
.text:08048494 mov edx, [ebp+var_28]
.text:08048497 mov eax, [ebp+var_20]
.text:0804849A mov ecx, edx
.text:0804849C sub ecx, eax
.text:0804849E mov eax, ecx
.text:080484A0 imul edx, eax, 0F4240h
.text:080484A6 mov eax, [ebp+var_24]
.text:080484A9 add edx, eax
.text:080484AB mov eax, [ebp+var_1C]
.text:080484AE mov ecx, edx
.text:080484B0 sub ecx, eax
.text:080484B2 mov eax, ecx
.text:080484B4 mov [ebp+var_18], eax
.text:080484B7 mov eax, [ebp+var_18]
.text:080484BA mov [esp+60h+var_5C], eax
.text:080484BE mov [esp+60h+var_60], offset aTheInlineHasRu ; "the inline has runned %d \n"
.text:080484C5 call _printf
.text:080484CA mov [esp+60h+var_5C], 0
.text:080484D2 lea eax, [ebp+var_30]
.text:080484D5 mov [esp+60h+var_60], eax
.text:080484D8 call _gettimeofday
.text:080484DD mov [ebp+var_8], 0
.text:080484E4 jmp short loc_80484F5
.text:080484E6 ; ───────────────────────────────────────────────────────────────────────────
.text:080484E6
.text:080484E6 loc_80484E6: ; CODE XREF: main+C0j
.text:080484E6 mov eax, [ebp+var_8]
.text:080484E9 mov [esp+60h+var_60], eax
.text:080484EC call ppp
.text:080484F1 add [ebp+var_8], 1
.text:080484F5
.text:080484F5 loc_80484F5: ; CODE XREF: main+A8j
.text:080484F5 cmp [ebp+var_8], 0F423Fh
.text:080484FC jle short loc_80484E6
.text:080484FE mov [esp+60h+var_5C], 0
.text:08048506 lea eax, [ebp+var_38]
.text:08048509 mov [esp+60h+var_60], eax
.text:0804850C call _gettimeofday
.text:08048511 mov edx, [ebp+var_38]
.text:08048514 mov eax, [ebp+var_30]
.text:08048517 mov ecx, edx
.text:08048519 sub ecx, eax
.text:0804851B mov eax, ecx
.text:0804851D imul edx, eax, 0F4240h
.text:08048523 mov eax, [ebp+var_34]
.text:08048526 add edx, eax
.text:08048528 mov eax, [ebp+var_2C]
.text:0804852B mov ecx, edx
.text:0804852D sub ecx, eax
.text:0804852F mov eax, ecx
.text:08048531 mov [ebp+var_14], eax
.text:08048534 mov eax, [ebp+var_14]
.text:08048537 mov [esp+60h+var_5C], eax
.text:0804853B mov [esp+60h+var_60], offset aTheInlineHasRu ; "the inline has runned %d \n"
.text:08048542 call _printf



手机写程序 2010-11-12
  • 打赏
  • 举报
回复
[Quote=引用 16 楼 hallowwar 的回复:]
引用 15 楼 eyey1 的回复:

引用 13 楼 hallowwar 的回复:
引用 12 楼 eyey1 的回复:

帮你在我的板子上跑了下.

the inline has runned 7
the inline has runned 656
the diff is :
the inline 's usec is 7
the stock 's usec is 656……
[/Quote]
我的工程有点庞大,不好找的.
手机写程序 2010-11-12
  • 打赏
  • 举报
回复
[Quote=引用 14 楼 hallowwar 的回复:]
以上的汇编刚才搞错了, return ((n)*(n)) 的两个汇编应该是这样的:
(gdb) disassemble ppp
Dump of assembler code for function ppp:
0x08048380 <ppp+0>: push %ebp
0x08048381 <ppp+1>: mov %esp,%ebp
0x08048383 <ppp+3>: mov 0x……
[/Quote]
还是一样嘛,但看到了乘法指令.建议你直接用gcc编译,不要用gdb.如果发汇编,建议用IDA pro free.
hallowwar 2010-11-12
  • 打赏
  • 举报
回复
[Quote=引用 15 楼 eyey1 的回复:]

引用 13 楼 hallowwar 的回复:
引用 12 楼 eyey1 的回复:

帮你在我的板子上跑了下.

the inline has runned 7
the inline has runned 656
the diff is :
the inline 's usec is 7
the stock 's usec is 656
很明显inline起作用了,你不要用G……
[/Quote]

把你的两个汇编函数贴上来看看。
手机写程序 2010-11-12
  • 打赏
  • 举报
回复
[Quote=引用 13 楼 hallowwar 的回复:]
引用 12 楼 eyey1 的回复:

帮你在我的板子上跑了下.

the inline has runned 7
the inline has runned 656
the diff is :
the inline 's usec is 7
the stock 's usec is 656
很明显inline起作用了,你不要用GDB试下.

简直不可思议啊,我的不论是在板子……
[/Quote]

哪里都没改,我用的是QT,我们从不用gdb,把你的代码插进来而已.我的板子是ARM11,400MHZ.代码还是这些.


#include<sys/time.h>

/* Returns n multiplied by itself. */
int ppp(int n)
{
    int squared = n * n;

    return squared;
}

/*
 * Square of n, marked `inline` as a hint to the compiler.
 *
 * The plain prototype ahead of the definition is deliberate. Under C99/C11
 * rules, a definition whose every file-scope declaration says `inline`
 * (without `extern`) is only an "inline definition" and emits no callable
 * symbol, so any call the compiler chooses not to expand fails at link time.
 * One declaration without `inline` makes this an ordinary external definition
 * as well.
 */
int ppp_inline(int n);

inline int ppp_inline(int n)
{
    return n * n;
}

/*
 * Times 10000 calls of ppp_inline() and 10000 calls of ppp() with
 * gettimeofday() and prints both elapsed times in microseconds.
 *
 * The return values of the timed calls are discarded.
 * NOTE(review): a compiler that can see the callee's body may drop such a
 * call entirely -- confirm from the generated assembly what is being timed.
 */
void XXXXXXX::run()
{
    struct timeval start, end, ss, ee;
    unsigned run1, run2;

    gettimeofday(&start, NULL);
    for (int i = 0; i < 10000; i++) {   // changed here
        ppp_inline(i);                  // changed here
    }
    gettimeofday(&end, NULL);

    run1 = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
    // run1/run2 are unsigned, so they are printed with %u.
    printf("the inline has runned %u \n", run1);

    gettimeofday(&ss, NULL);
    for (int ii = 0; ii < 10000; ii++) { // changed here
        ppp(ii);                         // changed here
    }
    gettimeofday(&ee, NULL);

    run2 = 1000000 * (ee.tv_sec - ss.tv_sec) + ee.tv_usec - ss.tv_usec;
    printf("the inline has runned %u \n", run2);

    printf("the diff is :\n");
    printf("\t\tthe inline 's usec is %u \n", run1);
    printf("\t\tthe stock 's usec is %u \n", run2);
}




hallowwar 2010-11-12
  • 打赏
  • 举报
回复
以上的汇编刚才搞错了, return ((n)*(n)) 的两个汇编应该是这样的:
(gdb) disassemble ppp
Dump of assembler code for function ppp:
0x08048380 <ppp+0>: push %ebp
0x08048381 <ppp+1>: mov %esp,%ebp
0x08048383 <ppp+3>: mov 0x8(%ebp),%eax
0x08048386 <ppp+6>: imul 0x8(%ebp),%eax
0x0804838a <ppp+10>: leave
0x0804838b <ppp+11>: ret
End of assembler dump.
(gdb) disassemble ppp_inline
Dump of assembler code for function ppp_inline:
0x0804838c <ppp_inline+0>: push %ebp
0x0804838d <ppp_inline+1>: mov %esp,%ebp
0x0804838f <ppp_inline+3>: mov 0x8(%ebp),%eax
0x08048392 <ppp_inline+6>: imul 0x8(%ebp),%eax
0x08048396 <ppp_inline+10>: leave
0x08048397 <ppp_inline+11>: ret
End of assembler dump.
hallowwar 2010-11-12
  • 打赏
  • 举报
回复
[Quote=引用 12 楼 eyey1 的回复:]

帮你在我的板子上跑了下.

the inline has runned 7
the inline has runned 656
the diff is :
the inline 's usec is 7
the stock 's usec is 656
很明显inline起作用了,你不要用GDB试下.
[/Quote]
简直不可思议啊,我的不论是在板子还是在电脑跑都没有你这种效果,你改动了哪里?
手机写程序 2010-11-12
  • 打赏
  • 举报
回复
帮你在我的板子上跑了下.

the inline has runned 7
the inline has runned 656
the diff is :
the inline 's usec is 7
the stock 's usec is 656
很明显inline起作用了,你不要用GDB试下.
手机写程序 2010-11-12
  • 打赏
  • 举报
回复
汇编也不是很懂,但你给的结果很明显两个都没inline进去啊,是用GDB,而GDB不做优化的缘故吧.
hallowwar 2010-11-12
  • 打赏
  • 举报
回复
还是那样,汇编我是看不懂,哪位自己跑出合理的结果来看看啊。
hallowwar 2010-11-12
  • 打赏
  • 举报
回复
改到1000----------------------------------------------
the diff is :
the inline 's usec is 12520060
the stock 's usec is 6285936
改到100000----------------------------------------------
the diff is :
the inline 's usec is 625607655
the stock 's usec is 625252782


(gdb) disassemble ppp
Dump of assembler code for function ppp:
0x0804851c <ppp+0>: push %ebp
0x0804851d <ppp+1>: mov %esp,%ebp
0x0804851f <ppp+3>: sub $0x8,%esp
0x08048522 <ppp+6>: movl $0x0,0xfffffffc(%ebp)
0x08048529 <ppp+13>: mov 0xfffffffc(%ebp),%eax
0x0804852c <ppp+16>: cmp 0x8(%ebp),%eax
0x0804852f <ppp+19>: jl 0x8048533 <ppp+23>
0x08048531 <ppp+21>: jmp 0x804854d <ppp+49>
0x08048533 <ppp+23>: sub $0x8,%esp
0x08048536 <ppp+26>: pushl 0xfffffffc(%ebp)
0x08048539 <ppp+29>: push $0x80486bb
0x0804853e <ppp+34>: call 0x80482b4
0x08048543 <ppp+39>: add $0x10,%esp
0x08048546 <ppp+42>: lea 0xfffffffc(%ebp),%eax
0x08048549 <ppp+45>: incl (%eax)
0x0804854b <ppp+47>: jmp 0x8048529 <ppp+13>
0x0804854d <ppp+49>: leave
0x0804854e <ppp+50>: ret
End of assembler dump.
(gdb) disassemble ppp_inline
Dump of assembler code for function ppp_inline:
0x0804854f <ppp_inline+0>: push %ebp
0x08048550 <ppp_inline+1>: mov %esp,%ebp
0x08048552 <ppp_inline+3>: sub $0x8,%esp
0x08048555 <ppp_inline+6>: movl $0x0,0xfffffffc(%ebp)
0x0804855c <ppp_inline+13>: mov 0xfffffffc(%ebp),%eax
0x0804855f <ppp_inline+16>: cmp 0x8(%ebp),%eax
0x08048562 <ppp_inline+19>: jl 0x8048566 <ppp_inline+23>
0x08048564 <ppp_inline+21>: jmp 0x8048580 <ppp_inline+49>
0x08048566 <ppp_inline+23>: sub $0x8,%esp
0x08048569 <ppp_inline+26>: pushl 0xfffffffc(%ebp)
0x0804856c <ppp_inline+29>: push $0x80486bb
0x08048571 <ppp_inline+34>: call 0x80482b4
0x08048576 <ppp_inline+39>: add $0x10,%esp
0x08048579 <ppp_inline+42>: lea 0xfffffffc(%ebp),%eax
0x0804857c <ppp_inline+45>: incl (%eax)
0x0804857e <ppp_inline+47>: jmp 0x804855c <ppp_inline+13>
0x08048580 <ppp_inline+49>: leave
0x08048581 <ppp_inline+50>: ret
0x08048582 <ppp_inline+51>: nop
0x08048583 <ppp_inline+52>: nop
heke_ken 2010-11-11
  • 打赏
  • 举报
回复
LZ最好把编译器生成的汇编代码拿来分析下
按照正常情况,inline函数会增大程序的体积,尤其在循环中,如果编译器不够智能,是会为你做出很多重复代码的,堆栈调用无非就是压栈出栈的过程,常理,inline是为了空间换时间,速度应该是比堆栈调用快,但LZ出现的这种情况要具体分析,时间是耗费在哪个阶段,是系统调用时间还是用户时间
hallowwar 2010-11-11
  • 打赏
  • 举报
回复
楼上所言两点我都更正了,结果还是没变。
CFLAGS=-c -O -Wall -std=c99
手机写程序 2010-11-11
  • 打赏
  • 举报
回复
[Quote=引用 6 楼 yjpcn 的回复:]
lz的测试方法一点都不严谨!!!!!!!!!!
[/Quote]
同意.
手机写程序 2010-11-11
  • 打赏
  • 举报
回复
改几个地方:

/*
 * Times 10000 calls of ppp_inline() and 10000 calls of ppp() with
 * gettimeofday() and prints both elapsed times in microseconds. Unlike the
 * earlier version, the timed loops contain no printf().
 *
 * The return values of the timed calls are discarded.
 * NOTE(review): a compiler that can see the callee's body may drop such a
 * call entirely -- confirm from the generated assembly what is being timed.
 *
 * Returns 0 on completion.
 */
int
main(int argc, char *argv[])
{
    struct timeval start, end, ss, ee;
    unsigned run1, run2;

    (void)argc;
    (void)argv;

    gettimeofday(&start, NULL);
    for (int i = 0; i < 10000; i++) {   /* changed here */
        ppp_inline(i);                  /* changed here */
    }
    gettimeofday(&end, NULL);

    run1 = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
    /* run1/run2 are unsigned, so they are printed with %u. */
    printf("the inline has runned %u \n", run1);

    gettimeofday(&ss, NULL);
    for (int ii = 0; ii < 10000; ii++) { /* changed here */
        ppp(ii);                         /* changed here */
    }
    gettimeofday(&ee, NULL);

    run2 = 1000000 * (ee.tv_sec - ss.tv_sec) + ee.tv_usec - ss.tv_usec;
    printf("the inline has runned %u \n", run2);

    printf("the diff is :\n");
    printf("\t\tthe inline 's usec is %u \n", run1);
    printf("\t\tthe stock 's usec is %u \n", run2);

    /* 0 is the conventional "success" exit status. */
    return 0;
}


还有,有时候编译器会把不加inline的函数编译成inline.也可能加inline也没用.
yjpcn 2010-11-11
  • 打赏
  • 举报
回复

lz的测试方法一点都不严谨!!!!!!!!!!
bluesea87 2010-11-10
  • 打赏
  • 举报
回复
还有,内联函数的定义(实现体)必须出现在第一次被调用之前。结合以上(printf打印的长度)这两点,就可以验证你的结果。
bluesea87 2010-11-10
  • 打赏
  • 举报
回复
printf("use ppp_hong :the square of %d is %d \n",i, ppp_inline(i));

printf("the square of %d is %d \n",ii, ppp(ii));

这两条打印的长短差这么多,前面那条花的时间当然就多很多
hallowwar 2010-11-10
  • 打赏
  • 举报
回复
/* Computes the square of n. */
int ppp(int n)
{
    const int result = n * n;

    return result;
}

/*
 * Square of n, marked `inline` as a hint to the compiler.
 *
 * The plain prototype ahead of the definition forces an external definition.
 * Under C99/C11 rules, a definition whose every file-scope declaration says
 * `inline` (without `extern`) emits no callable symbol, so a caller that does
 * not expand the call fails at link time.
 *
 * `inline` can only take effect where the body is visible to the compiler. A
 * caller that sees just a prototype of this function reaches it through an
 * ordinary call, unless link-time optimization is in use.
 */
int ppp_inline(int n);

inline int ppp_inline(int n)
{
    return n * n;
}
两个外部函数。

21,595

社区成员

发帖
与我相关
我的任务
社区描述
硬件/嵌入开发 驱动开发/核心开发
社区管理员
  • 驱动开发/核心开发社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧