又求解释:

iamnobody 2011-10-13 03:04:22
还是做了一个strlen()函数的测试:
my::strlen() PK std::strlen();
问题1:

我用内联汇编、用scas指令写的函数,用时竟然稳定地保持在std::strlen()“优化”版本的三倍。它“优化”后的核心代码是:


01181040 mov cl,byte ptr [eax]
01181042 inc eax
01181043 test cl,cl
01181045 jne main+40h (1181040h)


然后我用内联汇编写:



// Returns the number of characters before the terminating NUL of the string
// at pch, using the x86 string-scan instruction (MSVC 32-bit inline assembly).
//
// pch must point to a NUL-terminated string.
size_t strlen(char* pch){

    char* pcrr;                 // receives EDI once the scan has stopped
    _asm{
        mov edi,pch             // EDI = current scan position
        xor ecx,ecx
        cld                     // DF = 0: scan towards higher addresses
        dec ecx                 // ECX = 0xFFFFFFFF, i.e. no practical length limit
        xor eax,eax             // AL = 0, the byte being searched for
        repne scasb             // compare AL with [EDI], then EDI++, until equal
        mov pcrr,edi

    }
    // scasb increments EDI after every comparison, including the one that
    // matched, so EDI ends up one byte PAST the terminator. Subtract that
    // extra byte; without it the function reports length + 1.
    return pcrr - pch - 1;
}

优化后也差不多这个样。
为什么 scasb 指令会这么慢,竟然比不上普通的 mov 和 jne?

问题2:std::strlen()的代码也有看过,在不release时比release时快,这个之前讨论过了。下面是根据类似原理写的strlen(),想不到在我的编译器上还是被优化成了这个鸟样:


// Returns the number of characters before the terminating NUL of the string
// at pch, examining four bytes per iteration once the pointer is aligned.
//
// Three phases:
//   1. byte-by-byte until the pointer is 4-byte aligned;
//   2. aligned 32-bit loads until a word containing a zero byte is found;
//   3. byte-by-byte inside that word to locate the exact terminator.
//
// NOTE: phase 2 may read up to three bytes beyond the terminator (never
// beyond the aligned word that contains it), and it reads char storage
// through an unsigned int pointer. Both are the usual word-at-a-time trade-off
// and are outside what ISO C++ guarantees.
size_t strlen(char* pch){

    const char* pcrr = pch;

    // Phase 1. The inner parentheses matter: `==` binds tighter than `&`, so
    // `addr & 3 == 0` would be parsed as `addr & (3 == 0)`, which is always 0.
    while ((((size_t)pcrr) & 3) != 0){
        if (*pcrr == '\0')
            return pcrr - pch;
        pcrr++;
    }

    // Phase 2. Test the word at the CURRENT position before advancing, so no
    // word is ever skipped. The expression is non-zero exactly when at least
    // one byte of x is zero; unsigned arithmetic keeps the wrap-around defined.
    for (;;){
        unsigned int x = *(const unsigned int*)pcrr;
        if ((x - 0x01010101u) & ~x & 0x80808080u)
            break;
        pcrr += 4;
    }

    // Phase 3. The terminator is one of the four bytes at pcrr.
    while (*pcrr != '\0')
        pcrr++;

    return pcrr - pch;
}


release后:

00121057 inc esi
00121058 cmp byte ptr [esi],0
0012105B jne main+57h (121057h)


问题2:怎么控制编译器的这种低效的优化?我记得以前它还会按照我写法四个字节比较的。以前优化后是这样的:

012610D0 mov eax,dword ptr [ebx]
012610D2 lea edx,[eax-1010101h]
012610D8 not eax
012610DA and edx,eax
012610DC add ebx,4
012610DF test edx,80808080h
012610E5 je main+90h (12610D0h)



主函数代码:

const int num = 1024 * 1024 * 32;
static char arr[num];
memset(arr,'1',num);
0118100C push 2000000h
01181011 push 31h
01181013 push offset arr (1183370h)
01181018 call memset (1181BFAh)
arr[num - 1] = 0;
clock_t beg = clock();
0118101D mov ebx,dword ptr [__imp__clock (11820D4h)]
01181023 add esp,0Ch
01181026 mov byte ptr ds:[318336Fh],0
0118102D call ebx
0118102F mov dword ptr [esp+14h],eax
int len1 = strlen(arr);
01181033 mov eax,offset arr (1183370h)
01181038 lea edx,[eax+1]
0118103B jmp main+40h (1181040h)
0118103D lea ecx,[ecx]
01181040 mov cl,byte ptr [eax]
01181042 inc eax
01181043 test cl,cl
01181045 jne main+40h (1181040h)
01181047 sub eax,edx
01181049 mov dword ptr [esp+10h],eax
clock_t mid = clock();
0118104D call ebx
0118104F mov esi,eax
int len2 = my::strlen(arr);
01181051 mov edi,offset arr (1183370h)
01181056 xor ecx,ecx
01181058 cld
01181059 dec ecx
0118105A xor eax,eax
0118105C repne scas byte ptr es:[edi]
0118105E mov dword ptr [esp+0Ch],edi
clock_t end = clock();
01181062 call ebx

...全文
163 8 打赏 收藏 转发到动态 举报
AI 作业
写回复
用AI写文章
8 条回复
切换为时间正序
请发表友善的回复…
发表回复
iamnobody 2011-10-13
  • 打赏
  • 举报
回复
[Quote=引用 6 楼 keiy 的回复:]
问题1:可查intel每条指令的clocks,自己算一下.不过我暂时只找到scasb为12个时钟周期,还要考虑不同CPU的相同指令时钟周期不同,加上流水线优化等,但我感觉这个是可能的.
问题2:编译器都有控制优化指令-O,你试一下你不同-O参数的结果,当然也可能达不到你的要求,那就是编译器的问题了.所以编译器所带的库文件,很多也是ASM写的而非C/C++,以前研究过borland的字串strcp……
[/Quote]

rep scasb 9 + 4*cx
7 if cx=0
查到了,是pentium的。没什么不妥。。。
同样的次数竟然比普通循环慢那么多。。。
lengxujun 2011-10-13
  • 打赏
  • 举报
回复
对于编译器来说,在做优化的时候,对于实现同样功能的代码段,它越难“理解”你的代码逻辑,所能做出的优化效果就越差。也就是说,你编写的表面上更高效的代码,可能比不上表面上更糟糕但更容易被编译器“理解”的代码。这也就是为什么你的代码在debug模式下(表面上更高效但不那么容易被编译器“理解”,代码未经过优化)反而更快,而在release模式下优化时却被翻译成了更低效的代码(因为编译器没“理解”你的真正意图)。
这些涉及到编译器的优化理论,lz可以专门研究一下(窥孔优化等等之类的)。

一家之言,仅供参考。
柯本 2011-10-13
  • 打赏
  • 举报
回复
问题1:可查intel每条指令的clocks,自己算一下.不过我暂时只找到scasb为12个时钟周期,还要考虑不同CPU的相同指令时钟周期不同,加上流水线优化等,但我感觉这个是可能的.
问题2:编译器都有控制优化指令-O,你试一下你不同-O参数的结果,当然也可能达不到你的要求,那就是编译器的问题了.所以编译器所带的库文件,很多也是ASM写的而非C/C++,以前研究过borland的字串strcpy就是用asm写的,它是用每次拷贝一字长(4字节)以提高速度

;[]-----------------------------------------------------------------[]
;| QSTRCPY.ASM -- copy string src to string dest (quick version) |
;[]-----------------------------------------------------------------[]

DWALIGN equ 1 ; set to 1 to enable dword-aligning of string

;
; C/C++ Run Time Library - Version 11.0
;
; Copyright (c) 1991, 2002 by Borland Software Corporation
; All Rights Reserved.
;

; $Revision: 9.4.2.1 $

include RULES.ASI

; Segments Definitions

Header@

;-----------------------------------------------------------------------
;
;Name __strcpy__ - copy string src to string dest
;
;Usage char *__strcpy__ (char *dest, const char *src);
;
;Prototype in string.h
;
;Description Copy the ASCIIZ string *src to the buffer *dest. It is the
; callers responsibility to ensure that the dest buffer is
; large enough to contain the string, and to guard against
; supplying NULL arguments.
;
; NOTE: this is the "quick" version of strcpy; it cheats
; by fetching 32-bit words, which can GP fault if the
; src string is near the end of a memory region and DWALIGN
; is not enabled above.
;
;Return value __strcpy__ returns dest.
;
;-----------------------------------------------------------------------

Code_seg@

; __strcpy__ body: copies the zero-terminated string at src to dest and
; returns dest in eax.
;
; Register roles inside the routine:
;   ecx = current read position in src
;   edx = current write position in dest
;   eax = byte (al) or dword just loaded from [ecx]
;
; NOTE(review): Func@, Link@, Unlink@ and Return@ are macros that are not
; defined in this excerpt (presumably they come from RULES.ASI and emit the
; procedure header, frame setup/teardown and the return) -- confirm there.
Func@ __strcpy__, public, _RTLENTRYF, <pointer dest>, <pointer src>

Link@
mov ecx, src ; get source string
mov edx, dest ; get destination buffer
if DWALIGN
; Dispatch on the low two bits of the SOURCE address so that the dword
; loop below always loads from a 4-byte-aligned address. Only ecx is
; considered; nothing here aligns the destination pointer.
mov eax, ecx
and eax, 3
jmp jmptab[eax*4]

; Jump table indexed by (src & 3):
;   0 -> already aligned, go straight to the dword loop
;   1 -> three single bytes are needed to reach the next multiple of 4
;   2 -> two bytes
;   3 -> one byte
jmptab dd offset FLAT:fetch
dd offset FLAT:fetch3
dd offset FLAT:fetch2
dd offset FLAT:fetch1

; Copy three bytes
; (fetch3, fetch2 and fetch1 are identical single-byte copy steps that fall
; through into one another; each leaves via return0 if the byte is zero.)

fetch3:
mov al, [ecx]
or al, al ; sets ZF when the byte is the terminator
je return0
mov [edx], al
add ecx, 1
add edx, 1

; Copy two bytes

fetch2:
mov al, [ecx]
or al, al
je return0
mov [edx], al
add ecx, 1
add edx, 1

; Copy one byte

fetch1:
mov al, [ecx]
or al, al
je return0
mov [edx], al
add ecx, 1
add edx, 1

; The explicit jump below is commented out: execution simply falls through
; into fetch.
; jmp fetch

endif ; DWALIGN

; Main loop: load four source bytes at once and test each byte for zero
; BEFORE storing, so the dword store inside the loop only happens when none
; of the four bytes is the terminator.
fetch:
mov eax, [ecx] ; get four bytes from source
or al, al ; check byte 0 for null
jz return0
or ah, ah ; check byte 1 for null
jz return1
test eax, 000ff0000h ; check byte 2 for null
jz return2
test eax, 0ff000000h ; check byte 3 for null
jz return3
mov [edx], eax
add ecx, 4
add edx, 4
jmp fetch
; Exit paths. returnN is reached when byte N of eax is the first zero byte.
return3:
; Byte 3 is the terminator, so the dword store writes it as well.
mov [edx], eax ; copy bytes 0-3
mov eax, dest ; return dest
Unlink@
Return@
return2:
; The 16-bit store writes bytes 0-1; the terminator (byte 2) is written
; explicitly by the following instruction.
mov [edx], ax ; copy bytes 0-2
mov byte ptr [edx+2], 0
mov eax, dest ; return dest
Unlink@
Return@
return1:
; ah is zero here, so the 16-bit store writes byte 0 plus the terminator.
mov [edx], ax ; copy bytes 0-1
mov eax, dest ; return dest
Unlink@
Return@
return0:
; al is zero on every path that jumps here, so this stores the terminator.
mov [edx], al ; copy byte 0
mov eax, dest ; return dest
Unlink@
Return@

EndFunc@ __strcpy__

Code_EndS@

end

qq120848369 2011-10-13
  • 打赏
  • 举报
回复
表示没学过汇编。
zuo187qiang 2011-10-13
  • 打赏
  • 举报
回复
帮顶...菜鸟看不懂....
PG 2011-10-13
  • 打赏
  • 举报
回复
LZ数据帝啊,膜拜~~帮顶~~~
周晓荣 2011-10-13
  • 打赏
  • 举报
回复
mark~~~

帮顶,等会看。
iamnobody 2011-10-13
  • 打赏
  • 举报
回复
问题2是我2了,忘了加括号:(((int)pcrr) & 3 == 0) =》 ((((int)pcrr) & 3) == 0){

,请高手回答问题1:::
The C programming Language 第二版英文版 內容列表 Table of Contents Preface.......................................................... Preface to the first edition..................................... Introduction..................................................... Chapter 1 - A Tutorial Introduction.............................. 1.1 Getting Started................................ 1.2 Variables and Arithmetic Expressions........... 1.3 The for statement.............................. 1.4 Symbolic Constants............................. 1.5 Character Input and Output..................... 1.5.1 File Copying.......................... 1.5.2 Character Counting.................... 1.5.3 Line Counting......................... 1.5.4 Word Counting......................... 1.6 Arrays......................................... 1.7 Functions...................................... 1.8 Arguments - Call by Value...................... 1.9 Character Arrays............................... 1.10 External Variables and Scope.................. Chapter 2 - Types, Operators and Expressions..................... 2.1 Variable Names................................. 2.2 Data Types and Sizes........................... 2.3 Constants...................................... 2.4 Declarations................................... 2.5 Arithmetic Operators........................... 2.6 Relational and Logical Operators............... 2.7 Type Conversions............................... 2.8 Increment and Decrement Operators.............. 2.9 Bitwise Operators.............................. 2.10 Assignment Operators and Expressions.......... 2.11 Conditional Expressions....................... 2.12 Precedence and Order of Evaluation............ Chapter 3 - Control Flow......................................... 3.1 Statements and Blocks.......................... 3.2 If-Else........................................ 3.3 Else-If........................................ 3.4 Switch......................................... 
3.5 Loops - While and For.......................... 3.6 Loops - Do-While............................... 3.7 Break and Continue............................. 3.8 Goto and labels................................ Chapter 4 - Functions and Program Structure...................... 4.1 Basics of Functions............................ 4.2 Functions Returning Non-integers............... 4.3 External Variables............................. 4.4 Scope Rules.................................... 4.5 Header Files................................... 4.6 Static Variables................................ 4.7 Register Variables.............................. 4.8 Block Structure................................. 4.9 Initialization.................................. 4.10 Recursion...................................... 4.11 The C Preprocessor............................. 4.11.1 File Inclusion........................ 4.11.2 Macro Substitution.................... 4.11.3 Conditional Inclusion................. Chapter 5 - Pointers and Arrays.................................. 5.1 Pointers and Addresses......................... 5.2 Pointers and Function Arguments................ 5.3 Pointers and Arrays............................ 5.4 Address Arithmetic............................. 5.5 Character Pointers and Functions............... 5.6 Pointer Arrays; Pointers to Pointers........... 5.7 Multi-dimensional Arrays....................... 5.8 Initialization of Pointer Arrays............... 5.9 Pointers vs. Multi-dimensional Arrays.......... 5.10 Command-line Arguments........................ 5.11 Pointers to Functions......................... 5.12 Complicated Declarations...................... Chapter 6 - Structures........................................... 6.1 Basics of Structures........................... 6.2 Structures and Functions....................... 6.3 Arrays of Structures........................... 6.4 Pointers to Structures......................... 
6.5 Self-referential Structures.................... 6.6 Table Lookup................................... 6.7 Typedef........................................ 6.8 Unions......................................... 6.9 Bit-fields..................................... Chapter 7 - Input and Output..................................... 7.1 Standard Input and Output....................... 7.2 Formatted Output - printf....................... 7.3 Variable-length Argument Lists.................. 7.4 Formatted Input - Scanf......................... 7.5 File Access..................................... 7.6 Error Handling - Stderr and Exit................ 7.7 Line Input and Output........................... 7.8 Miscellaneous Functions......................... 7.8.1 String Operations...................... 7.8.2 Character Class Testing and Conversion. 7.8.3 Ungetc................................. 7.8.4 Command Execution...................... 7.8.5 Storage Management..................... 7.8.6 Mathematical Functions................. 7.8.7 Random Number generation............... Chapter 8 - The UNIX System Interface............................ 8.1 File Descriptors............................... 8.2 Low Level I/O - Read and Write................. 8.3 Open, Creat, Close, Unlink..................... 8.4 Random Access - Lseek.......................... 8.5 Example - An implementation of Fopen and Getc.. 8.6 Example - Listing Directories.................. 8.7 Example - A Storage Allocator.................. Appendix A - Reference Manual.................................... A.1 Introduction................................... A.2 Lexical Conventions............................ A.2.1 Tokens................................ A.2.2 Comments.............................. A.2.3 Identifiers........................... A.2.4 Keywords.............................. A.2.5 Constants............................. A.2.6 String Literals....................... A.3 Syntax Notation................................ 
A.4 Meaning of Identifiers......................... A.4.1 Storage Class......................... A.4.2 Basic Types........................... A.4.3 Derived types......................... A.4.4 Type Qualifiers....................... A.5 Objects and Lvalues............................ A.6 Conversions.................................... A.6.1 Integral Promotion.................... A.6.2 Integral Conversions.................. A.6.3 Integer and Floating.................. A.6.4 Floating Types........................ A.6.5 Arithmetic Conversions................ A.6.6 Pointers and Integers................. A.6.7 Void.................................. A.6.8 Pointers to Void...................... A.7 Expressions.................................... A.7.1 Pointer Conversion.................... A.7.2 Primary Expressions................... A.7.3 Postfix Expressions................... A.7.4 Unary Operators....................... A.7.5 Casts................................. A.7.6 Multiplicative Operators.............. A.7.7 Additive Operators.................... A.7.8 Shift Operators....................... A.7.9 Relational Operators.................. A.7.10 Equality Operators................... A.7.11 Bitwise AND Operator................. A.7.12 Bitwise Exclusive OR Operator........ A.7.13 Bitwise Inclusive OR Operator........ A.7.14 Logical AND Operator................. A.7.15 Logical OR Operator.................. A.7.16 Conditional Operator................. A.7.17 Assignment Expressions............... A.7.18 Comma Operator.......................... A.7.19 Constant Expressions.................... A.8 Declarations..................................... A.8.1 Storage Class Specifiers................. A.8.2 Type Specifiers.......................... A.8.3 Structure and Union Declarations......... A.8.4 Enumerations............................. A.8.5 Declarators.............................. A.8.6 Meaning of Declarators................... A.8.7 Initialization........................... 
A.8.8 Type names............................... A.8.9 Typedef.................................. A.8.10 Type Equivalence........................ A.9 Statements....................................... A.9.1 Labeled Statements....................... A.9.2 Expression Statement..................... A.9.3 Compound Statement....................... A.9.4 Selection Statements..................... A.9.5 Iteration Statements..................... A.9.6 Jump statements.......................... A.10 External Declarations........................... A.10.1 Function Definitions.................... A.10.2 External Declarations................... A.11 Scope and Linkage............................... A.11.1 Lexical Scope........................... A.11.2 Linkage................................. A.12 Preprocessing................................... A.12.1 Trigraph Sequences...................... A.12.2 Line Splicing........................... A.12.3 Macro Definition and Expansion.......... A.12.4 File Inclusion.......................... A.12.5 Conditional Compilation................. A.12.6 Line Control............................ A.12.7 Error Generation........................ A.12.8 Pragmas................................. A.12.9 Null directive.......................... A.12.10 Predefined names....................... A.13 Grammar......................................... Appendix B - Standard Library.................................... B.1.1 File Operations................................ B.1.2 Formatted Output......................... B.1.3 Formatted Input.......................... B.1.4 Character Input and Output Functions..... B.1.5 Direct Input and Output Functions........ B.1.6 File Positioning Functions............... B.1.7 Error Functions.......................... B.2 Character Class Tests: ................. B.3 String Functions: ..................... B.4 Mathematical Functions: ................. B.5 Utility Functions: ....................

65,199

社区成员

发帖
与我相关
我的任务
社区描述
C++ 语言相关问题讨论,技术干货分享,前沿动态等
c++ 技术论坛(原bbs)
社区管理员
  • C++ 语言社区
  • encoderlee
  • paschen
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
  1. 请不要发布与C++技术无关的贴子
  2. 请不要发布与技术无关的招聘、广告的帖子
  3. 请尽可能的描述清楚你的问题,如果涉及到代码请尽可能的格式化一下

试试用AI创作助手写篇文章吧