看LIBC源码中关于strncpy实现的问题

lxmuyu 2013-08-21 09:08:18
/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */

#include <string.h>
#include <memcopy.h>

#undef strncpy

#ifndef STRNCPY
#define STRNCPY strncpy
#endif

char *
STRNCPY (char *s1, const char *s2, size_t n)
{
char c;
char *s = s1;

--s1;

if (n >= 4)
{
size_t n4 = n >> 2;

for (;;)
{
c = *s2++;
*++s1 = c;
if (c == '\0')
break;
c = *s2++;
*++s1 = c;
if (c == '\0')
break;
c = *s2++;
*++s1 = c;
if (c == '\0')
break;
c = *s2++;
*++s1 = c;
if (c == '\0')
break;
if (--n4 == 0)
goto last_chars;
}
n = n - (s1 - s) - 1;
if (n == 0)
return s;
goto zero_fill;
}

last_chars:
n &= 3;
if (n == 0)
return s;

do
{
c = *s2++;
*++s1 = c;
if (--n == 0)
return s;
}
while (c != '\0');

zero_fill:
do
*++s1 = '\0';
while (--n > 0);

return s;
}
libc_hidden_builtin_def (strncpy)

网上查了下相关的资料,有一个回答是这样
这不明摆着吗,现在的机器字长多半是32位,4字节,n>>2就是把char划成32个位一起处理,到了汇编层次,这个代码看起来就像这样:
loop:
inc %esi
inc %edi
movb src(, %esi), %al
movb %al, dst(, %edi)
inc %esi
inc %edi
movb src(, %esi), %al
movb %al, dst(, %edi)
inc %esi
inc %edi
movb src(, %esi), %al
movb %al, dst(, %edi)
inc %esi
inc %edi
movb src(, %esi), %al
movb %al, dst(, %edi)
jmp loop
然后编译器只要稍微聪明一些,就能够将它们拼凑到一起:
loop:
inc %esi
inc %edi
movl src(, %esi, 4), %eax
movl %eax, dst(, %edi, 4)
jmp loop


问题:
1 这样实现是怎么做到优化的?
  2 最后的编译器是否像回答中的那样 ?
...全文
314 10 打赏 收藏 转发到动态 举报
写回复
用AI写文章
10 条回复
切换为时间正序
请发表友善的回复…
发表回复
lxmuyu 2013-08-22
  • 打赏
  • 举报
回复
引用 6 楼 zhao4zhong1 的回复:
;接上帖
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------

;
; Copy down to avoid propogation in overlapping buffers.
;
        align   @WordSize
CopyDown:
        lea     esi,[esi+ecx-4] ;U - point to 4 bytes before src buffer end
        lea     edi,[edi+ecx-4] ;V - point to 4 bytes before dest buffer end
;
; See if the destination start is dword aligned
;

        test    edi,11b         ;U - test if dword aligned
        jnz     short CopyLeadDown ;V - if not, jump

        shr     ecx,2           ;U - shift down to dword count
        and     edx,11b         ;V - trailing byte count

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindDown ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag back

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

        align   @WordSize
CopyUnwindDown:
        neg     ecx             ;U - negate dword count for table merging
                                ;V - spare

        jmp     dword ptr UnwindDownVec[ecx*4+28] ;N - unwind copy

        align   @WordSize
CopyLeadDown:

        mov     eax,edi         ;U - get destination offset
        mov     edx,11b         ;V - prepare for mask

        cmp     ecx,4           ;U - check for really short string
        jb      short ByteCopyDown ;V - branch to just copy bytes

        and     eax,11b         ;U - get offset within first dword
        sub     ecx,eax         ;U - to update size after lead copied

        jmp     dword ptr LeadDownVec[eax*4-4] ;N - process leading bytes

        align   @WordSize
ByteCopyDown:
        jmp     dword ptr TrailDownVec[ecx*4] ;N - process just bytes

        align   @WordSize
LeadDownVec     dd      LeadDown1, LeadDown2, LeadDown3

        align   @WordSize
LeadDown1:
        mov     al,[esi+3]      ;U - load first byte
        and     edx,ecx         ;V - trailing byte count

        mov     [edi+3],al      ;U - write out first byte
        sub     esi,1           ;V - point to last src dword

        shr     ecx,2           ;U - shift down to dword count
        sub     edi,1           ;V - point to last dest dword

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindDown ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

        align   @WordSize
LeadDown2:
        mov     al,[esi+3]      ;U - load first byte
        and     edx,ecx         ;V - trailing byte count

        mov     [edi+3],al      ;U - write out first byte
        mov     al,[esi+2]      ;V - get second byte from source

        shr     ecx,2           ;U - shift down to dword count
        mov     [edi+2],al      ;V - write second byte to destination

        sub     esi,2           ;U - point to last src dword
        sub     edi,2           ;V - point to last dest dword

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindDown ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

        align   @WordSize
LeadDown3:
        mov     al,[esi+3]      ;U - load first byte
        and     edx,ecx         ;V - trailing byte count

        mov     [edi+3],al      ;U - write out first byte
        mov     al,[esi+2]      ;V - get second byte from source

        mov     [edi+2],al      ;U - write second byte to destination
        mov     al,[esi+1]      ;V - get third byte from source

        shr     ecx,2           ;U - shift down to dword count
        mov     [edi+1],al      ;V - write third byte to destination

        sub     esi,3           ;U - point to last src dword
        sub     edi,3           ;V - point to last dest dword

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      CopyUnwindDown  ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

;------------------------------------------------------------------

        align   @WordSize
UnwindDownVec   dd      UnwindDown7, UnwindDown6, UnwindDown5, UnwindDown4
                dd      UnwindDown3, UnwindDown2, UnwindDown1, UnwindDown0

UnwindDown7:
        mov     eax,[esi+ecx*4+28] ;U - get dword from source
                                   ;V - spare
        mov     [edi+ecx*4+28],eax ;U - put dword into destination
UnwindDown6:
        mov     eax,[esi+ecx*4+24] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+24],eax ;U - put dword into destination
UnwindDown5:
        mov     eax,[esi+ecx*4+20] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+20],eax ;U - put dword into destination
UnwindDown4:
        mov     eax,[esi+ecx*4+16] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+16],eax ;U - put dword into destination
UnwindDown3:
        mov     eax,[esi+ecx*4+12] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+12],eax ;U - put dword into destination
UnwindDown2:
        mov     eax,[esi+ecx*4+8] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+8],eax ;U - put dword into destination
UnwindDown1:
        mov     eax,[esi+ecx*4+4] ;U(entry)/V(not) - get dword from source
                                  ;V(entry) - spare
        mov     [edi+ecx*4+4],eax ;U - put dword into destination

        lea     eax,[ecx*4]     ;V - compute update for pointer

        add     esi,eax         ;U - update source pointer
        add     edi,eax         ;V - update destination pointer
UnwindDown0:
        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

;-----------------------------------------------------------------------------

        align   @WordSize
TrailDownVec    dd      TrailDown0, TrailDown1, TrailDown2, TrailDown3

        align   @WordSize
TrailDown0:
        mov     eax,[dst]       ;U - return pointer to destination
                                ;V - spare
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailDown1:
        mov     al,[esi+3]      ;U - get byte from source
                                ;V - spare
        mov     [edi+3],al      ;U - put byte in destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailDown2:
        mov     al,[esi+3]      ;U - get first byte from source
                                ;V - spare
        mov     [edi+3],al      ;U - put first byte into destination
        mov     al,[esi+2]      ;V - get second byte from source
        mov     [edi+2],al      ;U - put second byte into destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailDown3:
        mov     al,[esi+3]      ;U - get first byte from source
                                ;V - spare
        mov     [edi+3],al      ;U - put first byte into destination
        mov     al,[esi+2]      ;V - get second byte from source
        mov     [edi+2],al      ;U - put second byte into destination
        mov     al,[esi+1]      ;V - get third byte from source
        mov     [edi+1],al      ;U - put third byte into destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

_MEM_   endp
        end


以上三帖内容来自C:\Program Files\Microsoft Visual Studio 10.0\VC\crt\src\intel\memcpy.asm
汇编不会,看代码看不懂。。。 继续等回答。。。
赵4老师 2013-08-22
  • 打赏
  • 举报
回复
;接上帖
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------

;
; Copy down to avoid propogation in overlapping buffers.
;
        align   @WordSize
CopyDown:
        lea     esi,[esi+ecx-4] ;U - point to 4 bytes before src buffer end
        lea     edi,[edi+ecx-4] ;V - point to 4 bytes before dest buffer end
;
; See if the destination start is dword aligned
;

        test    edi,11b         ;U - test if dword aligned
        jnz     short CopyLeadDown ;V - if not, jump

        shr     ecx,2           ;U - shift down to dword count
        and     edx,11b         ;V - trailing byte count

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindDown ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag back

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

        align   @WordSize
CopyUnwindDown:
        neg     ecx             ;U - negate dword count for table merging
                                ;V - spare

        jmp     dword ptr UnwindDownVec[ecx*4+28] ;N - unwind copy

        align   @WordSize
CopyLeadDown:

        mov     eax,edi         ;U - get destination offset
        mov     edx,11b         ;V - prepare for mask

        cmp     ecx,4           ;U - check for really short string
        jb      short ByteCopyDown ;V - branch to just copy bytes

        and     eax,11b         ;U - get offset within first dword
        sub     ecx,eax         ;U - to update size after lead copied

        jmp     dword ptr LeadDownVec[eax*4-4] ;N - process leading bytes

        align   @WordSize
ByteCopyDown:
        jmp     dword ptr TrailDownVec[ecx*4] ;N - process just bytes

        align   @WordSize
LeadDownVec     dd      LeadDown1, LeadDown2, LeadDown3

        align   @WordSize
LeadDown1:
        mov     al,[esi+3]      ;U - load first byte
        and     edx,ecx         ;V - trailing byte count

        mov     [edi+3],al      ;U - write out first byte
        sub     esi,1           ;V - point to last src dword

        shr     ecx,2           ;U - shift down to dword count
        sub     edi,1           ;V - point to last dest dword

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindDown ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

        align   @WordSize
LeadDown2:
        mov     al,[esi+3]      ;U - load first byte
        and     edx,ecx         ;V - trailing byte count

        mov     [edi+3],al      ;U - write out first byte
        mov     al,[esi+2]      ;V - get second byte from source

        shr     ecx,2           ;U - shift down to dword count
        mov     [edi+2],al      ;V - write second byte to destination

        sub     esi,2           ;U - point to last src dword
        sub     edi,2           ;V - point to last dest dword

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindDown ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

        align   @WordSize
LeadDown3:
        mov     al,[esi+3]      ;U - load first byte
        and     edx,ecx         ;V - trailing byte count

        mov     [edi+3],al      ;U - write out first byte
        mov     al,[esi+2]      ;V - get second byte from source

        mov     [edi+2],al      ;U - write second byte to destination
        mov     al,[esi+1]      ;V - get third byte from source

        shr     ecx,2           ;U - shift down to dword count
        mov     [edi+1],al      ;V - write third byte to destination

        sub     esi,3           ;U - point to last src dword
        sub     edi,3           ;V - point to last dest dword

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      CopyUnwindDown  ;V - if so, then jump

        std                     ;N - set direction flag
        rep     movsd           ;N - move all of our dwords
        cld                     ;N - clear direction flag

        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

;------------------------------------------------------------------

        align   @WordSize
UnwindDownVec   dd      UnwindDown7, UnwindDown6, UnwindDown5, UnwindDown4
                dd      UnwindDown3, UnwindDown2, UnwindDown1, UnwindDown0

UnwindDown7:
        mov     eax,[esi+ecx*4+28] ;U - get dword from source
                                   ;V - spare
        mov     [edi+ecx*4+28],eax ;U - put dword into destination
UnwindDown6:
        mov     eax,[esi+ecx*4+24] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+24],eax ;U - put dword into destination
UnwindDown5:
        mov     eax,[esi+ecx*4+20] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+20],eax ;U - put dword into destination
UnwindDown4:
        mov     eax,[esi+ecx*4+16] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+16],eax ;U - put dword into destination
UnwindDown3:
        mov     eax,[esi+ecx*4+12] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+12],eax ;U - put dword into destination
UnwindDown2:
        mov     eax,[esi+ecx*4+8] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4+8],eax ;U - put dword into destination
UnwindDown1:
        mov     eax,[esi+ecx*4+4] ;U(entry)/V(not) - get dword from source
                                  ;V(entry) - spare
        mov     [edi+ecx*4+4],eax ;U - put dword into destination

        lea     eax,[ecx*4]     ;V - compute update for pointer

        add     esi,eax         ;U - update source pointer
        add     edi,eax         ;V - update destination pointer
UnwindDown0:
        jmp     dword ptr TrailDownVec[edx*4] ;N - process trailing bytes

;-----------------------------------------------------------------------------

        align   @WordSize
TrailDownVec    dd      TrailDown0, TrailDown1, TrailDown2, TrailDown3

        align   @WordSize
TrailDown0:
        mov     eax,[dst]       ;U - return pointer to destination
                                ;V - spare
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailDown1:
        mov     al,[esi+3]      ;U - get byte from source
                                ;V - spare
        mov     [edi+3],al      ;U - put byte in destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailDown2:
        mov     al,[esi+3]      ;U - get first byte from source
                                ;V - spare
        mov     [edi+3],al      ;U - put first byte into destination
        mov     al,[esi+2]      ;V - get second byte from source
        mov     [edi+2],al      ;U - put second byte into destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailDown3:
        mov     al,[esi+3]      ;U - get first byte from source
                                ;V - spare
        mov     [edi+3],al      ;U - put first byte into destination
        mov     al,[esi+2]      ;V - get second byte from source
        mov     [edi+2],al      ;U - put second byte into destination
        mov     al,[esi+1]      ;V - get third byte from source
        mov     [edi+1],al      ;U - put third byte into destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

_MEM_   endp
        end


以上三帖内容来自C:\Program Files\Microsoft Visual Studio 10.0\VC\crt\src\intel\memcpy.asm
赵4老师 2013-08-22
  • 打赏
  • 举报
回复
;接上帖
;
; Code to do optimal memory copies for non-dword-aligned destinations.
;

; The following length check is done for two reasons:
;
;    1. to ensure that the actual move length is greater than any possiale
;       alignment move, and
;
;    2. to skip the multiple move logic for small moves where it would
;       be faster to move the bytes with one instruction.
;

        align   @WordSize
CopyLeadUp:

        mov     eax,edi         ;U - get destination offset
        mov     edx,11b         ;V - prepare for mask

        sub     ecx,4           ;U - check for really short string - sub for adjust
        jb      short ByteCopyUp ;V - branch to just copy bytes

        and     eax,11b         ;U - get offset within first dword
        add     ecx,eax         ;V - update size after leading bytes copied

        jmp     dword ptr LeadUpVec[eax*4-4] ;N - process leading bytes

        align   @WordSize
ByteCopyUp:
        jmp     dword ptr TrailUpVec[ecx*4+16] ;N - process just bytes

        align   @WordSize
CopyUnwindUp:
        jmp     dword ptr UnwindUpVec[ecx*4] ;N - unwind dword copy

        align   @WordSize
LeadUpVec       dd      LeadUp1, LeadUp2, LeadUp3

        align   @WordSize
LeadUp1:
        and     edx,ecx         ;U - trailing byte count
        mov     al,[esi]        ;V - get first byte from source

        mov     [edi],al        ;U - write second byte to destination
        mov     al,[esi+1]      ;V - get second byte from source

        mov     [edi+1],al      ;U - write second byte to destination
        mov     al,[esi+2]      ;V - get third byte from source

        shr     ecx,2           ;U - shift down to dword count
        mov     [edi+2],al      ;V - write third byte to destination

        add     esi,3           ;U - advance source pointer
        add     edi,3           ;V - advance destination pointer

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindUp ;V - if so, then jump

        rep     movsd           ;N - move all of our dwords

        jmp     dword ptr TrailUpVec[edx*4] ;N - process trailing bytes

        align   @WordSize
LeadUp2:
        and     edx,ecx         ;U - trailing byte count
        mov     al,[esi]        ;V - get first byte from source

        mov     [edi],al        ;U - write second byte to destination
        mov     al,[esi+1]      ;V - get second byte from source

        shr     ecx,2           ;U - shift down to dword count
        mov     [edi+1],al      ;V - write second byte to destination

        add     esi,2           ;U - advance source pointer
        add     edi,2           ;V - advance destination pointer

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindUp ;V - if so, then jump

        rep     movsd           ;N - move all of our dwords

        jmp     dword ptr TrailUpVec[edx*4] ;N - process trailing bytes

        align   @WordSize
LeadUp3:
        and     edx,ecx         ;U - trailing byte count
        mov     al,[esi]        ;V - get first byte from source

        mov     [edi],al        ;U - write second byte to destination
        add     esi,1           ;V - advance source pointer

        shr     ecx,2           ;U - shift down to dword count
        add     edi,1           ;V - advance destination pointer

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindUp ;V - if so, then jump

        rep     movsd           ;N - move all of our dwords

        jmp     dword ptr TrailUpVec[edx*4] ;N - process trailing bytes

        align   @WordSize
UnwindUpVec     dd      UnwindUp0, UnwindUp1, UnwindUp2, UnwindUp3
                dd      UnwindUp4, UnwindUp5, UnwindUp6, UnwindUp7

UnwindUp7:
        mov     eax,[esi+ecx*4-28] ;U - get dword from source
                                   ;V - spare
        mov     [edi+ecx*4-28],eax ;U - put dword into destination
UnwindUp6:
        mov     eax,[esi+ecx*4-24] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4-24],eax ;U - put dword into destination
UnwindUp5:
        mov     eax,[esi+ecx*4-20] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4-20],eax ;U - put dword into destination
UnwindUp4:
        mov     eax,[esi+ecx*4-16] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4-16],eax ;U - put dword into destination
UnwindUp3:
        mov     eax,[esi+ecx*4-12] ;U(entry)/V(not) - get dword from source
                                   ;V(entry) - spare
        mov     [edi+ecx*4-12],eax ;U - put dword into destination
UnwindUp2:
        mov     eax,[esi+ecx*4-8] ;U(entry)/V(not) - get dword from source
                                  ;V(entry) - spare
        mov     [edi+ecx*4-8],eax ;U - put dword into destination
UnwindUp1:
        mov     eax,[esi+ecx*4-4] ;U(entry)/V(not) - get dword from source
                                  ;V(entry) - spare
        mov     [edi+ecx*4-4],eax ;U - put dword into destination

        lea     eax,[ecx*4]     ;V - compute update for pointer

        add     esi,eax         ;U - update source pointer
        add     edi,eax         ;V - update destination pointer
UnwindUp0:
        jmp     dword ptr TrailUpVec[edx*4] ;N - process trailing bytes

;-----------------------------------------------------------------------------

        align   @WordSize
TrailUpVec      dd      TrailUp0, TrailUp1, TrailUp2, TrailUp3

        align   @WordSize
TrailUp0:
        mov     eax,[dst]       ;U - return pointer to destination
        pop     esi             ;V - restore esi
        pop     edi             ;U - restore edi
                                ;V - spare
        M_EXIT

        align   @WordSize
TrailUp1:
        mov     al,[esi]        ;U - get byte from source
                                ;V - spare
        mov     [edi],al        ;U - put byte in destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailUp2:
        mov     al,[esi]        ;U - get first byte from source
                                ;V - spare
        mov     [edi],al        ;U - put first byte into destination
        mov     al,[esi+1]      ;V - get second byte from source
        mov     [edi+1],al      ;U - put second byte into destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

        align   @WordSize
TrailUp3:
        mov     al,[esi]        ;U - get first byte from source
                                ;V - spare
        mov     [edi],al        ;U - put first byte into destination
        mov     al,[esi+1]      ;V - get second byte from source
        mov     [edi+1],al      ;U - put second byte into destination
        mov     al,[esi+2]      ;V - get third byte from source
        mov     [edi+2],al      ;U - put third byte into destination
        mov     eax,[dst]       ;V - return pointer to destination
        pop     esi             ;U - restore esi
        pop     edi             ;V - restore edi
        M_EXIT

;未完待续
赵4老师 2013-08-22
  • 打赏
  • 举报
回复
       page    ,132
        title   memcpy - Copy source memory bytes to destination
;***
;memcpy.asm - contains memcpy and memmove routines
;
;       Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
;       memcpy() copies a source memory buffer to a destination buffer.
;       Overlapping buffers are not treated specially, so propogation may occur.
;       memmove() copies a source memory buffer to a destination buffer.
;       Overlapping buffers are treated specially, to avoid propogation.
;
;*******************************************************************************

        .xlist
        include cruntime.inc
        .list

M_EXIT  macro
        ret                     ; _cdecl return
        endm    ; M_EXIT

        CODESEG
    extrn   _VEC_memcpy:near
    extrn   __sse2_available:dword

page
;***
;memcpy - Copy source buffer to destination buffer
;
;Purpose:
;       memcpy() copies a source memory buffer to a destination memory buffer.
;       This routine does NOT recognize overlapping buffers, and thus can lead
;       to propogation.
;       For cases where propogation must be avoided, memmove() must be used.
;
;       Algorithm:
;
;           Same as memmove. See Below
;
;
;memmove - Copy source buffer to destination buffer
;
;Purpose:
;       memmove() copies a source memory buffer to a destination memory buffer.
;       This routine recognize overlapping buffers to avoid propogation.
;       For cases where propogation is not a problem, memcpy() can be used.
;
;   Algorithm:
;
;       void * memmove(void * dst, void * src, size_t count)
;       {
;               void * ret = dst;
;
;               if (dst <= src || dst >= (src + count)) {
;                       /*
;                        * Non-Overlapping Buffers
;                        * copy from lower addresses to higher addresses
;                        */
;                       while (count--)
;                               *dst++ = *src++;
;                       }
;               else {
;                       /*
;                        * Overlapping Buffers
;                        * copy from higher addresses to lower addresses
;                        */
;                       dst += count - 1;
;                       src += count - 1;
;
;                       while (count--)
;                               *dst-- = *src--;
;                       }
;
;               return(ret);
;       }
;
;
;Entry:
;       void *dst = pointer to destination buffer
;       const void *src = pointer to source buffer
;       size_t count = number of bytes to copy
;
;Exit:
;       Returns a pointer to the destination buffer in AX/DX:AX
;
;Uses:
;       CX, DX
;
;Exceptions:
;*******************************************************************************

ifdef MEM_MOVE
        _MEM_     equ <memmove>
else  ; MEM_MOVE
        _MEM_     equ <memcpy>
endif  ; MEM_MOVE

%       public  _MEM_
_MEM_   proc \
        dst:ptr byte, \
        src:ptr byte, \
        count:IWORD

              ; destination pointer
              ; source pointer
              ; number of bytes to copy

;       push    ebp             ;U - save old frame pointer
;       mov     ebp, esp        ;V - set new frame pointer

        push    edi             ;U - save edi
        push    esi             ;V - save esi

        mov     esi,[src]       ;U - esi = source
        mov     ecx,[count]     ;V - ecx = number of bytes to move

        mov     edi,[dst]       ;U - edi = dest

;
; Check for overlapping buffers:
;       If (dst <= src) Or (dst >= src + Count) Then
;               Do normal (Upwards) Copy
;       Else
;               Do Downwards Copy to avoid propagation
;

        mov     eax,ecx         ;V - eax = byte count...

        mov     edx,ecx         ;U - edx = byte count...
        add     eax,esi         ;V - eax = point past source end

        cmp     edi,esi         ;U - dst <= src ?
        jbe     short CopyUp    ;V - yes, copy toward higher addresses

        cmp     edi,eax         ;U - dst < (src + count) ?
        jb      CopyDown        ;V - yes, copy toward lower addresses

;
; Copy toward higher addresses.
;
CopyUp:
;
; First, see if we can use a "fast" copy SSE2 routine
        ; block size greater than min threshold?
        cmp     ecx,080h
        jb      Dword_align
        ; SSE2 supported?
        cmp     DWORD PTR __sse2_available,0
        je      Dword_align
        ; alignments equal?
        push    edi
        push    esi
        and     edi,15
        and     esi,15
        cmp     edi,esi
        pop     esi
        pop     edi
        jne     Dword_align

        ; do fast SSE2 copy, params already set
        jmp     _VEC_memcpy
        ; no return
;
; The algorithm for forward moves is to align the destination to a dword
; boundary and so we can move dwords with an aligned destination.  This
; occurs in 3 steps.
;
;   - move x = ((4 - Dest & 3) & 3) bytes
;   - move y = ((L-x) >> 2) dwords
;   - move (L - x - y*4) bytes
;

Dword_align:
        test    edi,11b         ;U - destination dword aligned?
        jnz     short CopyLeadUp ;V - if we are not dword aligned already, align

        shr     ecx,2           ;U - shift down to dword count
        and     edx,11b         ;V - trailing byte count

        cmp     ecx,8           ;U - test if small enough for unwind copy
        jb      short CopyUnwindUp ;V - if so, then jump

        rep     movsd           ;N - move all of our dwords

        jmp     dword ptr TrailUpVec[edx*4] ;N - process trailing bytes

;未完待续
lxmuyu 2013-08-22
  • 打赏
  • 举报
回复
其实在问这个问题的时候就有些担心,因为这个已经不是C语言的范畴了,再往下分析就是汇编的东西,学汇编的事开过好几次头,最终都没有坚持下来,原因大家都懂的。。。 这个帖子就像是茴字的四种写法,是作学问的人都考虑的,我想多了,先把C好好学好才是,至于以后会不再回来整,那就是以后的事了。。。 谢谢大家的回帖,算是指明了方向,也断了我念想了~结帖
zhctj159 2013-08-22
  • 打赏
  • 举报
回复
引用 8 楼 zhao4zhong1 的回复:
汇编不会就打算这辈子都不会下去还是现在就去学?
赵4老师 2013-08-22
  • 打赏
  • 举报
回复
汇编不会就打算这辈子都不会下去还是现在就去学? 计算机组成原理→DOS命令→汇编语言→C语言(不包括C++)、代码书写规范→数据结构、编译原理、操作系统→计算机网络、数据库原理、正则表达式→其它语言(包括C++)、架构…… 先 http://www.microsoft.com/visualstudio/chs/downloads#d-2010-express 点开Visual C++ 2010 Express下面的语言选‘简体中文’,再点立即安装 再参考C:\Program Files\Microsoft Visual Studio 10.0\VC\crt\src\intel\strncpy.asm

        page    ,132
        title   strncpy - copy at most n characters of string
;***
;strncpy.asm - copy at most n characters of string
;
;       Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
;       defines strncpy() - copy at most n characters of string
;
;*******************************************************************************

        .xlist
        include cruntime.inc
        .list

page
;***
;char *strncpy(dest, source, count) - copy at most n characters
;
;Purpose:
;       Copies count characters from the source string to the
;       destination.  If count is less than the length of source,
;       NO NULL CHARACTER is put onto the end of the copied string.
;       If count is greater than the length of sources, dest is padded
;       with null characters to length count.
;
;       Algorithm:
;       char *
;       strncpy (dest, source, count)
;       char *dest, *source;
;       unsigned count;
;       {
;         char *start = dest;
;
;         while (count && (*dest++ = *source++))
;             count--;
;         if (count)
;             while (--count)
;                 *dest++ = '\0';
;         return(start);
;       }
;
;Entry:
;       char *dest     - pointer to spot to copy source, enough space
;                        is assumed.
;       char *source   - source string for copy
;       unsigned count - characters to copy
;
;Exit:
;       returns dest, with the character copied there.
;
;Uses:
;
;Exceptions:
;
;*******************************************************************************

        CODESEG

        public  strncpy
strncpy proc \
        dest:ptr byte, \
        source:ptr byte, \
        count:dword

        OPTION PROLOGUE:NONE, EPILOGUE:NONE

        .FPO    ( 0, 3, 0, 0, 0, 0 )

        mov     ecx,[esp + 0ch]     ; ecx = count
        push    edi                 ; preserve edi
        test    ecx,ecx
        jz      finish              ; leave if count is zero

        push    esi                 ; preserve edi
        push    ebx                 ; preserve ebx
        mov     ebx,ecx             ; store count for tail loop
        mov     esi,[esp + 14h]     ; esi -> source string
        test    esi,3               ; test if source string is aligned on 32 bits
        mov     edi,[esp + 10h]     ; edi -> dest string
        jnz     short src_misaligned    ; (almost always source is aligned)

        shr     ecx,2               ; convert ecx to dword count
        jnz     main_loop_entrance
        jmp     short copy_tail_loop    ; 0 < count < 4

; simple byte loop until string is aligned

src_misaligned:
        mov     al,byte ptr [esi]   ; copy a byte from source to dest
        add     esi,1
        mov     [edi],al
        add     edi,1
        sub     ecx,1
        jz      fill_tail_end1      ; if count == 0, leave
        test    al,al               ; was last copied byte zero?
        jz      short align_dest    ; if so, go align dest and pad it out
                                    ; with zeros
        test    esi,3               ; esi already aligned ?
        jne     short src_misaligned
        mov     ebx,ecx             ; store count for tail loop
        shr     ecx,2
        jnz     short main_loop_entrance

tail_loop_start:
        and     ebx,3               ; ebx = count_before_main_loop%4
        jz      short fill_tail_end1    ; if ebx == 0 then leave without
                                        ; appending a null byte

; while ( EOS (end-of-string) not found and count > 0 ) copy bytes

copy_tail_loop:
        mov     al,byte ptr [esi]   ; load byte from source
        add     esi,1
        mov     [edi],al            ; store byte to dest
        add     edi,1
        test    al,al               ; EOS found?
        je      short fill_tail_zero_bytes  ; '\0' was already copied
        sub     ebx,1
        jnz     copy_tail_loop
fill_tail_end1:
        mov     eax,[esp + 10h]     ; prepare return value
        pop     ebx
        pop     esi
        pop     edi
        ret

; EOS found. Pad with null characters to length count

align_dest:
        test    edi,3               ; dest string aligned?
        jz      dest_align_loop_end
dest_align_loop:
        mov     [edi],al
        add     edi,1
        sub     ecx,1               ; count == 0?
        jz      fill_tail_end       ; if so, finished
        test    edi,3               ; is edi aligned ?
        jnz     dest_align_loop
dest_align_loop_end:
        mov     ebx,ecx             ; ebx > 0
        shr     ecx,2               ; convert ecx to count of dwords
        jnz     fill_dwords_with_EOS
        ; pad tail bytes
finish_loop:                        ; 0 < ebx < 4
        mov     [edi],al
        add     edi,1
fill_tail_zero_bytes:
        sub     ebx,1
        jnz     finish_loop
        pop     ebx
        pop     esi
finish:
        mov     eax,[esp + 8]       ; return in eax pointer to dest string
        pop     edi
        ret

; copy (source) string to (dest). Also look for end of (source) string

main_loop:                          ; edx contains first dword of source string
        mov     [edi],edx           ; store one more dword
        add     edi,4               ; kick dest pointer
        sub     ecx,1
        jz      tail_loop_start

main_loop_entrance:
        mov     edx,7efefeffh
        mov     eax,dword ptr [esi] ; read 4 bytes (dword)
        add     edx,eax
        xor     eax,-1
        xor     eax,edx
        mov     edx,[esi]           ; it's in cache now
        add     esi,4               ; kick dest pointer
        test    eax,81010100h
        je      short main_loop

        ; may have found zero byte in the dword

        test    dl,dl               ; is it byte 0
        je      short byte_0
        test    dh,dh               ; is it byte 1
        je      short byte_1
        test    edx,00ff0000h       ; is it byte 2
        je      short byte_2
        test    edx,0ff000000h      ; is it byte 3
        jne     short main_loop     ; taken if bits 24-30 are clear and bit
                                    ; 31 is set

; a null character was found, so dest needs to be padded out with null chars
; to count length.

        mov     [edi],edx
        jmp     short fill_with_EOS_dwords

byte_2:
        and     edx,0ffffh          ; fill high 2 bytes with 0
        mov     [edi],edx
        jmp     short fill_with_EOS_dwords

byte_1:
        and     edx,0ffh            ; fill high 3 bytes with 0
        mov     [edi],edx
        jmp     short fill_with_EOS_dwords

byte_0:
        xor     edx,edx             ; fill whole dword with 0
        mov     [edi],edx

; End of string was found. Pad out dest string with dwords of 0

fill_with_EOS_dwords:               ; ecx > 0   (ecx is dword counter)
        add     edi,4
        xor     eax,eax             ; it is instead of ???????????????????
        sub     ecx,1
        jz      fill_tail           ; we filled all dwords

fill_dwords_with_EOS:
        xor     eax,eax
fill_with_EOS_loop:
        mov     [edi],eax
        add     edi,4
        sub     ecx,1
        jnz     short fill_with_EOS_loop
fill_tail:                          ; let's pad tail bytes with zero
        and     ebx,3               ; ebx = ebx % 4
        jnz     finish_loop         ; taken, when there are some tail bytes
fill_tail_end:
        mov     eax,[esp + 10h]
        pop     ebx
        pop     esi
        pop     edi
        ret

strncpy endp
        end


AnYidan 2013-08-21
  • 打赏
  • 举报
回复
这么多branch, 严重影响pipeline
lxmuyu 2013-08-21
  • 打赏
  • 举报
回复
引用 1 楼 max_min_ 的回复:
你在哪里看的?效率太低了点吧! 看这个strncpy源码
写的这版是glibc-2.17,cygwin写的代码简洁 就像我问的,glibc里这样写是不是为了编译器优化? while中采用4个字节是为了32位机器处理的更快么?
max_min_ 2013-08-21
  • 打赏
  • 举报
回复
你在哪里看的?效率太低了点吧! 看这个strncpy源码

69,373

社区成员

发帖
与我相关
我的任务
社区描述
C语言相关问题讨论
社区管理员
  • C语言
  • 花神庙码农
  • 架构师李肯
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧