上面 C# 的 unsafe 代码的速度和 Buffer.BlockCopy 相比基本一样,甚至还没有 Buffer.BlockCopy 快!下面的代码好像还不如 Buffer.BlockCopy 快;如果一个字节一个字节地移动指针,就更不如 Buffer.BlockCopy 快了,何况这里已经是每次移动 4 个字节!
/// <summary>
/// Copies <paramref name="count"/> bytes from <paramref name="src"/>, starting at
/// <paramref name="srcIndex"/>, into <paramref name="dst"/>, starting at
/// <paramref name="dstIndex"/>, moving four bytes at a time where possible.
/// </summary>
/// <param name="src">Source array; must not be null.</param>
/// <param name="srcIndex">Zero-based offset of the first byte to read from <paramref name="src"/>.</param>
/// <param name="dst">Destination array; must not be null.</param>
/// <param name="dstIndex">Zero-based offset of the first byte to write in <paramref name="dst"/>.</param>
/// <param name="count">Number of bytes to copy; must be non-negative.</param>
/// <exception cref="System.ArgumentException">
/// An array is null, an offset or the count is negative, or the requested range
/// does not fit inside the source or the destination array.
/// </exception>
/// <remarks>
/// The copy always runs from low to high addresses. If both arguments refer to the
/// same array and the destination range starts inside the source range, the result
/// is therefore not a faithful copy; use Buffer.BlockCopy for overlapping ranges.
/// </remarks>
public static unsafe void BlockCopy(ref byte[] src, int srcIndex, ref byte[] dst, int dstIndex, int count)
{
    if (src == null || srcIndex < 0 ||
        dst == null || dstIndex < 0 || count < 0)
    {
        throw new System.ArgumentException();
    }
    if (src.Length - srcIndex < count || dst.Length - dstIndex < count)
    {
        throw new System.ArgumentException();
    }
    if (count == 0)
    {
        // Nothing to copy; skip pinning the arrays altogether.
        return;
    }
    // The following fixed statement pins the location of the src and dst objects
    // in memory so that they will not be moved by garbage collection.
    fixed (byte* pSrc = src, pDst = dst)
    {
        // Start at the requested offsets. The pinned pointers address element 0,
        // so without this adjustment srcIndex and dstIndex would be ignored.
        byte* ps = pSrc + srcIndex;
        byte* pd = pDst + dstIndex;
        // Copy whole 4-byte groups first, one int per iteration.
        int wordCount = count / 4;
        for (int i = 0; i < wordCount; i++)
        {
            *((int*)pd) = *((int*)ps);
            pd += 4;
            ps += 4;
        }
        // Complete the copy by moving the 0-3 bytes that did not fill a group of 4.
        int tailCount = count % 4;
        for (int i = 0; i < tailCount; i++)
        {
            *pd = *ps;
            pd++;
            ps++;
        }
    }
}
下面是Buffer.BlockCopy调用的实际C++代码,没有直接API,分32位和64位两种方式调用。
#if defined(_X86_)
//This is a replacement for the memmove intrinsic.
//It performs better than the CRT one and the inline version.
// On WIN64 the CRT implementation of memmove is actually faster than the CLR implementation of m_memmove().
// Copies `size` bytes from smem to dmem.
// The copy direction is picked from the pointer order: ascending when
// dmem <= smem, descending otherwise, so that a destination overlapping the
// source is never overwritten before it has been read.
// Bulk data is moved 16 bytes per iteration as four DWORD stores; the
// remaining 0-15 bytes are finished with 8/4/2/1-byte steps.
//   dmem - start of the destination region
//   smem - start of the source region
//   size - number of bytes to copy; the contract below requires size >= 0
void m_memmove(BYTE* dmem, BYTE* smem, int size)
{
// Contract block: the PRECONDITION lines state what callers must guarantee
// (both pointers pass CheckPointer, size is non-negative).
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;
MODE_COOPERATIVE;
PRECONDITION(CheckPointer(dmem));
PRECONDITION(CheckPointer(smem));
PRECONDITION(size >= 0);
SO_TOLERANT;
}
CONTRACTL_END;
#if defined(_WIN64) || defined(ALIGN_ACCESS)
// Bail out and use the slow version if the destination and the source don't have the same alignment.
// (Only when both pointers share the same offset within a SIZE_T can aligning
// dmem below also align smem for the wide loads.)
if ( ( ((SIZE_T)dmem) & (sizeof(SIZE_T) - 1) ) !=
( ((SIZE_T)smem) & (sizeof(SIZE_T) - 1) ) )
{
memmove(dmem, smem, size);
}
else
#endif // _WIN64 || ALIGN_ACCESS
// Forward (ascending) copy: used when the destination starts at or below the source.
if (dmem <= smem)
{
// make sure the destination is pointer-aligned
// Byte-copies until dmem is SIZE_T-aligned, or until fewer than
// sizeof(SIZE_T) - 1 bytes remain.
while (( ((SIZE_T)dmem) & (sizeof(SIZE_T) - 1) ) != 0 && size >= (int)(sizeof(SIZE_T) - 1))
{
*dmem++ = *smem++;
size -= 1;
}
// copy 16 bytes at a time
if (size >= 16)
{
// size is biased by -16 up front so that the loop test `(size -= 16) >= 0`
// holds exactly while another full 16-byte chunk is left. On exit size is
// negative (bytes remaining minus 16), but its low four bits still equal
// those of the remaining byte count, which the bit tests below rely on.
size -= 16;
do
{
((DWORD *)dmem)[0] = ((DWORD *)smem)[0];
((DWORD *)dmem)[1] = ((DWORD *)smem)[1];
((DWORD *)dmem)[2] = ((DWORD *)smem)[2];
((DWORD *)dmem)[3] = ((DWORD *)smem)[3];
dmem += 16;
smem += 16;
}
while ((size -= 16) >= 0);
}
#if defined(_WIN64) || defined(ALIGN_ACCESS)
// If dmem is still unaligned, finish byte by byte instead of using wide stores.
// NOTE(review): this loop only runs for positive size. It appears to be
// reached only when the alignment loop above stopped because few bytes were
// left, in which case the 16-byte loop was skipped and size is still the
// true remaining count - confirm.
if (!IS_ALIGNED((SIZE_T)dmem, sizeof(SIZE_T)))
{
while (size > 0)
{
*dmem++ = *smem++;
size -= 1;
}
}
else
#endif // _WIN64 || ALIGN_ACCESS
{
// Tail: each set bit among the low four bits of size selects one
// 8-, 4-, 2- or 1-byte step, together covering the last 0-15 bytes.
// still 8 bytes or more left to copy?
if (size & 8)
{
((DWORD *)dmem)[0] = ((DWORD *)smem)[0];
((DWORD *)dmem)[1] = ((DWORD *)smem)[1];
dmem += 8;
smem += 8;
}
// still 4 bytes or more left to copy?
if (size & 4)
{
((DWORD *)dmem)[0] = ((DWORD *)smem)[0];
dmem += 4;
smem += 4;
}
// still 2 bytes or more left to copy?
if (size & 2)
{
((WORD *)dmem)[0] = ((WORD *)smem)[0];
dmem += 2;
smem += 2;
}
// still 1 byte left to copy?
if (size & 1)
{
dmem[0] = smem[0];
dmem += 1;
smem += 1;
}
}
}
// Backward (descending) copy: the destination starts above the source, so
// the bytes are moved from the end of the regions towards the start.
else
{
// Point both cursors one past the last byte; every step below
// decrements the pointers before it stores.
smem += size;
dmem += size;
// make sure the destination is pointer-aligned
// Here it is the end of the destination that gets aligned, one byte at a time.
while (( ((SIZE_T)dmem) & (sizeof(SIZE_T) - 1) ) != 0 && size >= (int)(sizeof(SIZE_T) - 1))
{
*--dmem = *--smem;
size -= 1;
}
// copy 16 bytes at a time
if (size >= 16)
{
// Same -16 bias as in the forward loop; size ends up negative with the
// low four bits of the remaining byte count preserved.
size -= 16;
do
{
dmem -= 16;
smem -= 16;
// Highest DWORD first, so the stores proceed in descending address order.
((DWORD *)dmem)[3] = ((DWORD *)smem)[3];
((DWORD *)dmem)[2] = ((DWORD *)smem)[2];
((DWORD *)dmem)[1] = ((DWORD *)smem)[1];
((DWORD *)dmem)[0] = ((DWORD *)smem)[0];
}
while ((size -= 16) >= 0);
}
#if defined(_WIN64) || defined(ALIGN_ACCESS)
// If dmem is still unaligned, finish byte by byte (see the matching
// note in the forward branch about when size is positive here).
if (!IS_ALIGNED((SIZE_T)dmem, sizeof(SIZE_T)))
{
while (size > 0)
{
*--dmem = *--smem;
size -= 1;
}
}
else
#endif // _WIN64 || ALIGN_ACCESS
{
// Tail: the low four bits of size select the 8/4/2/1-byte steps,
// each performed after moving the cursors down.
// still 8 bytes or more left to copy?
if (size & 8)
{
dmem -= 8;
smem -= 8;
((DWORD *)dmem)[1] = ((DWORD *)smem)[1];
((DWORD *)dmem)[0] = ((DWORD *)smem)[0];
}
// still 4 bytes or more left to copy?
if (size & 4)
{
dmem -= 4;
smem -= 4;
((DWORD *)dmem)[0] = ((DWORD *)smem)[0];
}
// still 2 bytes or more left to copy?
if (size & 2)
{
dmem -= 2;
smem -= 2;
((WORD *)dmem)[0] = ((WORD *)smem)[0];
}
// still 1 byte left to copy?
if (size & 1)
{
dmem -= 1;
smem -= 1;
dmem[0] = smem[0];
}
}
}
}
#else
// When _X86_ is not defined there is no hand-written routine:
// m_memmove(a, b, c) expands directly to memmove(a, b, c).
#define m_memmove(a, b, c) memmove((a), (b), (c))
#endif // _X86_
CopyMemory
The CopyMemory function copies a block of memory from one location to another.
VOID CopyMemory(
PVOID Destination, // pointer to address of copy destination
CONST VOID *Source, // pointer to address of block to copy
DWORD Length // size, in bytes, of block to copy
);
Parameters
Destination
Pointer to the starting address of the copied block's destination.
Source
Pointer to the starting address of the block of memory to copy.
Length
Specifies the size, in bytes, of the block of memory to copy.
Return Values
This function has no return value.
Remarks
If the source and destination blocks overlap, the results are undefined. For overlapped blocks, use the MoveMemory function.
QuickInfo
Windows NT: Requires version 3.1 or later.
Windows: Requires Windows 95 or later.
Windows CE: Unsupported.
Header: Declared in winbase.h.
See Also
Memory Management Overview, Memory Management Functions, CopyMemoryVlm, FillMemory, FillMemoryVlm, MoveMemory, MoveMemoryVlm, ZeroMemory