cudaMemcpy2D与cudaMemcpy的复制时间问题

zpc_wy 2009-12-25 01:35:11
1、float *ac, *bc, *cc;
// Host-side timing variables; only `start` is assigned within this excerpt.
clock_t start, endTime;
// Row pitches in bytes, filled in by the cudaMallocPitch calls below.
size_t pitch_a, pitch_b, pitch_c;
// Round n up to the next multiple of BLOCK_SIZE (padded matrix order).
int newn = ((n + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;

// The clock is started BEFORE allocation, so anything measured from `start`
// covers the allocations and memsets below as well as the copies.
start = clock();

// Allocate three pitched device buffers, each newn rows of newn floats.
// NOTE(review): none of the CUDA return codes in this fragment are checked.
cudaMallocPitch((void**) &ac, &pitch_a, sizeof(float) * newn, newn);
cudaMallocPitch((void**) &bc, &pitch_b, sizeof(float) * newn, newn);
cudaMallocPitch((void**) &cc, &pitch_c, sizeof(float) * newn, newn);

// Zero the whole pitched extent (pitch * rows bytes) of ac and bc so the
// padding beyond n x n is zero; cc is not cleared here.
cudaMemset(ac, 0, pitch_a * newn);
cudaMemset(bc, 0, pitch_b * newn);

// Copy n rows of n floats from host matrices a and b (source row strides of
// lda / ldb floats) to the start of the pitched device buffers.
cudaMemcpy2D(ac, pitch_a, a, sizeof(float) * lda, sizeof(float) * n, n, cudaMemcpyHostToDevice);
cudaMemcpy2D(bc, pitch_b, b, sizeof(float) * ldb, sizeof(float) * n, n, cudaMemcpyHostToDevice);


2、// allocate host memory for matrices A and B
// Element count and byte size of host matrix A (WA x HA floats).
unsigned int size_A = WA * HA;
unsigned int mem_size_A = sizeof(float) * size_A;

// NOTE(review): the malloc results below are not checked for NULL.
float* h_A = (float*) malloc(mem_size_A);
// Element count and byte size of host matrix B (WB x HB floats).
unsigned int size_B = WB * HB;
unsigned int mem_size_B = sizeof(float) * size_B;
float* h_B = (float*) malloc(mem_size_B);

// initialize host memory
// (randomInit is defined elsewhere; presumably it fills the array with random values - confirm)
randomInit(h_A, size_A);
randomInit(h_B, size_B);

// allocate device memory
// Unpitched, contiguous device buffers of the same byte size as the host arrays.
// cutilSafeCall comes from the CUTIL helpers; presumably it checks the returned error code - confirm.
float* d_A;
cutilSafeCall(cudaMalloc((void**) &d_A, mem_size_A));
float* d_B;
cutilSafeCall(cudaMalloc((void**) &d_B, mem_size_B));

// copy host memory to device
// One contiguous copy of mem_size_A / mem_size_B bytes per matrix.
cutilSafeCall(cudaMemcpy(d_A, h_A, mem_size_A,
cudaMemcpyHostToDevice) );
cutilSafeCall(cudaMemcpy(d_B, h_B, mem_size_B,
cudaMemcpyHostToDevice) );



cudaMemcpy2D 花的时间比 cudaMemcpy 多了将近一倍……不懂这是为什么?然后我把第一个程序中的 cudaMemcpy2D 换成 cudaMemcpy 后,花的时间还是不变?
...全文
840 6 打赏 收藏 转发到动态 举报
写回复
用AI写文章
6 条回复
切换为时间正序
请发表友善的回复…
发表回复
zpc_wy 2009-12-26
  • 打赏
  • 举报
回复
看了看以前的帖子,确实是测时的问题,加上cudaThreadSynchronize()之后测得copy时间才是对的吧?!
zpc_wy 2009-12-26
  • 打赏
  • 举报
回复
1、//创建时间
// Create and start a CUTIL timer; `time` holds the timer handle.
unsigned int time = 0;
cutCreateTimer(&time);
cutStartTimer(time);
// Copy n rows of n floats from the pitched device buffer dc (row pitch
// pitch_c bytes) into the tightly packed host buffer c.
// NOTE(review): if a kernel launch precedes this point without a
// cudaThreadSynchronize(), this copy may also wait for that kernel, so the
// measured time would include kernel execution - confirm against the caller.
cudaMemcpy2D(c,n*sizeof(float),dc,pitch_c,n*sizeof (float),n,cudaMemcpyDeviceToHost);
// Stop the timer, print the elapsed value (labelled ms), and release the timer.
cutStopTimer(time);
printf("Processing time: %f (ms) \n", cutGetTimerValue(time));
cutDeleteTimer(time);
2、 //创建时间
// Create and start a CUTIL timer; `time` holds the timer handle.
unsigned int time = 0;
cutCreateTimer(&time);
cutStartTimer(time);
// Copy n rows of n floats from the pitched device buffer cc (row pitch
// pitch_c bytes) into the tightly packed host buffer c.
// NOTE(review): if a kernel launch precedes this point without a
// cudaThreadSynchronize(), this copy may also wait for that kernel, so the
// measured time would include kernel execution - confirm against the caller.
cudaMemcpy2D(c, sizeof(float) * n, cc, pitch_c, sizeof(float) * n, n, cudaMemcpyDeviceToHost);
// Stop the timer, print the elapsed value (labelled ms), and release the timer.
cutStopTimer(time);
printf("Processing time: %f (ms) \n", cutGetTimerValue(time));
cutDeleteTimer(time);


但还是很奇怪:两个不同程序中的 cudaMemcpy2D(n、pitch 都一样,元素类型都是 float)耗时相差很大。
zpc_wy 2009-12-26
  • 打赏
  • 举报
回复
后来我发现问题所在了:当时我确实只对这两个函数测时,但由于没有 rebuild,得到的还是原来的结果,误导了我。第一个代码真正多花的时间是在 Kahan 求和公式上。
  • 打赏
  • 举报
回复
[Quote=引用 5 楼 zpc_wy 的回复:]
看了看以前的帖子,确实是测时的问题,加上cudaThreadSynchronize()之后测得copy时间才是对的吧?!
[/Quote]

开始测时1
cudaMemcpy.
结束测时1(1:cpu->gpu时间)

开始测时2
启动kernel.
cudaThreadSynchronize.
结束测时2(2:kernel执行时间)

开始测时3
cudaMemcpy.
结束测时3(3:gpu->cpu时间)
  • 打赏
  • 举报
回复
你的测时对吗?
zpc_wy 2009-12-25
  • 打赏
  • 举报
回复
补充一下,lda、ldb、n、WA 等的大小是一样的,即阶数相同
16 1.4 EventManagement RT ............................................................................................................................... 17 1.4.1 cudaEventCreate ............................................................................................................................ 18 1.4.2 cudaEventRecord ........................................................................................................................... 19 1.4.3 cudaEventQuery ............................................................................................................................. 20 1.4.4 cudaEventSynchronize ................................................................................................................... 21 1.4.5 cudaEventDestroy .......................................................................................................................... 22 1.4.6 cudaEventElapsedTime .................................................................................................................. 23 1.5 MemoryManagement RT ........................................................................................................................... 24 1.5.1 cudaMalloc .................................................................................................................................... 25 1.5.2 cudaMallocPitch ............................................................................................................................ 26 1.5.3 cudaFree ........................................................................................................................................ 27 1.5.4 cudaMallocArray ........................................................................................................................... 28 1.5.5 cudaFreeArray ............................................................................................................................... 
29 1.5.6 cudaMallocHost ............................................................................................................................. 30 1.5.7 cudaFreeHost ................................................................................................................................. 31 1.5.8 cudaMemset ................................................................................................................................... 32 1.5.9 cudaMemset2D .............................................................................................................................. 33 ii 1.5.10 cudaMemcpy .............................................................................................................................. 34 1.5.11 cudaMemcpy2D ......................................................................................................................... 35 1.5.12 cudaMemcpyToArray ................................................................................................................. 36 1.5.13 cudaMemcpy2DToArray ............................................................................................................ 37 1.5.14 cudaMemcpyFromArray ............................................................................................................. 38 1.5.15 cudaMemcpy2DFromArray ........................................................................................................ 39 1.5.16 cudaMemcpyArrayToArray ........................................................................................................ 40 1.5.17 cudaMemcpy2DArrayToArray .................................................................................................... 41 1.5.18 cudaMemcpyToSymbol .............................................................................................................. 
42 1.5.19 cudaMemcpyFromSymbol .......................................................................................................... 43 1.5.20 cudaGetSymbolAddress .............................................................................................................. 44 1.5.21 cudaGetSymbolSize .................................................................................................................... 45 1.5.22 cudaMalloc3D ............................................................................................................................ 46 1.5.23 cudaMalloc3DArray ................................................................................................................... 48 1.5.24 cudaMemset3D ........................................................................................................................... 50 1.5.25 cudaMemcpy3D ......................................................................................................................... 52 1.6 TextureReferenceManagement RT ........................................................................................................... 54 1.6.1 LowLevelApi ............................................................................................................................... 55 1.6.2 HighLevelApi ............................................................................................................................... 63 1.7 ExecutionControl RT ............................................................................................................................... 68 1.7.1 cudaConfigureCall ........................................................................................................................ 69 1.7.2 cudaLaunch .................................................................................................................................. 
70 1.7.3 cudaSetupArgument ..................................................................................................................... 71 1.8 OpenGlInteroperability RT ...................................................................................................................... 72 1.8.1 cudaGLSetGLDevice .................................................................................................................... 73 1.8.2 cudaGLRegisterBufferObject ........................................................................................................ 74 1.8.3 cudaGLMapBufferObject ............................................................................................................. 75 1.8.4 cudaGLUnmapBufferObject ......................................................................................................... 76 1.8.5 cudaGLUnregisterBufferObject .................................................................................................... 77 1.9 Direct3dInteroperability RT ..................................................................................................................... 78 1.9.1 cudaD3D9SetDirect3DDevice ....................................................................................................... 79 1.9.2 cudaD3D9GetDirect3DDevice ...................................................................................................... 80 1.9.3 cudaD3D9RegisterResource ......................................................................................................... 81 1.9.4 cudaD3D9UnregisterResource ...................................................................................................... 
83 v 1.9.5 cudaD3D9MapResources ...............................................................................................................84 1.9.6 cudaD3D9UnmapResources ...........................................................................................................85 1.9.7 cudaD3D9ResourceSetMapFlags ....................................................................................................86 1.9.8 cudaD3D9ResourceGetSurfaceDimensions .....................................................................................88 1.9.9 cudaD3D9ResourceGetMappedPointer ...........................................................................................89 1.9.10 cudaD3D9ResourceGetMappedSize .............................................................................................90 1.9.11 cudaD3D9ResourceGetMappedPitch ............................................................................................91 1.9.12 cudaD3D9Begin ...........................................................................................................................92 1.9.13 cudaD3D9End ..............................................................................................................................93 1.9.14 cudaD3D9RegisterVertexBuffer ...................................................................................................94 1.9.15 cudaD3D9MapVertexBuffer .........................................................................................................95 1.9.16 cudaD3D9UnmapVertexBuffer ....................................................................................................96 1.9.17 cudaD3D9UnregisterVertexBuffer ................................................................................................97 1.9.18 cudaD3D9GetDevice ....................................................................................................................98 1.10 ErrorHandling RT 
...................................................................................................................................99 1.10.1 cudaGetLastError ....................................................................................................................... 100 1.10.2 cudaGetErrorString .................................................................................................................... 102 2 DriverApiReference 103 2.1 Initialization ............................................................................................................................................ 104 2.1.1 cuInit ........................................................................................................................................... 105 2.2 DeviceManagement ................................................................................................................................. 106 2.2.1 cuDeviceComputeCapability ........................................................................................................ 107 2.2.2 cuDeviceGet ................................................................................................................................ 108 2.2.3 cuDeviceGetAttribute ................................................................................................................... 109 2.2.4 cuDeviceGetCount ....................................................................................................................... 111 2.2.5 cuDeviceGetName ....................................................................................................................... 112 2.2.6 cuDeviceGetProperties ................................................................................................................. 113 2.2.7 cuDeviceTotalMem ...................................................................................................................... 
115 2.3 ContextManagement ............................................................................................................................... 116 2.3.1 cuCtxAttach ................................................................................................................................. 117 2.3.2 cuCtxCreate ................................................................................................................................. 118 2.3.3 cuCtxDetach ................................................................................................................................ 120 2.3.4 cuCtxGetDevice ........................................................................................................................... 121 v 2.3.5 cuCtxPopCurrent ......................................................................................................................... 122 2.3.6 cuCtxPushCurrent ........................................................................................................................ 123 2.3.7 cuCtxSynchronize ........................................................................................................................ 124 2.4 ModuleManagement ............................................................................................................................... 125 2.4.1 cuModuleGetFunction ................................................................................................................. 126 2.4.2 cuModuleGetGlobal .................................................................................................................... 127 2.4.3 cuModuleGetTexRef ................................................................................................................... 128 2.4.4 cuModuleLoad ............................................................................................................................ 
129 2.4.5 cuModuleLoadData ..................................................................................................................... 130 2.4.6 cuModuleLoadFatBinary ............................................................................................................. 131 2.4.7 cuModuleUnload ......................................................................................................................... 132 2.5 StreamManagement ................................................................................................................................ 133 2.5.1 cuStreamCreate ........................................................................................................................... 134 2.5.2 cuStreamDestroy ......................................................................................................................... 135 2.5.3 cuStreamQuery ............................................................................................................................ 136 2.5.4 cuStreamDestroy ......................................................................................................................... 137 2.6 EventManagement .................................................................................................................................. 138 2.6.1 cuEventCreate ............................................................................................................................. 139 2.6.2 cuEventDestroy ........................................................................................................................... 140 2.6.3 cuEventElapsedTime ................................................................................................................... 141 2.6.4 cuEventQuery .............................................................................................................................. 
142 2.6.5 cuEventRecord ............................................................................................................................ 143 2.6.6 cuEventSynchronize .................................................................................................................... 144 2.7 ExecutionControl .................................................................................................................................... 145 2.7.1 cuLaunch ..................................................................................................................................... 146 2.7.2 cuLaunchGrid .............................................................................................................................. 147 2.7.3 cuParamSetSize ........................................................................................................................... 148 2.7.4 cuParamSetTexRef ...................................................................................................................... 149 2.7.5 cuParamSetf ................................................................................................................................ 150 2.7.6 cuParamSeti ................................................................................................................................ 151 2.7.7 cuParamSetv ................................................................................................................................ 152 2.7.8 cuFuncSetBlockShape ................................................................................................................. 153 2.7.9 cuFuncSetSharedSize ................................................................................................................... 154 2.8 MemoryManagement .............................................................................................................................. 
155 v 2.8.1 cuArrayCreate ............................................................................................................................. 156 2.8.2 cuArrayDestroy ........................................................................................................................... 158 2.8.3 cuArrayGetDescriptor .................................................................................................................. 159 2.8.4 cuMemAlloc ................................................................................................................................ 160 2.8.5 cuMemAllocHost ........................................................................................................................ 161 2.8.6 cuMemAllocPitch ........................................................................................................................ 162 2.8.7 cuMemFree ................................................................................................................................. 164 2.8.8 cuMemFreeHost .......................................................................................................................... 165 2.8.9 cuMemGetAddressRange ............................................................................................................ 166 2.8.10 cuMemGetInfo .......................................................................................................................... 167 2.8.11 cuMemcpy2D ............................................................................................................................ 168 2.8.12 cuMemcpy3D ............................................................................................................................ 171 2.8.13 cuMemcpyAtoA ........................................................................................................................ 
174 2.8.14 cuMemcpyAtoD ........................................................................................................................ 175 2.8.15 cuMemcpyAtoH ........................................................................................................................ 176 2.8.16 cuMemcpyDtoA ........................................................................................................................ 177 2.8.17 cuMemcpyDtoD ........................................................................................................................ 178 2.8.18 cuMemcpyDtoH ........................................................................................................................ 179 2.8.19 cuMemcpyHtoA ........................................................................................................................ 180 2.8.20 cuMemcpyHtoD ........................................................................................................................ 181 2.8.21 cuMemset .................................................................................................................................. 182 2.8.22 cuMemset2D ............................................................................................................................. 183 2.9 TextureReferenceManagement ................................................................................................................ 184 2.9.1 cuTexRefCreate ........................................................................................................................... 185 2.9.2 cuTexRefDestroy ......................................................................................................................... 186 2.9.3 cuTexRefGetAddress ................................................................................................................... 
187 2.9.4 cuTexRefGetAddressMode .......................................................................................................... 188 2.9.5 cuTexRefGetArray ...................................................................................................................... 189 2.9.6 cuTexRefGetFilterMode .............................................................................................................. 190 2.9.7 cuTexRefGetFlags ....................................................................................................................... 191 2.9.8 cuTexRefGetFormat .................................................................................................................... 192 2.9.9 cuTexRefSetAddress ................................................................................................................... 193 2.9.10 cuTexRefSetAddressMode ......................................................................................................... 194 2.9.11 cuTexRefSetArray ..................................................................................................................... 195 vii 2.9.12 cuTexRefSetFilterMode ............................................................................................................. 196 2.9.13 cuTexRefSetFlags ...................................................................................................................... 197 2.9.14 cuTexRefSetFormat .................................................................................................................... 198 2.10 OpenGlInteroperability .......................................................................................................................... 199 2.10.1 cuGLCtxCreate .......................................................................................................................... 
200 2.10.2 cuGLInit .................................................................................................................................... 201 2.10.3 cuGLMapBufferObject ............................................................................................................... 202 2.10.4 cuGLRegisterBufferObject ......................................................................................................... 203 2.10.5 cuGLUnmapBufferObject .......................................................................................................... 204 2.10.6 cuGLUnregisterBufferObject ...................................................................................................... 205 2.11 Direct3dInteroperability ........................................................................................................................ 206 2.11.1 cuD3D9CtxCreate ...................................................................................................................... 207 2.11.2 cuD3D9GetDirect3DDevice ....................................................................................................... 208 2.11.3 cuD3D9RegisterResource ........................................................................................................... 209 2.11.4 cuD3D9UnregisterResource ....................................................................................................... 211 2.11.5 cuD3D9MapResources ............................................................................................................... 212 2.11.6 cuD3D9UnmapResources ........................................................................................................... 213 2.11.7 cuD3D9ResourceSetMapFlags ................................................................................................... 214 2.11.8 cuD3D9ResourceGetSurfaceDimensions .................................................................................... 
215 2.11.9 cuD3D9ResourceGetMappedPointer .......................................................................................... 216 2.11.10 cuD3D9ResourceGetMappedSize ............................................................................................. 217 2.11.11 cuD3D9ResourceGetMappedPitch ............................................................................................ 218 2.11.12cuD3D9Begin ........................................................................................................................... 219 2.11.13cuD3D9End .............................................................................................................................. 220 2.11.14cuD3D9GetDevice .................................................................................................................... 221 2.11.15cuD3D9MapVertexBuffer ......................................................................................................... 222 2.11.16 cuD3D9RegisterVertexBuffer ................................................................................................... 223 2.11.17 cuD3D9UnmapVertexBuffer .................................................................................................... 224 2.11.18 cuD3D9UnregisterVertexBuffer ............................................................................................... 225 3 AtomicFunctions 226 3.1 ArithmeticFunctions ................................................................................................................................ 227 3.1.1 atomicAdd ................................................................................................................................... 228 3.1.2 atomicSub .................................................................................................................................... 
229 vii 3.1.3 atomicExch ................................................................................................................................. 230 3.1.4 atomicMin ................................................................................................................................... 231 3.1.5 atomicMax .................................................................................................................................. 232 3.1.6 atomicInc .................................................................................................................................... 233 3.1.7 atomicDec ................................................................................................................................... 234 3.1.8 atomicCAS .................................................................................................................................. 235 3.2 BitwiseFunctions .................................................................................................................................... 236 3.2.1 atomicAnd ................................................................................................................................... 237 3.2.2 atomicOr ..................................................................................................................................... 238 3.2.3 atomicXor ...............................................................................................................................

231

社区成员

发帖
与我相关
我的任务
社区描述
CUDA on Windows XP
社区管理员
  • CUDA on Windows XP社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧