589
社区成员




#include<stdio.h>
#include<stdlib.h>
#include<memory.h>
#include<cuda.h>
#pragma comment(lib,"cuda.lib")
int main(int argc,char** argv)
{
CUdevice dev;
CUcontext ctx;
cuInit(0);
cuDeviceGet(&dev,0);
cuCtxCreate(&ctx,CU_CTX_SCHED_AUTO,dev);
CUdeviceptr dpDst,dpSrc;
cuMemAlloc(&dpDst, 50* 50*sizeof(int));
cuMemAlloc(&dpSrc,100*100*sizeof(int));
int* hPtr=new int[10000];
for(int y=0;y<100;++y){
for(int x=0;x<100;++x){
int idx=100*y+x;
hPtr[idx]=idx;
}
}
cuMemcpyHtoD(dpSrc,hPtr,10000*sizeof(int));
delete [] hPtr;
CUDA_MEMCPY2D planeMem;
memset(&planeMem,0,sizeof(planeMem));
planeMem.srcMemoryType=CU_MEMORYTYPE_DEVICE;
planeMem.srcDevice =dpSrc;
planeMem.srcXInBytes =25*sizeof(int);
planeMem.srcY =25;
planeMem.srcPitch =100*sizeof(int);
planeMem.dstMemoryType=CU_MEMORYTYPE_DEVICE;
planeMem.dstDevice =dpDst;
planeMem.dstXInBytes =0;
planeMem.dstY =0;
planeMem.dstPitch =50*sizeof(int);
planeMem.WidthInBytes =planeMem.dstPitch;
planeMem.Height =50;
cuMemcpy2DUnaligned(&planeMem);
FILE* fp=fopen("output.txt","wb");
hPtr=new int[2500];
cuMemcpyDtoH(hPtr,dpDst,2500*sizeof(int));
for(int y=0;y<50;++y){
for(int x=0;x<50;++x){
fprintf(fp,"%d ",hPtr[50*y+x]);
}
fprintf(fp,"\r\n");
}
delete [] hPtr;
fclose(fp);
cuMemFree(dpDst);
cuMemFree(dpSrc);
return 0;
}
// Stand-alone snippet: fills in a 2D copy descriptor and issues cuMemcpy2D.
// NOTE(review): these statements sit outside any function in this file and rely
// on planeMem (presumably a CUDA_MEMCPY2D), pSrc and pDst being declared by
// surrounding code that is not shown here.
planeMem.srcMemoryType =CU_MEMORYTYPE_DEVICE; // source is device memory
planeMem.srcDevice =pSrc; // source device pointer
planeMem.srcXInBytes =0; // byte offset of the first copied column within each source row
planeMem.srcPitch =200*sizeof(float); // bytes between the starts of consecutive source rows (200 floats)
planeMem.srcY =0; // index of the first source row to copy (a row offset, not a column)
planeMem.dstMemoryType =CU_MEMORYTYPE_DEVICE; // destination is device memory
planeMem.dstDevice =pDst; // destination device pointer
planeMem.dstXInBytes =0; // byte offset of the first written column within each destination row
planeMem.dstY =0; // index of the first destination row to write (a row offset, not a column)
planeMem.dstPitch =100*sizeof(float); // bytes between the starts of consecutive destination rows (100 floats)
planeMem.WidthInBytes =100*sizeof(float); // number of bytes copied per row
planeMem.Height=80; // number of rows copied
cuMemcpy2D(&planeMem); // NOTE(review): the returned status is ignored here