CUDA读取纹理内存出错

mbb_333 2013-10-18 11:51:45

如题，这个问题很让人困惑，我使用CUDA数组和cudaMallocPitch分配的一维数组都不好用，都出现乱码
我的kernel函数为：





texture<float, cudaTextureType2D, cudaReadModeElementType> texSrc;

__inline__ __global__ void convolutionRows_kernel( float *d_Dst, int imageH, int imageW)

{ 

        int ix = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;

        int iy = __mul24(blockIdx.y, blockDim.y) + threadIdx.y; 

        while(ix < imageW && iy < imageH) 

       {

                 float fx = (float) ix + 0.5f; 

                 float fy = (float) iy + 0.5f; 

                 d_Dst[ix + __mul24(iy, imageW)] = tex2D(texSrc, fx, fy);

                 ix += blockDim.x; 

                 iy += blockDim.y; 

         } 

}

#1-- CUDA数组的使用是：
#1.1--下面的代码是绑定CUDA数组到texture及函数调用



static void convolutionRowsGPU(float* d_DstData, cudaArray* d_srcDataArray, cudaChannelFormatDesc floatTex, unsigned int imageHeight, unsigned int imageWidth)

{

          dim3 myBlock(16, 12);

          dim3 myGrid((imageWidth+(blockDim.x -1))/blockDim.x, (imageHeight +(blockDim.y -1))/ blockDim.y ); 

          //#1-- bind array to texture 

          CUDA_SAFE_CALL( cudaBindTextureToArray(texSrc, d_srcDataArray, floatTex)); 

          //#2 -- call kernel function for row convolution 

          convolutionRows_kernel <<< myGrid ,  myBlock >>>(d_DstData, imageHeight, imageWidth);

         cudaGetLastErrorWrapper("convolutionRows_kernel"); 

        //#3--unbind texture CUDA_SAFE_CALL(cudaUnbindTexture(texSrc)); 

}

#1.2--下面是CUDA数组相关的纹理使用



unsigned int imageHeight = 640; 

 unsigned int imageWidth = 480; 

float* h_sorce2DImage_f = new float[ imageHeight  * imageWidth ];

//分配CUDA数组并赋值

cudaArray* d_fDataArray; 

cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>();

cudaMallocArrayWrapper(&d_fDataArray, & floatTex, imageWidth, imageHeight); 

//d_sorce2DImage_f 是定义的device端变量，大小为imageHeight * imageWidth *sizeof(float) 

cudaMemcpyToArrayWrapper(d_fDataArray, 0, 0, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToDevice); 



//测试1，测试CUDA数组内容

cudaError_t myError = cudaMemcpy2DFromArray(h_sorce2DImage_f, imageWidth, d_fDataArray, 0, 0, imageWidth, imageHeight, cudaMemcpyDeviceToHost);

WiteDataToFile("filted1.txt", h_sorce2DImage_f, imageWidth, imageHeight);



//调用函数，主要是读取纹理内容

convolutionRowsGPU (d_sorce2DImage_f, d_fDataArray,floatTex, imageHeight, imageWidth);

//测试2，测试从纹理中读取的内容

cudaMemcpyWrapper(h_sorce2DImage_f, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToHost);

WiteDataToFile("filted2.txt", h_sorce2DImage_f, imageWidth, imageHeight);



delete [] h_sorce2DImage_f ;

注：其中函数cudaMallocArrayWrapper()，cudaMemcpyToArrayWrapper()，cudaMemcpyWrapper()只是为了方便进行了封装，不过只是在调用对应CUDA的函数后加上宏 CUDA_SAFE_CALL ()而已。

#2--绑定cudaMallocPitch分配的线性内存到纹理内存的情况
这里面的原因是因为cudaMalloc分配的pitch不是2的倍数，在绑定内存时会出错。

#2.1--下面的代码是绑定cudaMallocPitch分配的线性内存到纹理内存及相关调用



static void convolutionRowsGPU(float* d_DstData, const float* d_bindLinearMem, unsigned int imageHeight, unsigned int imageWidth, size_t pitch)

{

        dim3 myBlock(16, 12); 

        dim3 myGrid((imageWidth+(blockDim.x -1))/blockDim.x, (imageHeight +(blockDim.y -1))/ blockDim.y );

        size_t offset; 

       //绑定纹理内存

        cudaChannelFormatDesc chennelDesc = cudaCreateChannelDesc(); 

        cudaBindTexture2D(&offset, texSrc, d_bindLinearMem, chennelDesc, imageWidth, imageHeight, pitch);

       //调用核函数

        convolutionRows_kernel <<<myGrid, myBlock>>(d_DstData, imageHeight, imageWidth);     

        cudaGetLastErrorWrapper("convolutionRows_kernel"); 

        //解绑定

        CUDA_SAFE_CALL(cudaUnbindTexture(texSrc)); 

 }

#2.2--下面是 cudaMallocPitch分配的线性内存相关的纹理使用



unsigned int imageHeight = 640; 

 unsigned int imageWidth = 480; 

float* h_sorce2DImage_f = new float[ imageHeight  * imageWidth ];



//cudaMallocPitchWrapper分配内存，并赋值

float* d_linearGloabalMem; 

size_t pitch; 

cudaMallocPitchWrapper(((void**)& d_linearGloabalMem), &pitch, imageWidth * sizeof(float), imageHeight);

myError = cudaMemcpy2D(d_linearGloabalMem, pitch, d_sorce2DImage_f, imageWidth * sizeof(float), imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToDevice);



//测试3，测试cudaMallocPitch()分配的内存内容

myError = cudaMemcpy2D(h_sorce2DImage_f, imageWidth * sizeof(float), d_linearGloabalMem, pitch, imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToHost);

WiteDataToFile("filted3.txt", h_sorce2DImage_f, imageWidth, imageHeight);



//调用函数，主要是读取纹理内容

convolutionRowsGPU (d_sorce2DImage_f, d_linearGloabalMem, imageHeight, imageWidth, pitch);



//测试4，测试从纹理中读取的内容

cudaMemcpyWrapper(h_sorce2DImage_f, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToHost);

WiteDataToFile("filted4.txt", h_sorce2DImage_f, imageWidth, imageHeight);

delete [] h_sorce2DImage_f ;

很令人感到意外的是测试1 和测试3都有正确的结果，但是测试2 和测试4都是乱码，这个我确实不解，有那个大牛帮忙分析下。

...全文

382 13 打赏收藏转发到动态举报

写回复

用AI写文章

13 条回复

切换为时间正序

请发表友善的回复…

发表回复

豆渐渐飘落 2014-06-16

打赏
举报

好高级的样子观摩观摩

lexlex520 2014-06-16

打赏
举报

这是新语言吗？

qq_16598617 2014-06-16

打赏
举报

这是新语言吗？

u010710198 2014-06-16

打赏
举报

真的很深奥啊

baidu_16598717 2014-06-16

打赏
举报

很深奥啊！表示看不懂

qq_16595745 2014-06-16

打赏
举报

2013-10-18 11:51:45

chenruimintr 2014-06-15

打赏
举报

第一次回复

壹Ⅱ仨。 2014-06-15

打赏
举报

很深奥啊！表示看不懂

可爱又迷人的反派角色678 2014-06-15

打赏
举报

看不懂

ablusky 2014-06-15

打赏
举报

CSDN材料不错，就是都要积分，麻烦

ljfswufewww 2014-06-15

打赏
举报

看不太懂啊，这样子，能不能给下载积分呀

魂丶尐翔 2014-06-15

打赏
举报

//测试3，测试cudaMallocPitch()分配的内存内容myError = cudaMemcpy2D(h_sorce2DImage_f, imageWidth * sizeof(float), d_linearGloabalMem, pitch, imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToHost);WiteDataToFile("filted3.txt", h_sorce2DImage_f, imageWidth, imageHeight);