589
社区成员




// File-scope 2D texture reference over float texels, fetched as raw element
// values (cudaReadModeElementType). The host wrappers bind it to the source
// image before launching convolutionRows_kernel, which samples it with tex2D.
// NOTE(review): texture references are a legacy API (removed in CUDA 12) --
// confirm the toolkit version this file is built against.
texture<float, cudaTextureType2D, cudaReadModeElementType> texSrc;
/**
 * Copies the image currently bound to texSrc into a row-major linear buffer,
 * writing one texel per output pixel.
 *
 * Launch layout: 2D grid of 2D blocks. Each thread starts at its own global
 * (x, y) position and strides over the image by the full grid extent in each
 * dimension, so every pixel is written exactly once for any grid size,
 * including grids smaller than the image.
 *
 * The previous loop advanced x and y together by blockDim, which made each
 * thread walk a diagonal: redundant writes with a full grid, and unwritten
 * pixels with a smaller one.
 *
 * @param d_Dst   Destination buffer written as d_Dst[y * imageW + x]; must be
 *                device-accessible and hold at least imageH * imageW floats.
 * @param imageH  Number of rows to write.
 * @param imageW  Number of columns to write (also the destination row stride).
 */
__inline__ __global__ void convolutionRows_kernel( float *d_Dst, int imageH, int imageW)
{
    // Plain 32-bit multiplies replace the legacy __mul24 intrinsic.
    const int startX  = blockIdx.x * blockDim.x + threadIdx.x;
    const int startY  = blockIdx.y * blockDim.y + threadIdx.y;
    const int strideX = gridDim.x * blockDim.x;
    const int strideY = gridDim.y * blockDim.y;

    for (int iy = startY; iy < imageH; iy += strideY)
    {
        // +0.5f targets the texel centre.
        // NOTE(review): assumes texSrc uses unnormalized coordinates -- confirm.
        const float fy = (float) iy + 0.5f;

        for (int ix = startX; ix < imageW; ix += strideX)
        {
            const float fx = (float) ix + 0.5f;
            // Widen before multiplying so the flat index cannot overflow int.
            d_Dst[(size_t) iy * (size_t) imageW + (size_t) ix] = tex2D(texSrc, fx, fy);
        }
    }
}
/**
 * Binds a CUDA array to texSrc, launches convolutionRows_kernel to read the
 * texture back into d_DstData, then unbinds the texture.
 *
 * @param d_DstData       Device output buffer passed to the kernel as its
 *                        destination (imageHeight * imageWidth floats).
 * @param d_srcDataArray  CUDA array holding the source image.
 * @param floatTex        Channel format descriptor of d_srcDataArray.
 * @param imageHeight     Image height in pixels.
 * @param imageWidth      Image width in pixels.
 */
static void convolutionRowsGPU(float* d_DstData, cudaArray* d_srcDataArray, cudaChannelFormatDesc floatTex, unsigned int imageHeight, unsigned int imageWidth)
{
    const dim3 myBlock(16, 12);
    // Ceil-divide by the block shape. blockDim is a device-side built-in and
    // is not available in host code, so the host-side myBlock must be used.
    const dim3 myGrid((imageWidth  + myBlock.x - 1) / myBlock.x,
                      (imageHeight + myBlock.y - 1) / myBlock.y);

    // #1 -- bind the array to the texture
    CUDA_SAFE_CALL( cudaBindTextureToArray(texSrc, d_srcDataArray, floatTex));

    // #2 -- launch the kernel that reads the texture
    convolutionRows_kernel <<< myGrid , myBlock >>>(d_DstData, imageHeight, imageWidth);
    cudaGetLastErrorWrapper("convolutionRows_kernel");

    // #3 -- unbind the texture (this call used to be swallowed by the comment
    // on the same line, so the texture stayed bound after returning)
    CUDA_SAFE_CALL(cudaUnbindTexture(texSrc));
}
// Test driver for the CUDA-array texture path: copy the device image into a
// CUDA array, dump the array, read it back through the texture, dump again.
unsigned int imageHeight = 640;
unsigned int imageWidth = 480;
float* h_sorce2DImage_f = new float[ imageHeight * imageWidth ];
// Allocate the CUDA array and fill it from the device-side source image.
cudaArray* d_fDataArray;
cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>();
cudaMallocArrayWrapper(&d_fDataArray, & floatTex, imageWidth, imageHeight);
// d_sorce2DImage_f is a device buffer defined elsewhere, of size
// imageHeight * imageWidth * sizeof(float).
cudaMemcpyToArrayWrapper(d_fDataArray, 0, 0, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToDevice);
// Test 1: dump the contents of the CUDA array.
// cudaMemcpy2DFromArray takes the destination pitch and the row width in
// BYTES, not elements; passing imageWidth alone copied only a quarter of
// each row into a mis-pitched host buffer.
cudaError_t myError = cudaMemcpy2DFromArray(h_sorce2DImage_f, imageWidth * sizeof(float), d_fDataArray, 0, 0, imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToHost);
CUDA_SAFE_CALL(myError);  // surface a failed copy instead of dumping stale data
WiteDataToFile("filted1.txt", h_sorce2DImage_f, imageWidth, imageHeight);
// Run the wrapper whose kernel reads the texture back into d_sorce2DImage_f.
convolutionRowsGPU (d_sorce2DImage_f, d_fDataArray,floatTex, imageHeight, imageWidth);
// Test 2: dump what was read from the texture.
cudaMemcpyWrapper(h_sorce2DImage_f, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToHost);
WiteDataToFile("filted2.txt", h_sorce2DImage_f, imageWidth, imageHeight);
// Release the CUDA array allocated above together with the host buffer.
CUDA_SAFE_CALL(cudaFreeArray(d_fDataArray));
delete [] h_sorce2DImage_f ;
/**
 * Binds pitched linear device memory to texSrc as a 2D texture, launches
 * convolutionRows_kernel to read the texture back into d_DstData, then
 * unbinds the texture.
 *
 * @param d_DstData        Device output buffer passed to the kernel as its
 *                         destination (imageHeight * imageWidth floats).
 * @param d_bindLinearMem  Device memory holding the source image rows.
 * @param imageHeight      Image height in pixels.
 * @param imageWidth       Image width in pixels.
 * @param pitch            Row pitch of d_bindLinearMem in bytes.
 */
static void convolutionRowsGPU(float* d_DstData, const float* d_bindLinearMem, unsigned int imageHeight, unsigned int imageWidth, size_t pitch)
{
    const dim3 myBlock(16, 12);
    // Ceil-divide by the block shape. blockDim is a device-side built-in and
    // is not available in host code, so the host-side myBlock must be used.
    const dim3 myGrid((imageWidth  + myBlock.x - 1) / myBlock.x,
                      (imageHeight + myBlock.y - 1) / myBlock.y);

    // Bind the linear memory as a 2D float texture. The element type must be
    // given explicitly to cudaCreateChannelDesc.
    // NOTE(review): the kernel does not apply `offset` to its fetches; this
    // presumes the base pointer is aligned so that offset == 0 -- confirm.
    size_t offset = 0;
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    CUDA_SAFE_CALL(cudaBindTexture2D(&offset, texSrc, d_bindLinearMem, channelDesc, imageWidth, imageHeight, pitch));

    // Launch the kernel that reads the texture.
    convolutionRows_kernel <<<myGrid, myBlock>>>(d_DstData, imageHeight, imageWidth);
    cudaGetLastErrorWrapper("convolutionRows_kernel");

    // Unbind the texture.
    CUDA_SAFE_CALL(cudaUnbindTexture(texSrc));
}
// Test driver for the pitched-linear-memory texture path: copy the device
// image into a cudaMallocPitch allocation, dump it, read it back through the
// texture, dump again.
unsigned int imageHeight = 640;
unsigned int imageWidth = 480;
float* h_sorce2DImage_f = new float[ imageHeight * imageWidth ];
// Allocate pitched device memory via cudaMallocPitchWrapper and fill it from
// the densely packed device image d_sorce2DImage_f.
float* d_linearGloabalMem;
size_t pitch;
cudaMallocPitchWrapper(((void**)& d_linearGloabalMem), &pitch, imageWidth * sizeof(float), imageHeight);
myError = cudaMemcpy2D(d_linearGloabalMem, pitch, d_sorce2DImage_f, imageWidth * sizeof(float), imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToDevice);
CUDA_SAFE_CALL(myError);  // do not continue with an unfilled buffer
// Test 3: dump the contents of the pitched allocation.
myError = cudaMemcpy2D(h_sorce2DImage_f, imageWidth * sizeof(float), d_linearGloabalMem, pitch, imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToHost);
CUDA_SAFE_CALL(myError);  // surface a failed copy instead of dumping stale data
WiteDataToFile("filted3.txt", h_sorce2DImage_f, imageWidth, imageHeight);
// Run the wrapper whose kernel reads the texture back into d_sorce2DImage_f.
convolutionRowsGPU (d_sorce2DImage_f, d_linearGloabalMem, imageHeight, imageWidth, pitch);
// Test 4: dump what was read from the texture.
cudaMemcpyWrapper(h_sorce2DImage_f, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToHost);
WiteDataToFile("filted4.txt", h_sorce2DImage_f, imageWidth, imageHeight);
// Release the pitched allocation together with the host buffer.
CUDA_SAFE_CALL(cudaFree(d_linearGloabalMem));
delete [] h_sorce2DImage_f ;