CUDA纹理对象简介

adagio_chen 2014-09-15 09:34:29

加精

大家知道，传统的CUDA纹理引用（texture reference）只能定义为静态全局变量，而不能作为参数传到Kernel中：



// Legacy texture reference: a 2D float texture read as raw element values.
// It is declared at file scope and referred to by name inside kernels.
// NOTE: the texture reference API is deprecated and was removed in CUDA 12;
// texture objects (cudaTextureObject_t) are the replacement.
texture<float, 2, cudaReadModeElementType> cuda_tex;



// Illustrative kernel (parameter list and texture coordinates are elided in
// the article): it samples the file-scope texture reference cuda_tex by name,
// because a texture reference cannot be passed in as a kernel argument.
__global__

void Kernel(...)

{

    // Fetch one texel from the globally declared 2D texture reference.
    float texel = tex2D(cuda_tex, ...);

}

如果要使用多个纹理，则需要定义多个texture，并且无法使用数组



// With texture references, every texture needs its own separately named
// file-scope variable; they cannot be grouped into an array.
texture<float, 2, cudaReadModeElementType> cuda_tex1;

texture<float, 2, cudaReadModeElementType> cuda_tex2;

texture<float, 2, cudaReadModeElementType> cuda_tex3;

...

很多时候，这样的程序显得既丑陋，又不易维护。

于是，CUDA推出了纹理对象（texture object）这个东西（需要CUDA 5.0及计算能力3.0以上）。

一个纹理对象是用cudaCreateTextureObject()产生的。cudaCreateTextureObject()有4个参数，常用的前三个分别是：
cudaTextureObject_t *texObj：需要创建的纹理对象；
cudaResourceDesc *resDesc：资源描述符，用来描述要获取的纹理数据；
cudaTextureDesc *texDesc：纹理描述符，用来描述纹理参数；
其中cudaTextureDesc定义如下：



// Texture sampling parameters; passed as the texDesc argument of
// cudaCreateTextureObject(). Field notes below follow the CUDA Runtime API
// reference. NOTE(review): newer toolkits append further fields to this
// struct -- check the runtime header of the toolkit version in use.
struct cudaTextureDesc

{

// How out-of-range coordinates are handled, one entry per texture dimension.
enum cudaTextureAddressMode addressMode[3];

// Filtering applied when fetching (e.g. cudaFilterModeLinear).
enum cudaTextureFilterMode filterMode;

// Whether fetches return the raw element type (cudaReadModeElementType)
// or another read mode.
enum cudaTextureReadMode readMode;

// Non-zero requests sRGB-to-linear conversion on read (per the API reference).
int sRGB;

// Non-zero: texture coordinates are normalized instead of texel indices.
int normalizedCoords;

// Upper limit on the anisotropy ratio used for anisotropic filtering.
unsigned int maxAnisotropy;

// Filtering applied between mipmap levels.
enum cudaTextureFilterMode mipmapFilterMode;

// Offset added to the computed mipmap level.
float mipmapLevelBias;

// Lower bound of the mipmap level range that accesses are clamped to.
float minMipmapLevelClamp;

// Upper bound of the mipmap level range that accesses are clamped to.
float maxMipmapLevelClamp;

};

以下的代码是一个使用纹理对象的简单例子：



// Simple texture transform kernel: samples a 2D texture rotated by `theta`
// about the texture center and writes the result to global memory.
//
// Each thread produces one output pixel (x, y). The pixel position is mapped
// to normalized coordinates, shifted so the center is the origin, rotated by
// `theta`, shifted back, and used to fetch from `texObj`.
//
// Parameters:
//   output - device buffer holding at least width * height floats, row-major.
//   texObj - 2D float texture object; it is sampled with normalized
//            coordinates, so the caller is expected to create it with
//            normalizedCoords = 1.
//   width  - output width in pixels (must be positive).
//   height - output height in pixels (must be positive).
//   theta  - rotation angle in radians.
//
// Launch layout: a 2D grid of 2D blocks covering at least width x height
// threads. The grid is normally rounded up to a whole number of blocks, so
// threads that fall outside the image return without writing.
__global__ void transformKernel(float* output,
                                cudaTextureObject_t texObj,
                                int width, int height,
                                float theta)
{
    // Pixel handled by this thread.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The launch grid is a ceil-div of the image size, so edge blocks contain
    // threads beyond the image; without this guard they write past `output`.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized texture coordinates, re-centered on the texture midpoint.
    const float u = x / (float)width - 0.5f;
    const float v = y / (float)height - 0.5f;

    // Rotate around the center, then shift back into [0, 1) space.
    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);
    const float tu = u * cosTheta - v * sinTheta + 0.5f;
    const float tv = v * cosTheta + u * sinTheta + 0.5f;

    // Fetch from the texture and store to global memory; the flat index is
    // computed in 64 bits so large images cannot overflow it.
    output[(size_t)y * (size_t)width + x] = tex2D<float>(texObj, tu, tv);
}



int main()

{

// 定义CUDA array

cudaChannelFormatDesc channelDesc =

cudaCreateChannelDesc(32, 0, 0, 0,

cudaChannelFormatKindFloat);

cudaArray* cuArray;

cudaMallocArray(&cuArray, &channelDesc, width, height);

// 拷贝数据到CUDA array

cudaMemcpyToArray(cuArray, 0, 0, h_data, size,

cudaMemcpyHostToDevice);

// 定义资源描述符

struct cudaResourceDesc resDesc;

memset(&resDesc, 0, sizeof(resDesc));

resDesc.resType = cudaResourceTypeArray;

resDesc.res.array.array = cuArray;

// 定义纹理对象参数

struct cudaTextureDesc texDesc;

memset(&texDesc, 0, sizeof(texDesc));

texDesc.addressMode[0] = cudaAddressModeWrap;

texDesc.addressMode[1] = cudaAddressModeWrap;

texDesc.filterMode = cudaFilterModeLinear;

texDesc.readMode = cudaReadModeElementType;

texDesc.normalizedCoords = 1;

// 生产纹理对象

cudaTextureObject_t texObj = 0;

cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);

// 分配用于保持结果的内存

float* output;

cudaMalloc(&output, width * height * sizeof(float));

// 调用Kernel

dim3 dimBlock(16, 16);

dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,

(height + dimBlock.y - 1) / dimBlock.y);

transformKernel<<<dimGrid, dimBlock>>>(output,

texObj, width, height,

angle);

// 销毁纹理对象

cudaDestroyTextureObject(texObj);

// 释放内存

cudaFreeArray(cuArray);