580
社区成员
发帖
与我相关
我的任务
分享
// Type definitions
// GPUplan bundles the device-side buffers and the CUDA stream that belong together.
// NOTE(review): the "..." lines are members elided in this excerpt; presumably one
// GPUplan exists per GPU (it is indexed as plan[i] elsewhere) — confirm.
typedef struct
{
...
// Device pointer to float3 normals; allocated with cudaMalloc for
// IMG_HEIGHT * IMG_WIDTH elements.
float3* dev_normal;
...
// CUDA stream
cudaStream_t stream;
} GPUplan;
// Allocate the normal-map buffer of plan[i]: one float3 per pixel
// (IMG_HEIGHT * IMG_WIDTH elements). The cudaMalloc result is handed to
// checkCudaErrors, so the return code is not silently dropped.
checkCudaErrors(cudaMalloc((void**)&plan[i].dev_normal, IMG_HEIGHT * IMG_WIDTH * sizeof(float3)));
/*
 * Computes a per-pixel normal map from a depth image.
 *
 * For every pixel (x, y) with x < IMG_WIDTH - 1 and y < IMG_HEIGHT - 1 whose
 * depth is non-zero and whose right and lower neighbours pass the
 * absDevUshort(...) < 20 test, the pixel and both neighbours are converted to
 * 3D points and the normalized cross product
 *     (right - centre) x (down - centre)
 * is stored. Every other pixel (including the last row and last column)
 * receives (0, 0, 0).
 *
 * Parameters:
 *   dev_depth  input depth image, read at index y * IMG_WIDTH + x.
 *   normalMap  output buffer, written at index y * IMG_WIDTH + x for every
 *              pixel of the IMG_WIDTH x IMG_HEIGHT image.
 *   K          four floats read as K[0] = fx, K[1] = px, K[2] = fy, K[3] = py.
 *   T          three floats (translation vector) subtracted from each point.
 *
 * Launch layout: 1D grid of 1D blocks. Rows are distributed over blockIdx.x
 * and columns over threadIdx.x; both loops advance by the grid/block size and
 * are bounded by the image size, so any 1D launch configuration covers the
 * whole image and never writes outside it. With IMG_HEIGHT blocks of IMG_WIDTH
 * threads, each thread processes exactly one pixel.
 *
 * No shared memory is used and threads do not depend on each other, so no
 * block barrier is needed.
 */
__global__ void getNormalMapKernel(const ushort *dev_depth, float3 *normalMap, const float *K ,const float *T)
{
    // Row loop: block index selects the starting row.
    for (int y = blockIdx.x; y < IMG_HEIGHT; y += (int)gridDim.x)
    {
        // Column loop: thread index selects the starting column.
        for (int x = threadIdx.x; x < IMG_WIDTH; x += (int)blockDim.x)
        {
            // Default for pixels without a valid normal.
            float3 normal = make_float3(0.0f, 0.0f, 0.0f);

            // The right and lower neighbours must exist inside the image.
            if (x < IMG_WIDTH - 1 && y < IMG_HEIGHT - 1)
            {
                ushort depth = dev_depth[y * IMG_WIDTH + x];
                ushort depth_right = dev_depth[y * IMG_WIDTH + x + 1];
                ushort depth_down = dev_depth[(y + 1) * IMG_WIDTH + x];

                // Skip pixels with zero depth and pixels whose neighbours differ too much.
                // NOTE(review): absDevUshort is defined elsewhere; presumably it returns the
                // absolute difference of two ushort values — confirm.
                if (depth && absDevUshort(depth, depth_right) < 20 && absDevUshort(depth, depth_down) < 20)
                {
                    /* Compute camera-space coordinates and subtract the translation vector:
                     *
                     * cx = (x - px) * depth / fx - T1
                     * cy = (y - py) * depth / fy - T2
                     * cz = depth - T3
                     */
                    float3 cameraPos = make_float3((x - K[1]) * depth / K[0] - T[0], (y - K[3]) * depth / K[2] - T[1], depth - T[2]);
                    float3 cameraPosRight = make_float3((x + 1 - K[1]) * depth_right / K[0] - T[0], (y - K[3]) * depth_right / K[2] - T[1], depth_right - T[2]);
                    float3 cameraPosDown = make_float3((x - K[1]) * depth_down / K[0] - T[0], (y + 1 - K[3]) * depth_down / K[2] - T[1], depth_down - T[2]);

                    // Cross product (right - centre) x (down - centre), normalized.
                    // NOTE(review): normalize() is defined elsewhere; confirm its result for a
                    // zero-length input if degenerate neighbourhoods can occur.
                    normal = normalize(cross(cameraPosRight - cameraPos, cameraPosDown - cameraPos));
                }
            }

            // In-bounds by construction: x < IMG_WIDTH and y < IMG_HEIGHT here.
            normalMap[y * IMG_WIDTH + x] = normal;
        }
    }
}
/*
 * Host-side launcher for getNormalMapKernel.
 *
 * Enqueues the kernel on `stream` with one block per image row (IMG_HEIGHT
 * blocks) and one thread per image column (IMG_WIDTH threads per block), and
 * no dynamic shared memory. The pointer arguments are forwarded to the kernel
 * unchanged. The function does not synchronize with the stream itself.
 *
 * NOTE(review): IMG_WIDTH is used directly as the block size, so it must not
 * exceed the device's maximum number of threads per block.
 */
extern "C" void launch_getNormalMapKernel(const ushort *dev_depth, float3 *normalMap, const float *K, const float *T, cudaStream_t &stream)
{
    // Launch geometry: rows map to blocks, columns map to threads.
    const dim3 blocksPerGrid(IMG_HEIGHT);
    const dim3 threadsPerBlock(IMG_WIDTH);

    getNormalMapKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(dev_depth, normalMap, K, T);

    // Hand the failure message to getLastCudaError right after the launch.
    getLastCudaError("getNormalMapKernel() execution failed.\n");
}