579
社区成员
发帖
与我相关
我的任务
分享
__global__ void coalescedMultiply(float *a, float* b, float *c,
int N)
{
__shared__ float aTile[TILE_DIM][TILE_DIM];
int row = blockIdx.y * blockDim.y + threadIdx.y;
int col = blockIdx.x * blockDim.x + threadIdx.x;
float sum = 0.0f;
aTile[threadIdx.y][threadIdx.x] = a[row*TILE_DIM+threadIdx.x];
for (int i = 0; i < TILE_DIM; i++) {
sum += aTile[threadIdx.y][i]* b[i*N+col];
}
c[row*N+col] = sum;
}