看不到bank conflict，求指教

土豆南瓜粥 2014-04-01 08:11:05

#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <device_functions.h>
#include <stdio.h>
#include <assert.h>
//void checkCUDAError(const char *msg);
#define TILE_DIM 16

__global__ void transposeCoalesced(float *idata, float *odata,int width, int height)
{
// __shared__ float tile[TILE_DIM][TILE_DIM+1];
int xIndex = blockIdx.x * TILE_DIM + threadIdx.x;
int yIndex = blockIdx.y * TILE_DIM + threadIdx.y;
int index_in = xIndex + width * yIndex;
int index_out = yIndex + height * xIndex;
odata[index_out]=idata[index_in];

//odata[index_out]=2;
/* xIndex = blockIdx.y * TILE_DIM + threadIdx.x;
yIndex = blockIdx.x * TILE_DIM + threadIdx.y;
tile[threadIdx.y][threadIdx.x] = idata[index_in];

__syncthreads();

odata[index_out] = tile[threadIdx.x][threadIdx.y];
*/
}

/**
* Host function that prepares data array and passes it to the CUDA kernel.
*/
int main(void)
{
// pointer for host memory
float *odata, * h_a, * h_b;
float *idata;
int width;
int height;
printf("please enter the width of A\n");
scanf("%d", &width);
printf("please enter the height of A\n");
scanf("%d", &height);
h_a = (float *)malloc(sizeof(float)*width*height);
h_b = (float *)malloc(sizeof(float)*height*width);

for(int n=0;n<height;n++)
for(int i=width*n;i<width*(n+1);i++)
h_a[i]=n;

for(int i=0;i<height*width;i++)
h_b[i]=0;

// Part 1 of 5: allocate host and device memor

cudaMalloc((void **)&idata , sizeof(float)*width*height);
cudaMalloc((void **)&odata , sizeof(float)*height*width);

cudaMemcpy(idata,h_a,sizeof(float)*width*height,cudaMemcpyHostToDevice);
cudaMemcpy(odata,h_b,sizeof(float)*height*width,cudaMemcpyHostToDevice);

dim3 dimGrid(width / TILE_DIM,height / TILE_DIM);
dim3 dimBlock(TILE_DIM, TILE_DIM);

cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);

transposeCoalesced<<< dimGrid,dimBlock>>>( idata,odata,width,height );

cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaThreadSynchronize();
cudaMemcpy(h_b,odata,sizeof(float)*height*width,cudaMemcpyDeviceToHost);

// Part 5 of 5: verify the data returned to the host is correct
for(int i=0;i<height*width;i++)
{
printf("%f\t", h_b[i]);
}

printf("%f\n", elapsedTime);
cudaFree(idata);
cudaFree(odata);;
// free host memory
free(h_a);
free(h_b);
system("pause");
return 0;
}

...全文

218 14 打赏收藏转发到动态举报

写回复

14 条回复

切换为时间正序

请发表友善的回复…

发表回复

qq_16530673 2014-06-14

打赏
举报

回复

学习中。。。。

qq_16529397 2014-06-14

打赏
举报

回复

受委屈的我是事实

wahanhan 2014-06-14

打赏
举报

回复

正在学习中。。。

sword85236699 2014-06-14

打赏
举报

回复

不知道哦。。。。。。

lcaiwenxin 2014-06-13

打赏
举报

回复

这个不太清楚不懂诶才学疏浅

sinat_16515827 2014-06-13

打赏
举报

回复

不懂诶才学疏浅

zaa124 2014-06-13

打赏
举报

回复

不太懂!能说清点吗

lsabm1314 2014-06-13

打赏
举报

回复

这个真不清楚

pursuer99 2014-06-13

打赏
举报

回复

这个不太清楚

the_venus 2014-06-08

打赏
举报

回复

bank conflict 的表现只是速度慢了些，性能差了些而已。程序运行一切正常。

Double_Lan_2975 2014-04-12

打赏
举报

回复

LZ你是通过什么来观察bank conflict的？？

Composer (精进) composer.json组织架构，系统讲解了composer.json文件中所涉的所有字段。统一进行的梳理和分析。深入详细了学习了composer.json中的20多个属性。01 Composer (精进) composer.json 概述02 composer.json 组织架构 JSON 格式03 composer.json 属性 name 名称04 composer.json 字段描述05 composer.json 字段版本和类型06 composer.json 字段关键字和主页07 composer.json 许可证作者和支持08 composer.json 核心属性依赖 require09 composer.json 依赖 require-dev10 composer.json 依赖 conflict冲突11 composer.json 依赖 replace 代替12 composer.json 依赖提供和建议13 composer.json 自动加载 psr-414 composer.json 自动加载 psr-0 简单15 composer.json 自动加载 psr-0 复杂16 composer.json 自动加载二级命名空间17 composer.json 自动加载 psr-0 深入 18 composer.json 自动加载 classmap19 composer.json 自动加载 files20 composer.json 自动加载排除21 composer.json 自动加载开发阶段22 composer.json 稳定性过滤23 composer.json 首选稳定24 composer.json 仓库地址本地配置25 composer.json 本地配置26 Composer (精进) composer.json 总结

19 cold 冷 frig frigid ['fridʒid]达不到高潮的；寒冷的； 20 color 颜色 chrom monochrome ['mɑnə.kroʊm]黑白的；单色的 21 come 来 ven intervene [.intər'vin]干扰；出面；插嘴 22 connect 连接 junct ...

那么既然英国人学英语不需要记住（甚至根本就见不到）单词的汉语意思，那么中国人学英语为什么要去记住单词的汉语意思呢？这种做法大家不觉得奇怪吗？然而由于中国人学英语时都在背单词的汉语意思，因此大家反而觉...

那么既然英国人学英语不需要记住（甚至根本就见不到）单词的汉语意思，那么中国人学英语为什么要去记住单词的汉语意思呢？这种做法大家不觉得奇怪吗？然而由于中国人学英语时都在背单词的汉语意思，因此大家反而觉...

本文章转自https://blog.csdn.net/qq877507054/article/details/82909070，此文章刷新了我对单词的认知，虽然自己平时记单词也是分节来记忆的，但是不知道原来每...那么英国人在学英语单词的时候需不需要记住单词的...

579

社区成员

2,918

社区内容

发帖

与我相关

我的任务

社区管理员

加入社区

近7日
近30日
至今

加载中

查看更多榜单

社区公告

暂无公告

试试用AI创作助手写篇文章吧

+ 用AI写文章