请问为什么我的程序结果都是0呢?

over_sky 2016-01-06 05:03:08
我的程序目的就是分别计算一个数与其他数之间的距离,然后将符合一定条件的距离点的位置存储起来。输入数据目前是300*3.但是输出的数据都是0,应该是我的核函数没有正确执行,但是我自己找不到错误在哪?还有就是使用atomicAdd和__syncthreads()需要头文件吗?
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <fstream>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <device_functions.h>

using namespace std;
const int lineNum = 300;
const int threadsPerBlock = 64;
const int blocksPerGrid = (lineNum + threadsPerBlock - 1) % threadsPerBlock;
const int neighborMaxNum = 300;
const float R = 0.03;

__global__ void calDistance(int num, float standard_x, float standard_y, float standard_z, float *x_dev, float *y_dev, float *z_dev, int* result, int line, int width, float R)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int count = 0;
if (tid < line)
{
float dist = pow(x_dev[tid] - standard_x, 2) + pow(y_dev[tid] - standard_y, 2) + pow(z_dev[tid] - standard_z, 2);
if ((dist < pow(R, 2)) && (count<width))
{
result[num*neighborMaxNum+count] = tid;
atomicAdd(&count, 1);
//cout << count << endl;
//tid += blockDim.x*blockIdx.x;
//__syncthreads();
}
}
}

int main()
{
cout << "begin" << endl;
ifstream input("test.txt");
float *x_Host=new float[lineNum];
float *y_Host=new float[lineNum];
float *z_Host=new float[lineNum];
//int *result_Host=[lineNum][neighborMaxNum] = {};
/*int **result_Host;
result_Host = new int *[lineNum];
for (int i = 0; i < lineNum; i++)
{
result_Host[i] = new int[neighborMaxNum];
}
for (int m = 0; m < lineNum; m++)
for (int n = 0; n < neighborMaxNum; n++)
{
result_Host[m][n] = -1;
}*/
int *result_Host = new int[lineNum*neighborMaxNum];
for (int i = 0; i < lineNum*neighborMaxNum; i++)
{
result_Host[i] = -1;
}
ofstream output("result2.txt");
for (int j = 0; j < lineNum; j++)
{
for (int k = 0; k < neighborMaxNum; k++)
{
output << result_Host[j*neighborMaxNum + k] << " ";
}
output << endl;
}
output.close();
float *x_dev, *y_dev, *z_dev;
int *result_device;
//thrust::host_vector<thrust::host_vector> result_host(lineNum);
//thrust::device_vector<thrust::device_vector> result_device(lineNum);

cudaMalloc((void**)&x_dev, lineNum*sizeof(float));
cudaMalloc((void**)&y_dev, lineNum*sizeof(float));
cudaMalloc((void**)&z_dev, lineNum*sizeof(float));
cudaMalloc((void**)&result_device, sizeof(int)*lineNum*neighborMaxNum);

for (int i = 0; i<lineNum; i++)
{
input >> x_Host[i] >> y_Host[i] >> z_Host[i];
}
input.close();

cudaMemcpy(x_dev, x_Host, lineNum*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(y_dev, y_Host, lineNum*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(z_dev, z_Host, lineNum*sizeof(float), cudaMemcpyHostToDevice);
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
for (int i = 0; i<lineNum; i++)
{
calDistance <<<blocksPerGrid, threadsPerBlock >>>(i, x_dev[i], y_dev[i], z_dev[i], x_dev, y_dev, z_dev, result_device, lineNum, neighborMaxNum, R);
cudaThreadSynchronize();
//cudaError_t error = cudaGetLastError();
//cout << cudaGetErrorString(error) << endl;
}
cout << "kernel" << endl;
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cout << "execute time " << time << endl;
//cudaMemcpy2D(result_Host, neighborMaxNum*sizeof(int), result_device, neighborMaxNum*sizeof(int), neighborMaxNum*sizeof(int), lineNum, cudaMemcpyDeviceToHost);
cudaMemcpy(result_Host, result_device, lineNum*neighborMaxNum*sizeof(int), cudaMemcpyDeviceToHost);
cout << "here1" << endl;
cudaFree(x_dev);
cudaFree(y_dev);
cudaFree(z_dev);
cudaFree(result_device);
//cudaFree(result_device);

//for (int j = 0; j<lineNum; j++)
// for (int k = 0; k<neighborMaxNum; k++)
// {
// if (result_Host[j][k] != 0)
// {
// cout << result_Host[j][k];
// }
// }
ofstream output2("result.txt");
for (int j = 0; j < lineNum; j++)
{
for (int k = 0; k < neighborMaxNum; k++)
{
output2 << result_Host[j*neighborMaxNum+k] << " ";
}
output2 << endl;
}
output2.close();
delete[] x_Host;
delete[] y_Host;
delete[] z_Host;
//for (int i = 0; i < lineNum; i++)
//{
// delete[] result_Host[i];
//}
delete[] result_Host;
cout << "finished" << endl;
return 0;

}


//#include<iostream>
//#include<fstream>
//
//using namespace std;
//const int lineNum = 1007428;
//int main()
//{
// cout << "begin" << endl;
// ifstream input("model_test.txt");
// float x_Host[lineNum] = {};
// float y_Host[lineNum] = {};
// float z_Host[lineNum] = {};
// //thrust::host_vector<thrust::host_vector> result_host(lineNum);
// //thrust::device_vector<thrust::device_vector> result_device(lineNum);
//
//
// for (int i = 0; i < lineNum; i++)
// {
// input >> x_Host[i] >> y_Host[i] >> z_Host[i];
// }
// input.close();
// return 0;
//}
...全文
138 回复 打赏 收藏 转发到动态 举报
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复

581

社区成员

发帖
与我相关
我的任务
社区描述
CUDA™是一种由NVIDIA推出的通用并行计算架构,该架构使GPU能够解决复杂的计算问题。 它包含了CUDA指令集架构(ISA)以及GPU内部的并行计算引擎。
社区管理员
  • CUDA编程社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧