374
社区成员
发帖
与我相关
我的任务
分享
#include <QCoreApplication>
#include"add1.h"
// Forward declaration of the GPU entry point. It has C linkage; the three
// pointers are host arrays (two inputs, one output).
extern "C"
void runtest(int *host_a, int *host_b, int *host_c);// GPU processing function
// Benchmark driver: fills two input arrays, times the GPU addition performed
// by runtest(), then times the same addition on the CPU and prints both
// durations in milliseconds.
//
// Returns 0 after waiting for a key press.
int main()
{
    // Static storage keeps the three datasize-element buffers (3 * datasize
    // ints, roughly 600 KB with 4-byte int) off the stack; as automatic
    // variables they would consume most of a 1 MB default stack.
    static int a[datasize], b[datasize], c[datasize];

    for (size_t i = 0; i < datasize; i++)
    {
        a[i] = (int)i;
        // The product is formed in size_t and then narrowed to int, which is
        // what the implicit conversion did before; the cast only makes the
        // narrowing visible.
        b[i] = (int)(i * i);
    }

    // GPU measurement: the interval covers the whole runtest() call.
    clock_t gpuStart = clock();
    runtest(a, b, c);
    printf("GPU运行时间为:%dms\n", int(((double)(clock() - gpuStart)) / CLOCKS_PER_SEC * 1000));

    // CPU measurement: the same addition, repeated 50000 times per element to
    // mirror the repeat count used by the GPU kernel. Note that this
    // overwrites the GPU result stored in c.
    clock_t cpuStart = clock();
    for (size_t i = 0; i < datasize; i++)
    {
        for (size_t k = 0; k < 50000; k++)
        {
            c[i] = (a[i] + b[i]);
        }
    }
    printf("CPU运行时间为:%dms\n", int(((double)(clock() - cpuStart)) / CLOCKS_PER_SEC * 1000));

    /* Uncomment to inspect the first 100 results:
    for (size_t i = 0; i < 100; i++)
    {
        printf("%d+%d=%d\n", a[i], b[i], c[i]);
    }*/

    // Blocks until a character is read from stdin (presumably to keep the
    // console window open).
    getchar();
    return 0;
}
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "add1.h"
// Error reporting helper: when err is not cudaSuccess, writes the runtime's
// description of the error to stderr. It never aborts and returns nothing, so
// callers that need to react to a failure must inspect the status themselves.
inline void checkCudaErrors(cudaError err)
{
    if (err == cudaSuccess)
    {
        return;
    }
    fprintf(stderr, "CUDA Runtime API error: %s.\n", cudaGetErrorString(err));
}
__global__ void add(int *a,int *b,int *c)//处理核函数
{
int tid = blockIdx.x*blockDim.x+threadIdx.x;
for (size_t k = 0; k < 50000; k++)
{
c[tid] = a[tid] + b[tid];
}
}
extern "C"
void runtest(int *host_a, int *host_b, int *host_c)
{
int *dev_a, *dev_b, *dev_c;
checkCudaErrors(cudaMalloc((void**)&dev_a, sizeof(int)* datasize));//分配显卡内存
checkCudaErrors(cudaMalloc((void**)&dev_b, sizeof(int)* datasize));
checkCudaErrors(cudaMalloc((void**)&dev_c, sizeof(int)* datasize));
checkCudaErrors(cudaMemcpy(dev_a, host_a, sizeof(int)* datasize, cudaMemcpyHostToDevice));//将主机待处理数据内存块复制到显卡内存中
checkCudaErrors(cudaMemcpy(dev_b, host_b, sizeof(int)* datasize, cudaMemcpyHostToDevice));
add << <datasize / 100, 100 >> >(dev_a, dev_b, dev_c);//调用显卡处理数据
checkCudaErrors(cudaMemcpy(host_c, dev_c, sizeof(int)* datasize, cudaMemcpyDeviceToHost));//将显卡处理完数据拷回来
cudaFree(dev_a);//清理显卡内存
cudaFree(dev_b);
cudaFree(dev_c);
return 0;
}
#ifndef ADD1_H
#define ADD1_H
// Shared declarations for the host driver and the CUDA translation unit.

// Time-related header; its functions can be used to measure image-processing speed.
#include<time.h>
#include <iostream>
#include <stdio.h>
// Number of int elements in each array handed to runtest().
#define datasize 50000
// GPU entry point with C linkage: adds host_a and host_b element-wise on the
// device and writes the sums to host_c. Each pointer must reference datasize ints.
extern "C"
void runtest(int *host_a, int *host_b, int *host_c);
#endif // ADD1_H