374
社区成员
发帖
与我相关
我的任务
分享
#include <stdio.h>
#include <iostream>
#include <iomanip>
#include <math.h>
#include "GPUTSPSolver.h"
using namespace std;
// Single solver instance with static storage duration, shared by the program.
CGPUTSPSolver solverGPU;

// Program entry point: runs one allocate -> initialize -> release cycle on the
// global solver. Command-line arguments are accepted but not used.
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    // Release step first; on a freshly constructed solver every buffer
    // pointer is NULL, so only the log line is produced.
    solverGPU.CleanCudaMemory();

    // Allocate the device/host buffers, then fill and dump the genes.
    solverGPU.PrepareCudaMemory();
    solverGPU.GeneInitCudaMemory();

    // Hand everything back before exiting.
    solverGPU.CleanCudaMemory();

    return 0;
}
以上是 main.cpp。
#ifndef _GPU_TSP_SOLVER
#define _GPU_TSP_SOLVER
// The include guard now encloses the whole header; previously the #endif
// directly followed the #define, so a second inclusion redefined the class.

#ifdef WIN32
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#endif
#include "GPUTSPSolverKernel.cuh"

// Host-side owner of the buffers used for the gene initialization on the GPU.
//
// The class is named CGPUTSPSolver because that is the name used by the
// member definitions and by the global instance in main.cpp.
class CGPUTSPSolver {
    int THREADSPERBLOCK;        // threads per block used for kernel launches
    int nMaxPoplation;          // not referenced by any visible member function

    int *d_gene;                // device buffer: genes
    int *d_gene_randi;          // device buffer: random indices recorded by the kernel

    int *h_gene;                // host buffer matching d_gene
    int *h_gene_randi;          // host buffer matching d_gene_randi

    curandState *d_cudaState;   // device buffer: cuRAND state passed to d_geneInit

public:
    // NOTE(review): declared but no definition is visible in this file;
    // presumably a misspelling of GeneInitCudaMemory - TODO confirm and remove.
    void GenInitCudaMemory(void);

    // Frees the device and host buffers.
    void CleanCudaMemory(void);
    // Allocates the device and host buffers.
    void PrepareCudaMemory(void);
    // Runs the gene initialization kernel and prints the recorded indices.
    void GeneInitCudaMemory(void);

    CGPUTSPSolver();
    ~CGPUTSPSolver();
};  // the terminating ';' was missing

#endif // _GPU_TSP_SOLVER
#include "GPUTSPSolver.h"
#include <stdio.h>
#include <stdlib.h>
// Builds a solver with a block size of 256 threads and every buffer pointer
// cleared, so the release routine can tell allocated buffers from absent ones.
CGPUTSPSolver::CGPUTSPSolver()
    : THREADSPERBLOCK(256),
      d_gene(NULL),
      d_gene_randi(NULL),
      h_gene(NULL),
      h_gene_randi(NULL),
      d_cudaState(NULL)
{
}
// The destructor performs no work: buffers are released only by an explicit
// call to CleanCudaMemory().
CGPUTSPSolver::~CGPUTSPSolver()
{
}
void CGPUTSPSolver::CleanCudaMemory(void){
printf("cleaning cuda memories\n");
if (d_gene) cudaFree(d_gene);
if (d_gene_randi) cudaFree(d_gene_randi);
if (d_cudaState) cudaFree(d_cudaState);
if (h_gene) free(h_gene);
if (h_gene_randi) free(h_gene_randi);
}
void CGPUTSPSolver::PrepareCudaMemory(void){
printf("preparing cuda memories...\n");
cudaMalloc((void **)&d_gene , 2048 * 100 * sizeof(int));
cudaMalloc((void **)&d_gene_randi , 2048 * 100 * sizeof(int));
cudaMalloc((void **)&d_cudaState, sizeof(curandState));
h_gene = (int *) malloc(2048 * 100 * sizeof(int));
h_gene_randi = (int *) malloc(2048 * 100 * sizeof(int));
printf("cuda device memory successfully allocated (%d cities, %d genes)\n", 100, 2018);
}
void CGPUTSPSolver::GeneInitCudaMemory(void) {
int blocksPerGrid = (2048 + THREADSPERBLOCK - 1) / THREADSPERBLOCK;
if (!d_gene) printf("no gene memory\n");
printf("gene initialization with %d threads per block x (%d blocks)\n", THREADSPERBLOCK, blocksPerGrid);
d_geneInit(blocksPerGrid, THREADSPERBLOCK, d_cudaState, 2048, 100, d_gene, d_gene_randi);
printf("initSolver GPU\n");
cudaMemcpy(h_gene_randi, d_gene_randi, sizeof(int)*2048*100, cudaMemcpyDeviceToHost);
for(int i = 0;i<204800;i++){
printf("h_gene_randi[%d] = %d\n", i, h_gene_randi[i]);
};
printf("randi!");
}
#include "GPUTSPSolverKernel.cuh"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <curand.h>
#include <curand_kernel.h>
#include <stdio.h>
#include <math.h>
// Host wrapper that launches d_geneInitKernel on the default stream.
//
// blocks / threads : launch configuration (grid size, block size)
// pState           : device pointer to the cuRAND state passed to the kernel
// nPopulation      : number of genes
// numCities        : number of elements per gene
// gene             : device buffer of nPopulation * numCities ints
// d_gene_randi     : device buffer of the same size for the recorded indices
//
// Launch errors and errors raised while the kernel runs are reported on
// stderr. The "done" message is printed only after the kernel has actually
// finished, because a kernel launch returns before the kernel completes.
void d_geneInit(int blocks, int threads, curandState *pState, unsigned int nPopulation, unsigned int numCities, int *gene, int *d_gene_randi) {
    printf("gene initialization at device started....\n");

    if (blocks <= 0 || threads <= 0) {
        fprintf(stderr, "d_geneInit: invalid launch configuration (%d blocks, %d threads)\n", blocks, threads);
        return;
    }

    d_geneInitKernel<<<blocks, threads>>>(pState, nPopulation, numCities, gene, d_gene_randi);

    // Catches an invalid launch configuration.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "d_geneInitKernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Waits for the kernel and surfaces faults raised during execution.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "d_geneInitKernel execution failed: %s\n", cudaGetErrorString(err));
        return;
    }

    printf("gene initialization at device done\n");
}
// Fills one gene per thread with the identity order 0..numCities-1 and then
// shuffles elements 1..numCities-1 in place; element 0 stays at index 0.
//
// Launch layout: 1D grid of 1D blocks, at least nPopulation threads in total;
// surplus threads exit immediately. No shared memory is used.
//
// pstate       : kept for interface compatibility and no longer dereferenced.
//                Previously every thread read and wrote this one state
//                concurrently, and no curand_init call for it is visible in
//                this file.
// nPopulation  : number of genes
// numCities    : number of elements per gene
// gene         : output, nPopulation * numCities ints, gene-major
// d_gene_randi : output, same layout; slot i of a gene holds the index that
//                element i was swapped with (slot 0 is set to 0)
__global__ void d_geneInitKernel(curandState_t *pstate, unsigned int nPopulation, unsigned int numCities, int *gene, int *d_gene_randi) {
    (void)pstate;

    // tId: gene idx (tId-th gene with numCities elements)
    const unsigned int tId = blockIdx.x * blockDim.x + threadIdx.x;
    if (tId >= nPopulation || numCities == 0) return;

    // Per-thread generator: same seed, distinct sequence number per gene, so
    // threads neither share nor race on generator state and runs are
    // reproducible.
    const unsigned long long kSeed = 1234ULL;
    curandState_t rng;
    curand_init(kSeed, tId, 0, &rng);

    // Offset of this thread's gene; size_t avoids overflow for large inputs.
    const size_t base = (size_t)tId * numCities;

    // gene initialization - cards straight
    for (unsigned int i = 0; i < numCities; i++) {
        gene[base + i] = (int)i;
    }

    // Element 0 is never swapped; record that so the slot is not left
    // uninitialized for the host-side dump.
    d_gene_randi[base] = 0;

    // gene shuffle (Fisher-Yates): the partner is drawn from [i, numCities-1]
    // so that every permutation of elements 1..numCities-1 is equally likely.
    for (unsigned int i = 1; i < numCities; i++) {
        const int rIdx = (int)randi(&rng, 1, (int)i, (int)(numCities - 1));
        d_gene_randi[base + i] = rIdx;

        const int t = gene[base + i];
        gene[base + i] = gene[base + rIdx];
        gene[base + rIdx] = t;
    }
}
// Draws one value from the generator state and maps it into [min, max].
//
// pState : generator state; it is copied to a local, advanced by one draw and
//          written back
// add    : multiplier and offset applied to the raw draw before the modulo
// min    : lower bound (inclusive)
// max    : upper bound (inclusive)
//
// Returns min + (draw * add + add) % (max - min + 1), evaluated in unsigned
// arithmetic. If max < min the range is empty: min is returned and the state
// is left untouched, instead of dividing by zero or wrapping the span.
//
// NOTE(review): with an even `add` the scaled draw is always even, so an even
// span does not receive every residue; pass add = 1 for a plain draw.
__device__ unsigned int randi(curandState *pState, int add, int min, int max) {
    if (max < min) {
        return (unsigned int)min;
    }

    const unsigned int span = (unsigned int)(max - min + 1);
    const unsigned int scale = (unsigned int)add;

    // Work on a local copy and store it back afterwards.
    curandState localState = *pState;
    const unsigned int rndval = (unsigned int)min + (curand(&localState) * scale + scale) % span;
    *pState = localState;

    return rndval;
}
#ifndef _GPUTSPKERNEL__H_2015_YMKANG__
#define _GPUTSPKERNEL__H_2015_YMKANG__
// The include guard now encloses the whole header; previously the #endif
// directly followed the #define and guarded nothing.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <curand.h>
#include <curand_kernel.h>

// Host wrapper: launches d_geneInitKernel with `blocks` x `threads`.
void d_geneInit(int blocks, int threads, curandState *pState, unsigned int nPopulation, unsigned int numCities, int *gene,int *d_gene_randi);

// Kernel: one thread per gene; writes the initial order and a shuffle into
// `gene` and the swap indices into `d_gene_randi`.
__global__ void d_geneInitKernel(curandState_t *state, unsigned int nPopulation, unsigned int numCities, int *gene, int *d_gene_randi);

// Device helper: draws one value from *pState mapped into [min, max].
__device__ unsigned int randi(curandState *pState, int add, int min, int max);

#endif // _GPUTSPKERNEL__H_2015_YMKANG__