CUDA 第一次运行或者修改代码后点ctrl+F5时结果为-842150451, 第二次运行结果正常正常

Lyn1993 2012-07-22 03:56:10

我的环境是VS2010, win32 控制台程序, cuda4.1
每次第一次运行程序(ctrl+F5), 或者修改代码后第一次运行程序, 结果都会出错, 但是第二次就正常了, 不知道为什么. 附上我的源码:





//kernel function

//get the gradiant, for signal channel graph, kernel is [-1, 0, 1] and inverse of [-1, 0, 1]

__global__ static void gradiant(unsigned char* img, int* GradiantX, int* GradiantY, float* GradiantMagnitude, float* GradiantOrientation, int imgWidth, int imgHeight){

    int mulx, muly;

    int posx, posy;

    int X, Y, Z;

    mulx=ceil(imgWidth/(float)THREAD_NUM);

    muly=ceil(imgHeight/(float)BLOCK_NUM);



    for(int i=0; i<muly; i++){

        for(int j=0; j<mulx; j++){

            posy= i*BLOCK_NUM+ blockIdx.x;

            posx= j*THREAD_NUM+ threadIdx.x;

            if(posx<imgWidth&&posy<imgHeight){

                int index= posx+ posy* imgWidth;

                int up= posy-1; if(up<0)up=0;        int down= posy+1; if(down>=imgHeight)down= imgHeight-1;

                int indexUp= up*imgWidth+posx;    int indexDown= down*imgWidth+ posx;

                GradiantY[index]= img[indexDown]- img[indexUp];



                int left= posx-1; if(left<0)left=0;        int right= posx+1; if(right>=imgWidth)right=imgWidth-1;

                int indexLeft= posy*imgWidth+left;        int indexRigt= posy* imgWidth+ right;

                GradiantX[index]= img[indexRigt]- img[indexLeft];



                GradiantMagnitude[index]= sqrt( (float)(GradiantX[index]*GradiantX[index]+GradiantY[index]*GradiantY[index]) );

                //float angle= atan( ( (float)GradiantY[index] ) / (float)(GradiantX[index]+0.1 ) );

                //if(angle<0)angle+= PI;

                //GradiantOrientation[index]= angle;

                GradiantOrientation[index]= (float) (GradiantX[index]+GradiantY[index]);

            }

        }

    }

}





extern "C" void getGradiant(unsigned char* img, int* GradiantX, int* GradiantY, float* GradiantMagnitude, float* GradiantOrientation, int imgWidth, int imgHeight){

    unsigned char* d_img;

    int* d_GradiantX;

    int* d_GradiantY;

    float* d_GradiantMagnitude;

    float* d_GradiantOrientation;



    cudaMalloc( (void**)&d_img, sizeof(unsigned char)* imgWidth* imgHeight);

    cudaMalloc( (void**)&d_GradiantX, sizeof(int)* imgWidth* imgHeight);

    cudaMalloc( (void**)&d_GradiantY, sizeof(int)* imgWidth* imgHeight);

    cudaMalloc( (void**)&d_GradiantMagnitude, sizeof(float)* imgWidth* imgHeight);

    cudaMalloc( (void**)&d_GradiantOrientation, sizeof(float)* imgWidth* imgHeight);

    cudaMemcpy(d_img, img, sizeof(unsigned char)* imgWidth* imgHeight, cudaMemcpyHostToDevice);



    gradiant<<<BLOCK_NUM, THREAD_NUM>>>(d_img, d_GradiantX, d_GradiantY, d_GradiantMagnitude, GradiantOrientation, imgWidth, imgHeight);





    cudaMemcpy(GradiantX, d_GradiantX, sizeof(int)* imgWidth* imgHeight, cudaMemcpyDeviceToHost);

    cudaMemcpy(GradiantY, d_GradiantY, sizeof(int)* imgWidth* imgHeight, cudaMemcpyDeviceToHost);

    cudaMemcpy(GradiantMagnitude, d_GradiantMagnitude, sizeof(float)* imgWidth* imgHeight, cudaMemcpyDeviceToHost);

    cudaMemcpy(GradiantOrientation, d_GradiantOrientation, sizeof(float)*imgWidth*imgHeight, cudaMemcpyDeviceToHost);



}