580
社区成员
发帖
与我相关
我的任务
分享
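// Conversions between real and complex arrays. They are needed because this code only ever uses cuFFT's C2C (complex-to-complex) transforms.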
// Widens a real array into a complex array: every element becomes (value, 0).
// Each thread handles the single element at its global 1D index; threads whose
// index is >= size do nothing.
//   size   - number of elements to convert
//   input  - real source array, read at indices [0, size)
//   output - complex destination array, written at indices [0, size)
__global__
void RToC(int size, float* input, float2* output)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= size)
    {
        return;  // tail threads of the last block have no element
    }

    float2 value;
    value.x = input[idx];  // real part comes from the input sample
    value.y = 0;           // imaginary part is always zero
    output[idx] = value;
}
// Narrows a complex array into a real array by keeping only the real (.x)
// component; the imaginary component is discarded.
// Each thread handles the single element at its global 1D index; threads whose
// index is >= size do nothing.
//   size   - number of elements to convert
//   input  - complex source array, read at indices [0, size)
//   output - real destination array, written at indices [0, size)
__global__
void CToR(int size, float2* input, float* output)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= size)
    {
        return;  // tail threads of the last block have no element
    }

    const float realPart = input[idx].x;
    output[idx] = realPart;
}
// Multiplies every element of an array, in place, by a scalar factor.
// Each thread handles the single element at its global 1D index; threads whose
// index is >= size do nothing.
//   T     - element type; must support `element *= float`
//   size  - number of elements to scale
//   input - array updated in place at indices [0, size)
//   scale - factor applied to every element
template <class T>
__global__
void Scale(int size, T* input, float scale)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= size)
    {
        return;  // tail threads of the last block have no element
    }

    T& element = input[idx];
    element *= scale;
}
// Forward FFT.
//
// Computes the forward 1D FFT of nx real samples on the GPU. The samples are
// copied to the device, widened to complex values with RToC, transformed in
// place with a single-batch C2C plan, and the complex spectrum is copied back
// to the host. No scaling is applied to the result here.
//   nx     - number of samples; the function returns without doing anything
//            when nx <= 0
//   input  - host array holding nx real samples
//   output - host array that receives nx complex values
// Any failing CUDA or cuFFT call is reported through checkCudaErrors.
// NOTE(review): assumes checkCudaErrors accepts a cufftResult (true for the
// CUDA samples' helper_cuda.h when cufft.h is included before it) - confirm.
void FFT1D(int nx, float* input, float2* output)
{
    if (nx <= 0)
    {
        return;  // nothing to transform; also avoids a zero-sized plan/launch
    }

    const size_t realBytes = static_cast<size_t>(nx) * sizeof(float);
    const size_t complexBytes = static_cast<size_t>(nx) * sizeof(float2);

    // Upload the real samples.
    float* input_d = NULL;
    checkCudaErrors( cudaMalloc((void**)&input_d, realBytes) );
    checkCudaErrors( cudaMemcpy(input_d, input, realBytes, cudaMemcpyHostToDevice) );

    // Widen to complex so the C2C transform can be used.
    float2* input_c = NULL;
    checkCudaErrors( cudaMalloc((void**)&input_c, complexBytes) );
    const int thread = 128;
    const int block = (nx - 1) / thread + 1;  // ceil(nx / thread) for nx > 0
    RToC<<<block, thread>>>(nx, input_d, input_c);
    checkCudaErrors( cudaGetLastError() );  // surfaces launch-configuration errors

    // In-place forward transform of one batch of nx points.
    cufftHandle plan;
    checkCudaErrors( cufftPlan1d(&plan, nx, CUFFT_C2C, 1) );
    checkCudaErrors( cufftExecC2C(plan, input_c, input_c, CUFFT_FORWARD) );

    // The blocking copy also waits for the kernel and the transform to finish.
    checkCudaErrors( cudaMemcpy(output, input_c, complexBytes, cudaMemcpyDeviceToHost) );

    // Release the plan as well as the buffers; the plan was leaked before.
    checkCudaErrors( cufftDestroy(plan) );
    checkCudaErrors( cudaFree(input_c) );
    checkCudaErrors( cudaFree(input_d) );
}
// Inverse FFT.
//
// Computes the inverse 1D FFT of nx complex values on the GPU and returns the
// real part of the result. The spectrum is copied to the device, transformed
// in place with a single-batch C2C plan, narrowed to real values with CToR,
// multiplied by 1/nx with Scale, and copied back to the host.
//   nx     - number of points; the function returns without doing anything
//            when nx <= 0
//   input  - host array holding nx complex values
//   output - host array that receives nx real samples
// Any failing CUDA or cuFFT call is reported through checkCudaErrors.
// NOTE(review): assumes checkCudaErrors accepts a cufftResult (true for the
// CUDA samples' helper_cuda.h when cufft.h is included before it) - confirm.
void IFFT1D(int nx, float2* input, float* output)
{
    if (nx <= 0)
    {
        return;  // nothing to transform; also avoids a zero-sized plan/launch
    }

    const size_t realBytes = static_cast<size_t>(nx) * sizeof(float);
    const size_t complexBytes = static_cast<size_t>(nx) * sizeof(float2);

    // Upload the complex spectrum.
    float2* input_d = NULL;
    checkCudaErrors( cudaMalloc((void**)&input_d, complexBytes) );
    checkCudaErrors( cudaMemcpy(input_d, input, complexBytes, cudaMemcpyHostToDevice) );

    // In-place inverse transform of one batch of nx points.
    cufftHandle plan;
    checkCudaErrors( cufftPlan1d(&plan, nx, CUFFT_C2C, 1) );
    checkCudaErrors( cufftExecC2C(plan, input_d, input_d, CUFFT_INVERSE) );

    // Keep only the real part, then normalize: cuFFT's inverse transform is
    // unnormalized, so the result must be divided by nx.
    float* output_d = NULL;
    checkCudaErrors( cudaMalloc((void**)&output_d, realBytes) );
    const int thread = 128;
    const int block = (nx - 1) / thread + 1;  // ceil(nx / thread) for nx > 0
    CToR<<<block, thread>>>(nx, input_d, output_d);
    checkCudaErrors( cudaGetLastError() );  // surfaces launch-configuration errors
    Scale<<<block, thread>>>(nx, output_d, 1.0f / nx);
    checkCudaErrors( cudaGetLastError() );

    // The blocking copy also waits for the transform and both kernels to finish.
    checkCudaErrors( cudaMemcpy(output, output_d, realBytes, cudaMemcpyDeviceToHost) );

    // Release the plan as well as the buffers; the plan was leaked before.
    checkCudaErrors( cufftDestroy(plan) );
    checkCudaErrors( cudaFree(input_d) );
    checkCudaErrors( cudaFree(output_d) );
}