不使用 CUDA 的 cuFFT 库实现 FFT 的代码
我想实现 FFT,但不希望使用 cuFFT 库,也就是想自己编写。下面是一篇论文中提供的核函数,不过我没有找到如何调用该核函数的部分,有谁知道该怎么调用吗?如果有与下面完全不同的实现思路也可以,请高手指教!
/**
 * Radix-2 butterfly FFT over N complex samples, computed by one thread block
 * entirely in dynamic shared memory.
 *
 * Launch contract (derived from the indexing below):
 *   - Only threadIdx.x is used, so the kernel processes a single N-point
 *     transform per launch; launching more than one block makes every block
 *     redundantly compute and write the same result.
 *   - Thread t (t < N/2) performs one butterfly per stage, so the block needs
 *     at least N/2 threads. Extra threads are idle but still take part in the
 *     barriers. N/2 must not exceed the device's maximum threads per block.
 *   - Dynamic shared memory must hold N Complex values.
 *   - N must be a power of two; otherwise the butterfly index q runs past N.
 *
 *   Typical launch:
 *       FFT<<<1, N / 2, N * sizeof(Complex)>>>(d_in, d_out, N);
 *
 * External requirements:
 *   - texRef is sampled at (x = thread index, y = stage index). Presumably it
 *     is a 2D texture of precomputed twiddle factors with N/2 columns and
 *     log2(N) rows -- confirm against the host-side setup code.
 *   - NOTE(review): no bit-reversal permutation is done here; presumably the
 *     caller supplies DataIn in the order this butterfly network expects --
 *     TODO confirm.
 *
 * @param DataIn   Device pointer to N input samples.
 * @param DataOut  Device pointer receiving N output samples.
 * @param N        Transform length (power of two).
 */
static __global__ void FFT( Complex * DataIn, Complex *DataOut, const unsigned int N)
{
    extern __shared__ Complex sdata[];

    const unsigned int tid_in_block = threadIdx.x;
    const unsigned int half = N / 2;

    // A 1-point transform is the identity. No butterfly thread exists for it
    // (N/2 == 0), so copy the sample explicitly. The branch is uniform across
    // the block, which makes returning before the barriers safe.
    if (N == 1)
    {
        if (tid_in_block == 0)
        {
            DataOut[0] = DataIn[0];
        }
        return;
    }

    // Only the first N/2 threads own a butterfly. Guarding with N/2 (not N)
    // keeps the "+ N/2" accesses inside the N-element buffers.
    const bool active = tid_in_block < half;

    if (active)
    {
        sdata[tid_in_block]        = DataIn[tid_in_block];
        sdata[tid_in_block + half] = DataIn[tid_in_block + half];
    }
    // Barriers are kept outside every thread-dependent branch so that all
    // threads of the block reach them.
    __syncthreads();

    unsigned int p = 0;
    unsigned int q = 0;
    int stage = 0;

    // Ns is the butterfly span of the current stage: 1, 2, 4, ..., N/2.
    for (unsigned int Ns = 1; Ns < N; Ns *= 2, ++stage)
    {
        if (active)
        {
            // Group of 2*Ns elements this thread works in, plus its offset
            // inside the lower half of that group.
            p = tid_in_block / Ns * Ns * 2 + tid_in_block % Ns;
            q = p + Ns;

            const Complex Wn   = tex2D(texRef, tid_in_block, static_cast<float>(stage));
            const Complex XqWn = ComplexMul(sdata[q], Wn);
            const Complex Xp   = sdata[p];

            // Each active thread owns a distinct (p, q) pair within a stage,
            // so these in-place writes do not collide with other threads.
            sdata[p] = ComplexAdd(Xp, XqWn);
            sdata[q] = ComplexSub(Xp, XqWn);
        }
        // The next stage reads elements written by other threads.
        __syncthreads();
    }

    // After the last stage p and q hold this thread's final butterfly indices;
    // together the active threads cover all N outputs.
    if (active)
    {
        DataOut[p] = sdata[p];
        DataOut[q] = sdata[q];
    }
}