openmp相关,请教一下问题出在哪里导致程序效率无法提升?

zhongxyui 2017-06-11 05:28:37
int SA(float*HFt,float*HFb,float*HMt,float*HMb,float*Vop,float*Vos,float*Vpmax,float*Vpmin,float*Vsmax,float*Vsmin,float*Xr,float*Yr,float*Hr,float*top,float*tos,int numlayer,int numreceiver,float Hsource,float Xsource,float Ysource,float leightmodel,int numbest,float weight)
{
int i,j,k;
int markovlength=10000;
float decayscale=0.95;
float stepfactor=0.2;
float temperature=100;
float tolerance=0.1;
float*preVp=(float*)malloc(numlayer*sizeof(float));
float*nextVp=(float*)malloc(numlayer*sizeof(float));
float*prebestVp=(float*)malloc(numlayer*sizeof(float));
float*bestVp=(float*)malloc(numlayer*sizeof(float));
float*preVs=(float*)malloc(numlayer*sizeof(float));
float*nextVs=(float*)malloc(numlayer*sizeof(float));
float*prebestVs=(float*)malloc(numlayer*sizeof(float));
float*bestVs=(float*)malloc(numlayer*sizeof(float));
float*pretp=(float*)malloc(numreceiver*sizeof(float));
float*nexttp=(float*)malloc(numreceiver*sizeof(float));
float*besttp=(float*)malloc(numreceiver*sizeof(float));
float*prets=(float*)malloc(numreceiver*sizeof(float));
float*nextts=(float*)malloc(numreceiver*sizeof(float));
float*bestts=(float*)malloc(numreceiver*sizeof(float));
float acceptpoints=0.0;
float*ob = (float*)malloc(NUM * sizeof(float));
float*tem = (float*)malloc(NUM * sizeof(float));
float*bestvp = (float*)malloc(numlayer *NUM * sizeof(float));
float*bestvs = (float*)malloc(numlayer *NUM * sizeof(float));
time_t t;
srand((unsigned)time(&t));
for(i=0;i<numlayer;i++)
{
preVp[i]=Vop[i];
preVs[i]=Vos[i];
prebestVp[i]=bestVp[i]=preVp[i];
prebestVs[i]=bestVs[i]=preVs[i];
}


//#pragma omp parallel num_threads(NUM)//并行区内//
// {
// /*for (int a = 0; a < NUM; a++)
//{*/
/*int n = omp_get_thread_num();*/
#pragma omp parallel num_threads(NUM) private(i,k) firstprivate(preVp,preVs,nextVp,nextVs,prebestVp,prebestVs,bestVp,bestVs,temperature,acceptpoints) shared(bestvp,bestvs,ob,tem)
{
int n = omp_get_thread_num();
do//annealing//
{
temperature *= decayscale;
acceptpoints = 0.0;
for (k = 0; k < markovlength; k++)//Monte Carlo sampling//
{
for (i = 0; i < numlayer; i++)
{
int Vpm = (Vpmax[i] - Vpmin[i]) / NUM;
int Vsm = (Vsmax[i] - Vsmin[i]) / NUM;
do
{
nextVp[i] = preVp[i] + stepfactor*Vpm*(rnd() - 0.5);
nextVs[i] = preVs[i] + stepfactor*Vsm*(rnd() - 0.5);
} while (!(nextVp[i] >= Vpmax[i] - Vpm*(n + 1) && nextVp[i] <= Vpmax[i] - Vpm*n&& nextVs[i] >= Vsmax[i] - Vsm*(n + 1) && nextVs[i] <= Vsmax[i] - Vsm*n));//constraint//
}
raytrace(HFt, HFb, HMt, HMb, nextVp, nextVs, Xr, Yr, Hr, numlayer, numreceiver, Hsource, Xsource, Ysource, leightmodel, nexttp, nextts);
raytrace(HFt, HFb, HMt, HMb, bestVp, bestVs, Xr, Yr, Hr, numlayer, numreceiver, Hsource, Xsource, Ysource, leightmodel, besttp, bestts);
if (objectfunction(besttp, bestts, top, tos, numbest, numreceiver, weight) > objectfunction(nexttp, nextts, top, tos, numbest, numreceiver, weight))
{
for (i = 0; i < numlayer; i++)
{
prebestVp[i] = bestVp[i];
prebestVs[i] = bestVs[i];
bestVp[i] = nextVp[i];
bestVs[i] = nextVs[i];
}
}
raytrace(HFt, HFb, HMt, HMb, preVp, preVs, Xr, Yr, Hr, numlayer, numreceiver, Hsource, Xsource, Ysource, leightmodel, pretp, prets);
if (objectfunction(pretp, prets, top, tos, numbest, numreceiver, weight) - objectfunction(nexttp, nextts, top, tos, numbest, numreceiver, weight) > 0)
{
preVp[i] = nextVp[i];
preVs[i] = nextVs[i];
acceptpoints++;
}
else
{
float change = -1 * (objectfunction(nexttp, nextts, top, tos, numbest, numreceiver, weight) - objectfunction(pretp, prets, top, tos, numbest, numreceiver, weight)) / temperature;
if (exp(change) > rnd())//Metropolis rule//
{
preVp[i] = nextVp[i];
preVs[i] = nextVs[i];
acceptpoints++;
}
}
}
} while (temperature > tolerance);//convergence condition//
ob[n] = objectfunction(besttp, bestts, top, tos, numbest, numreceiver, weight);
tem[n] = temperature;
for (int i = 0; i < numlayer; i++)
{
bestvp[n*numlayer + i] = bestVp[i];
bestvs[n*numlayer + i] = bestVs[i];
}
}


/*ob[n] = objectfunction(besttp, bestts, top, tos, numbest, numreceiver, weight);
tem[n] = temperature;
printf("thread:%d, temperature:%f", n, tem[n]);
for (int i = 0; i < numlayer; i++)
{
bestvp[n*numlayer + i] = bestVp[i];
bestvs[n*numlayer + i] = bestVs[i];
}*/

//}

float obmin = ob[0];
int bestthreadnum = 0;
for (int i = 1; i <NUM; i++)
{
if (ob[i] <= obmin)
{
obmin = ob[i];
bestthreadnum = i;
}
}
printf("The Min value is:%f\n",obmin);
printf("The temperature is:%f\n",tem[bestthreadnum]);
for(i=0;i<numlayer;i++)
{
printf("The Min points is:%f,%f\n",bestvp[bestthreadnum*numlayer+i],bestvs[bestthreadnum*numlayer+i]);
}
free(preVp);
free(nextVp);
free(prebestVp);
free(bestVp);
free(preVs);
free(nextVs);
free(prebestVs);
free(bestVs);
free(pretp);
free(nexttp);
free(besttp);
free(prets);
free(nextts);
free(bestts);
free(ob);
free(tem);
free(bestvp);
free(bestvs);
return 0;

图上是我的程序,不知为什么只要改动num增加线程后,效率大大降低,麻烦各位大神帮忙指点一下....
...全文
1007 回复 打赏 收藏 转发到动态 举报
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复

566

社区成员

发帖
与我相关
我的任务
社区描述
英特尔® 边缘计算,聚焦于边缘计算、AI、IoT等领域,为开发者提供丰富的开发资源、创新技术、解决方案与行业活动。
社区管理员
  • 英特尔技术社区
  • shere_lin
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧