并行程序效率太低,高手帮忙看看!
下面是一个搜索最大数的程序,但并行子函数运行时间大约是串行函数的两倍,大家帮忙看看原因是什么?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <omp.h>
#define CACHE_LINE_LEN 64
#define data(x) data[x]
#define pnMax(x) pnMax[x]
int *data;
void randData(int count)
{
int i;
data = (int *)malloc(sizeof(int)*count);
srand((unsigned)time(NULL));
for(i=0; i<count; i++)
{
data(i) = (int)(rand()%1000);
}
}
int Parallel_SearchMaxData(int *data,int nLen)
{
int i,k;
int *pnMax;
int pos;
int nCore,nStep,begin,end;
int nMax;
nCore = omp_get_num_procs();
pnMax = (int *)malloc(nCore*(sizeof(int)+CACHE_LINE_LEN));
if(pnMax == NULL)
{
return -1;
}
nStep = nLen/nCore;
#pragma omp parallel for
for(k=0; k<nCore; k++)
{
begin = k*nStep;
end = (k+1)*nStep-1;
if(k == nCore-1)
{
end = nLen-1;
}
nMax = data(begin);
for(i=begin+1; i<=end; i++)
{
if(data(i) > nMax)
{
nMax = data(i);
}
}
pos = k*(sizeof(int)+CACHE_LINE_LEN)/sizeof(int);
pnMax[pos] = nMax;
}
nMax = pnMax(0);
for(i=1; i<nCore; i++)
{
pos = i*(sizeof(int)+CACHE_LINE_LEN)/sizeof(int);
if(pnMax(pos) > nMax)
{
nMax = pnMax(pos);
}
}
free(pnMax);
return nMax;
}
int SearchMaxData(int *data, int nLen)
{
int i;
int nMax;
nMax = 0;
for(i=1; i<nLen; i++)
{
if(data(i) > data(nMax))
{
nMax = i;
}
}
nMax = data(nMax);
return nMax;
}
void main(int argc,char **argv)
{
int count;
int rank1,rank2;
clock_t t1,t2,t3,t4;
printf("Input datasize:\n");
scanf("%d",&count);
randData(count);
t1 = clock();
rank1 = Parallel_SearchMaxData(data,count);
t2 = clock();
printf("Result of parallel searching:\n");
printf("The max data is:%d\n",rank1);
printf("Searching time is:%d\n",t2-t1);
printf("\n");
t3 = clock();
rank2 = SearchMaxData(data,count);
t4 = clock();
printf("Result of serial searching:\n");
printf("The max data is:%d\n",rank2);
printf("Searching time is:%d\n",t4-t3);
}
Input datasize:
100000000
Result of parallel searching:
The max data is:999
Searching time is:1016
Result of serial searching:
The max data is:999
Searching time is:719
请按任意键继续. . .