我在双核机器上使用 OpenMP 的 sections,想实现这样一个功能:让两个线程并行执行 void Change_values(int *Input,int *Output,int threadId),
之后将两者的执行结果互换,再进行第二次迭代。源代码如下:
#include "stdio.h"
#include "omp.h"
/* Element count: loop bound in Change_values() and size of every array in main(). */
#define degree 30
/*
 * Fills Output with Input shifted by threadId:
 * Output[k] = Input[k] + threadId for every k in [0, degree).
 *
 * Input    - source array, read only; must hold at least `degree` ints.
 * Output   - destination array; must hold at least `degree` ints.
 * threadId - value added to every element.
 */
void Change_values(int *Input,int *Output,int threadId)
{
    int k = 0;

    while (k < degree) {
        Output[k] = Input[k] + threadId;
        ++k;
    }
}
/*
 * Runs two rounds. In each round two OpenMP sections call Change_values()
 * on their own input/output pair; afterwards the outputs are copied into
 * the opposite inputs (output2 -> input1, output1 -> input2) and printed.
 * Finally the elapsed wall-clock time and both output arrays are printed.
 *
 * Returns 0 on completion.
 */
int main()
{
    int input1[degree] = {0}, input2[degree] = {0};
    int output1[degree], output2[degree];
    int i, j;
    double start, end;

    start = omp_get_wtime();
    for (j = 0; j < 2; j++) {
        printf("\nj:%d", j);

        /*
         * NOTE(review): each section only performs `degree` additions, so
         * thread start-up cost likely dominates, and the pragmas are ignored
         * entirely unless the compiler's OpenMP option (e.g. /openmp or
         * -fopenmp) is enabled -- probable reasons a profiler reports
         * serial execution. Confirm against the actual build settings.
         */
        #pragma omp parallel sections
        {
            #pragma omp section
            {
                /* The value added depends on which thread happens to run
                 * this section, so results may differ between runs. */
                int threadId = omp_get_thread_num();
                printf("\nthreadId:%d", threadId);
                Change_values(input1, output1, threadId);
            }
            #pragma omp section
            {
                int threadId = omp_get_thread_num();
                printf("\nthreadId:%d", threadId);
                Change_values(input2, output2, threadId);
            }
        }
        /* The parallel sections construct ends with an implicit barrier, so
         * both outputs are complete here; no explicit barrier is needed. */

        /* Cross-feed the results into the inputs of the next round. */
        for (i = 0; i < degree; i++) {
            input1[i] = output2[i];
            input2[i] = output1[i];
        }

        printf("\nInput1[]:");
        for (i = 0; i < degree; i++) {
            printf("%d", input1[i]);
        }
        printf("\nInput2[]:");
        for (i = 0; i < degree; i++) {
            printf("%d", input2[i]);
        }
    }
    end = omp_get_wtime();

    printf("\nneeds time:%f", end - start);
    printf("\nOoutput1[]:");
    for (i = 0; i < degree; i++) {
        printf("%d", output1[i]);
    }
    printf("\nOoutput2[]:");
    for (i = 0; i < degree; i++) {
        printf("%d", output2[i]);
    }

    /* 0 signals success to the caller; a non-zero status reads as failure. */
    return 0;
}
可是,用 Intel Thread Profiler 分析后发现,程序实际上是串行执行的,这是为什么呢?