帮忙解决以下负载平衡问题

xcm83 2008-03-29 06:13:59
程序如下
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <windows.h>

// Second dimension of the position array r; upper bound of the particle loops.
#define NPARTS 1000
// Number of passes of the main loop in main().
#define NITER 21
// First dimension of the position array r (computePot reads rows 0, 1 and 2).
#define DIMS 3
// Number of worker threads created by main().
#define NUM_THREADS 2

// Worker thread entry point; its argument points to the thread's int index.
DWORD WINAPI tPoolComputePot(LPVOID);
// Computes the partial potential for one thread index and stores it in gPot.
void computePot(int);
// Fills r with initial values.
void initPositions(void);
// Perturbs r and then signals every worker to start computing.
void updatePositions(void);

// Particle coordinates, indexed as r[dimension][particle].
double r[DIMS][NPARTS];
// Per-thread particle range: bounds[0][t] is the first index, bounds[1][t] is
// one past the last index handled by thread t.
int bounds[2][NUM_THREADS];
// Total potential of the current iteration (sum of gPot).
double pot;
// Partial potential written by each worker thread.
double gPot[NUM_THREADS];
// Set to 1 by main() to ask the workers to leave their loop.
// NOTE(review): plain int shared between threads without atomics/volatile.
int done = 0;
HANDLE bSignal[NUM_THREADS]; // signal to begin computation
HANDLE eSignal[NUM_THREADS]; // signal that computation is ended

/*
 * Program entry point.
 *
 * Splits the particle range into NUM_THREADS contiguous slices, creates one
 * pair of auto-reset events and one worker thread per slice, then runs NITER
 * iterations: wait for every worker to finish, sum the partial potentials,
 * print every 10th result, and perturb the positions (which also restarts the
 * workers). Finally asks the workers to stop and waits for them to exit.
 *
 * Returns 0 on success, 1 if an event or thread could not be created.
 */
int main()
{
    int i, j;
    HANDLE tHandle[NUM_THREADS];
    int tNum[NUM_THREADS];

    for (i = 0; i < NUM_THREADS; i++)
    {
        bounds[0][i] = i * (NPARTS / NUM_THREADS);
        bounds[1][i] = (i + 1) * (NPARTS / NUM_THREADS);
        bSignal[i] = CreateEvent(NULL, FALSE, FALSE, NULL); // auto-reset
        eSignal[i] = CreateEvent(NULL, FALSE, FALSE, NULL); // auto-reset
        if (bSignal[i] == NULL || eSignal[i] == NULL)
        {
            fprintf(stderr, "CreateEvent failed for thread %d\n", i);
            return 1;
        }
    }
    // The last slice absorbs the remainder when NPARTS is not a multiple of
    // NUM_THREADS.
    bounds[1][NUM_THREADS - 1] = NPARTS;

    for (j = 0; j < NUM_THREADS; j++)
    {
        tNum[j] = j;
        tHandle[j] = CreateThread(NULL, 0, tPoolComputePot, &tNum[j], 0, NULL);
        if (tHandle[j] == NULL)
        {
            fprintf(stderr, "CreateThread failed for thread %d\n", j);
            return 1;
        }
    }

    initPositions();
    updatePositions();

    for (i = 0; i < NITER; i++)
    {
        // Block until every worker has published its partial result.
        WaitForMultipleObjects(NUM_THREADS, eSignal, TRUE, INFINITE);

        pot = 0.0;
        for (j = 0; j < NUM_THREADS; j++)
        {
            pot += gPot[j];
        }
        if (i % 10 == 0) printf("%5d: Potential: %10.3f\n", i, pot);
        updatePositions();
    }

    // Shut the pool down. The index must be j: the original used i, which
    // equals NITER here and reads past the end of bSignal.
    done = 1;
    for (j = 0; j < NUM_THREADS; j++)
    {
        SetEvent(bSignal[j]);
    }

    // Wait for the workers to leave their loop before the process exits.
    WaitForMultipleObjects(NUM_THREADS, tHandle, TRUE, INFINITE);
    return 0;
}

/*
 * Worker thread body.
 *
 * pArg points to an int holding this worker's index. The worker repeatedly
 * waits for its begin event, computes its share of the potential, and raises
 * its end event, until the shared flag `done` is set.
 *
 * Always returns 0.
 */
DWORD WINAPI tPoolComputePot(LPVOID pArg)
{
    int tid = *(int *)pArg;
    while (!done)
    {
        WaitForSingleObject(bSignal[tid], INFINITE);
        // The begin event doubles as the shutdown wake-up, so re-check the
        // flag after the wait; otherwise a stop request would trigger one
        // more full (and discarded) computation.
        if (done)
        {
            break;
        }
        computePot(tid);
        SetEvent(eSignal[tid]);
    }
    return 0;
}

/*
 * Assigns every coordinate in r a pseudo-random value of
 * 0.5 + rand()/RAND_MAX, walking the array dimension by dimension.
 */
void initPositions()
{
    int dim, p;

    for (dim = 0; dim < DIMS; ++dim)
    {
        double *row = r[dim];
        for (p = 0; p < NPARTS; ++p)
        {
            double u = (double) rand() / (double) RAND_MAX;
            row[p] = 0.5 + u;
        }
    }
}

/*
 * Subtracts 0.5 + rand()/RAND_MAX from every coordinate in r (dimension by
 * dimension), then raises the begin event of every worker thread.
 */
void updatePositions()
{
    int dim, p, t;

    for (dim = 0; dim < DIMS; ++dim)
    {
        double *row = r[dim];
        for (p = 0; p < NPARTS; ++p)
        {
            double u = (double) rand() / (double) RAND_MAX;
            row[p] -= 0.5 + u;
        }
    }

    // Positions are final for this step; let the workers start.
    for (t = 0; t < NUM_THREADS; ++t)
    {
        SetEvent(bSignal[t]);
    }
}

void computePot(int tid)
{
int i, j, start, end;
double lPot = 0.0;
double distx, disty, distz, dist;

start = bounds[0][tid];
end = bounds[1][tid];

for( i=start; i<end; i++ )
{
for( j=0; j<i-1; j++ )
{
distx = pow( (r[0][j] - r[0][i]), 2 );
disty = pow( (r[1][j] - r[1][i]), 2 );
distz = pow( (r[2][j] - r[2][i]), 2 );
dist = sqrt( distx + disty + distz );
lPot += 1.0 / dist;
}
}
gPot[tid] = lPot;
}
用Intel线程档案器测试程序,显示出两个用户线程的负载不平衡,请大家看看应该怎么修改才能比较平衡啊
...全文
157 2 打赏 收藏 转发到动态 举报
AI 作业
写回复
用AI写文章
2 条回复
切换为时间正序
请发表友善的回复…
发表回复
lee_beckham 2008-04-14
  • 打赏
  • 举报
回复
代码修改前，由于主线程进行初始化时（initPositions() 和 updatePositions()）两个子线程都在等待，浪费了时间；同时子线程函数体内计算分配不均匀，第1个线程的计算量明显低于第2个线程，造成负载不均衡。
并且由于主线程与子线程间通信开销非常大，可以先让子线程把结果全算出来，存到一个数组里，然后一并传给主线程进行打印输出。这样减少了很多通信开销。
下面为修改后的代码,代码后为Intel® TProfiler测试结果,负载比较均衡,执行时间由53秒减少为36秒。再次修改后执行时间变为4.5秒,并且结果能保证正确。
优化思想为把 initPositions()和updatePositions()两个函数分配给子线程来实现,并且子线程中computePot函数内进行负载重新分配,利用stride使得每个线程执行的任务达到均衡。先让子线程把结果全算出来,存到一个数组里,然后一并传给主线程进行打印输出。


代码:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <windows.h>

// Second dimension of the position array r; upper bound of the particle loops.
#define NPARTS 1000
// Number of iterations each worker runs and number of results main() prints from.
#define NITER 301
// First dimension of the position array r.
#define DIMS 3
// Number of worker threads created by main().
#define NUM_THREADS 2
// Guards the accumulation into tmp[] done by the worker threads.
CRITICAL_SECTION cs;
// Worker thread entry point; its argument points to the thread's int index.
DWORD WINAPI tPoolComputePot(LPVOID);
// Computes the partial potential for one thread index and stores it in gPot.
void computePot(int);
// Per-iteration total potential, accumulated by the workers and printed by main().
// NOTE(review): the size 301 is hard-coded and must be kept equal to NITER.
double tmp[301];
// Particle coordinates, indexed as r[dimension][particle].
double r[DIMS][NPARTS];
// Per-thread loop range read by computePot: bounds[0][t] start, bounds[1][t] end.
int bounds[2][NUM_THREADS];
// NOTE(review): not referenced by any code visible in this listing.
double pot;
// Partial potential written by each worker thread.
double gPot[NUM_THREADS];
// Set to 1 by main() after printing; the workers in this listing never read it.
int done = 0;
HANDLE bSignal[NUM_THREADS]; // signal to begin computation
HANDLE eSignal[NUM_THREADS]; // signal that computation is ended

/*
 * Program entry point (worker-driven variant).
 *
 * Initializes the critical section, the per-thread loop ranges and events,
 * and starts NUM_THREADS workers. The workers run all NITER iterations on
 * their own and accumulate the totals into tmp[]; main only waits for every
 * worker's end event, prints every 10th total, and joins the threads.
 *
 * Returns 0 on success, 1 if an event or thread could not be created.
 */
int main() {
    int i, j;
    HANDLE tHandle[NUM_THREADS];
    int tNum[NUM_THREADS];

    InitializeCriticalSection( &cs );
    for (i = 0; i < NUM_THREADS; i++) {
        // Interleaved distribution: thread i starts at particle i and
        // computePot strides through the whole range up to NPARTS.
        bounds[0][i] = i;
        bounds[1][i] = NPARTS;
        bSignal[i] = CreateEvent(NULL, FALSE, FALSE, NULL); // auto-reset
        eSignal[i] = CreateEvent(NULL, FALSE, FALSE, NULL); // auto-reset
        if (bSignal[i] == NULL || eSignal[i] == NULL) {
            fprintf(stderr, "CreateEvent failed for thread %d\n", i);
            return 1;
        }
    }

    for (j = 0; j < NUM_THREADS; j++) {
        tNum[j] = j;
        tHandle[j] = CreateThread(NULL, 0, tPoolComputePot, &tNum[j], 0, NULL);
        if (tHandle[j] == NULL) {
            fprintf(stderr, "CreateThread failed for thread %d\n", j);
            return 1;
        }
    }

    // Every worker raises its end event once all iterations are finished.
    WaitForMultipleObjects(NUM_THREADS, eSignal, TRUE, INFINITE);
    for (i = 0; i < NITER; i++) {
        if (i % 10 == 0) printf("%5d: Potential: %10.3f\n", i, tmp[i]);
    }

    done = 1;
    // The index must be j: the original used i, which equals NITER here and
    // reads past the end of bSignal.
    for (j = 0; j < NUM_THREADS; j++) {
        SetEvent(bSignal[j]);
    }

    WaitForMultipleObjects(NUM_THREADS, tHandle, TRUE, INFINITE);
    return 0;
}


/*
 * Worker thread body (worker-driven variant).
 *
 * pArg points to an int holding this worker's index. The worker initializes
 * and perturbs its own share of the position array r (every second particle,
 * starting at 1 for thread 0 and at 0 for any other thread), then runs NITER
 * iterations of: wait for the peer's event, compute the partial potential,
 * add it to tmp[k] under the critical section, perturb its share of r again,
 * and raise its own event. After the last iteration it raises its end event.
 *
 * Always returns 0.
 *
 * NOTE(review): the tid+1 / tid-1 peer selection and the stride of 2 only
 * work for exactly two threads.
 * NOTE(review): nothing visible here stops one thread from perturbing its
 * particles while the other is still reading r inside computePot - confirm
 * whether a second synchronization point is needed.
 */
DWORD WINAPI tPoolComputePot(LPVOID pArg) {
// Loop counters, plus the particle range this thread updates.
int i, j,k, start=0, end;
int tid = *(int *)pArg;
long stride = 2;
// Thread 0 takes the odd particle indices, the other thread the even ones.
if(tid == 0) start += 1;
end =NPARTS;
// Initial positions for this thread's particles.
for( i=0; i<DIMS; i++ )
for( j=start; j<end; j+=stride )
r[i][j] = 0.5 + ( (double) rand() / (double) RAND_MAX );

// First perturbation of this thread's particles.
for( i=0; i<DIMS; i++ )
for( j=start; j<end; j+=stride )
r[i][j] -= 0.5 + ( (double) rand() / (double) RAND_MAX );
// Tell the peer that this thread's positions are ready.
SetEvent(bSignal[tid]);
// Main iteration loop, run entirely inside the worker.
for(k=0; k<NITER; k++){
if(tid == 0) // thread synchronization: wait for the peer thread's event
WaitForSingleObject(bSignal[tid+1], INFINITE);
else
WaitForSingleObject(bSignal[tid-1], INFINITE);
computePot(tid);
EnterCriticalSection( &cs); // critical section
tmp[k] += gPot[tid];
LeaveCriticalSection( &cs );
// Perturb this thread's particles for the next iteration.
for( i=0; i<DIMS; i++ )
for( j=start; j<end; j+=stride )
r[i][j] -= 0.5 + ( (double) rand() / (double) RAND_MAX );
SetEvent(bSignal[tid]);
}
// All iterations finished; main() waits on this event.
SetEvent(eSignal[tid]);
return 0;
}


void computePot(int tid) {
int i, j, start, end;
double lPot = 0.0;
double distx, disty, distz, dist;
long stride = 2;
start = bounds[0][tid];
end = bounds[1][tid];
if(tid == 0) start += stride;
for( i=start; i<end; i+=stride ) {
for( j=0; j<i-1; j++ ) {
distx = pow( (r[0][j] - r[0][i]), 2 );
disty = pow( (r[1][j] - r[1][i]), 2 );
distz = pow( (r[2][j] - r[2][i]), 2 );
dist = sqrt( distx + disty + distz );
lPot += 1.0 / dist;
}
}
gPot[tid] = lPot;
}
intel_iclifort 2008-04-02
  • 打赏
  • 举报
回复
只有2个线程的情况, 无法说明负载均衡是否一定有问题, 将 NUM_THREADS 设置成 4, 6 看看情况如何; 或者增加 NPARTS

你的硬件环境也没说明, 单/多核, 有否Hyper-Threading,

另外, 这个例子似乎用OpenMP的实现更简单些, 呵呵

567

社区成员

发帖
与我相关
我的任务
社区描述
英特尔® 边缘计算,聚焦于边缘计算、AI、IoT等领域,为开发者提供丰富的开发资源、创新技术、解决方案与行业活动。
社区管理员
  • 英特尔技术社区
  • shere_lin
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧