Intel优化大赛参赛 第一题 皇后问题 单核测试快于Jeff Somers的代码

huanyun 2007-09-19 11:05:13
/*////////////////////////////////////////
// 版权所有(C) 2000-2008 邓辉 //
// Email: denghui0815@hotmail.com //
// 文件名: Queen.cpp //
// 说明: Intel优化大赛参赛作品 //
////////////////////////////////////////*/
#include <mathimf.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <tmmintrin.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <omp.h>

// Build switch: when defined, XGetTickCount() is compiled in and main()
// prints the result together with the elapsed time.
#define XTEST_TIME
// Build switch (disabled by default): when defined, every solution found is
// also written as text to per-thread files named ret<N>.txt.
//#define XOUT_RET

// Capacity, in board columns/rows, of the arrays sized by board width.
#define XMAXQUEEN 32
// Capacity, in worker threads, of the per-thread arrays.
#define XMAXTHREAD 32

// Bit mask with one bit set per board column; assigned by XInitTask().
unsigned int g_nAll;
// Board size (number of queens); assigned by XInitTask().
unsigned int g_nQueen;

#ifdef XTEST_TIME
// Returns the current value of the Windows high-resolution performance
// counter, converted to microseconds.
//
// The counter frequency is queried on the first call and cached in a
// function-local static, so the first call must not race with another call.
// Returns 0 if the performance counter cannot be read.
__int64 XGetTickCount()
{
    static __int64 nFrequency = 0;   // counter ticks per second, queried once
    LARGE_INTEGER liValue;

    if(nFrequency == 0)
    {
        // Guard against a failed or zero frequency: it is used as a divisor.
        if(!QueryPerformanceFrequency(&liValue) || liValue.QuadPart <= 0) return 0;
        nFrequency = liValue.QuadPart;
    }

    if(!QueryPerformanceCounter(&liValue)) return 0;

    // Convert whole seconds and the sub-second remainder separately.
    // Multiplying the raw tick count by 1000000 first overflows 64 bits once
    // the counter exceeds about 9.2e12 ticks; the remainder is always smaller
    // than the frequency, so its product stays in range.
    const __int64 nTick      = liValue.QuadPart;
    const __int64 nSeconds   = nTick / nFrequency;
    const __int64 nRemainder = nTick % nFrequency;
    return nSeconds * 1000000 + nRemainder * 1000000 / nFrequency;
}
#endif

#ifdef XOUT_RET
unsigned char g_pPosIndex[0x10000];
char g_szOutStr[XMAXQUEEN][4];
char g_szOutBuf[XMAXTHREAD][10*1024];
int g_nOutBuf[XMAXTHREAD] = {0};
FILE* g_pFile[XMAXTHREAD];
void XOutRet(int nThreadID, unsigned int* pPos)
{
if(pPos == NULL)
{
if(g_nOutBuf[nThreadID])
{
fwrite(g_szOutBuf[nThreadID], 1, g_nOutBuf[nThreadID], g_pFile[nThreadID]);
g_nOutBuf[nThreadID] = 0;
}
}
else
{
int nQueen = g_nQueen,nSize = 0;
unsigned char nIndex;
char* pOut1 = g_szOutBuf[nThreadID] + g_nOutBuf[nThreadID];
char* pOut2 = g_szOutBuf[nThreadID] + g_nOutBuf[nThreadID] + nQueen*3 + 1;
if(nQueen < 17)
{
for(int i = 0; i < nQueen; ++i,pOut1+=3,pOut2+=3)
{
nIndex = g_pPosIndex[pPos[i]];
pOut1[0] = g_szOutStr[nIndex][0];
pOut1[1] = g_szOutStr[nIndex][1];
pOut1[2] = g_szOutStr[nIndex][2];
pOut2[0] = g_szOutStr[nQueen - nIndex + 1][0];
pOut2[1] = g_szOutStr[nQueen - nIndex + 1][1];
pOut2[2] = g_szOutStr[nQueen - nIndex + 1][2];
}
}
else
{
for(int i = 0; i < nQueen; ++i,pOut1+=3,pOut2+=3)
{
nIndex = g_pPosIndex[pPos[i] & 0xFFFF];
if(nIndex)
{
nIndex = g_pPosIndex[pPos[i]];
pOut1[0] = g_szOutStr[nIndex][0];
pOut1[1] = g_szOutStr[nIndex][1];
pOut1[2] = g_szOutStr[nIndex][2];
pOut2[0] = g_szOutStr[nQueen - nIndex + 1][0];
pOut2[1] = g_szOutStr[nQueen - nIndex + 1][1];
pOut2[2] = g_szOutStr[nQueen - nIndex + 1][2];
}
else
{
nIndex = g_pPosIndex[pPos[i] >> 16] + 16;
pOut1[0] = g_szOutStr[nIndex][0];
pOut1[1] = g_szOutStr[nIndex][1];
pOut1[2] = g_szOutStr[nIndex][2];
pOut2[0] = g_szOutStr[nQueen - nIndex + 1][0];
pOut2[1] = g_szOutStr[nQueen - nIndex + 1][1];
pOut2[2] = g_szOutStr[nQueen - nIndex + 1][2];
}

}
}
*pOut1 = '\n';
*pOut2 = '\n';
g_nOutBuf[nThreadID] += nQueen*6 + 2;
if(g_nOutBuf[nThreadID] > 8*1024)
{
fwrite(g_szOutBuf[nThreadID], 1, g_nOutBuf[nThreadID], g_pFile[nThreadID]);
g_nOutBuf[nThreadID] = 0;
}
}
}
#endif



// One unit of work for the solver: the search state after the first two
// rows have been filled, linked into a per-thread singly linked list.
typedef struct tagXTaskParam
{
    unsigned int nCur;   // columns already occupied by the placed queens
    unsigned int nL;     // columns blocked in the next row (mask shifted left per row)
    unsigned int nR;     // columns blocked in the next row (mask shifted right per row)
#ifdef XOUT_RET
    unsigned int pPos[XMAXQUEEN];   // per-row column bit of the placement being built
#endif
    struct tagXTaskParam* pNext;    // next task of the same thread, NULL at the end
} XTASKPARAM;

void XInitTask(XTASKPARAM* pTask, int nThread, int nQueen)
{
g_nAll = (1 << nQueen) - 1;
g_nQueen = nQueen;

unsigned int nMask0 = (1 << (nQueen /2)) - 1,nMask1,nCur,nL,nR;
unsigned int nTask = 0,nIndex;
unsigned int nPos0 = 0,nPos1 = 0;
XTASKPARAM* pTmp,*pCur[XMAXTHREAD];
for(int i = 0; i < nThread; ++i) pCur[i] = pTask+i;

while(nMask0)
{
nCur = nMask0 & -nMask0;
nMask0 -= nCur;
nL = nCur << 1;
nR = nCur >> 1;
nMask1 = g_nAll & ~(nCur | nL | nR);
while(nMask1)
{
unsigned int p = nMask1 & -nMask1;
nMask1 -= p;
nIndex = (nTask++) % nThread;
pCur[nIndex]->nCur = nCur + p;
pCur[nIndex]->nL = (nL + p) << 1;
pCur[nIndex]->nR = (nR + p) >> 1;
#ifdef XOUT_RET
pCur[nIndex]->pPos[0] = nCur;
pCur[nIndex]->pPos[1] = p;
#endif
pCur[nIndex]->pNext = new XTASKPARAM;
pCur[nIndex] = pCur[nIndex]->pNext;
}
}

if(nQueen & 1)
{
nCur = (1 << (nQueen/2));
nL = nCur << 1;
nR = nCur >> 1;
nMask1 = ((1 << (nQueen /2)) - 1) & ~(nCur | nL | nR);
while(nMask1)
{
unsigned int p = nMask1 & -nMask1;
nMask1 -= p;
nIndex = (nTask++) % nThread;
pCur[nIndex]->nCur = nCur + p;
pCur[nIndex]->nL = (nL + p) << 1;
pCur[nIndex]->nR = (nR + p) >> 1;
#ifdef XOUT_RET
pCur[nIndex]->pPos[0] = nCur;
pCur[nIndex]->pPos[1] = p;
#endif
pCur[nIndex]->pNext = new XTASKPARAM;
pCur[nIndex] = pCur[nIndex]->pNext;
}
}

for(int i = 0; i < nThread; ++i) pCur[i]->pNext = NULL;
}


unsigned int XRunTask(int nThreadID, XTASKPARAM* pTask)
{
unsigned int nCount = 0;
unsigned int nStack[XMAXQUEEN * 4] = {0};
unsigned int *pStack = nStack + 4;
#ifdef XOUT_RET
unsigned int *pPos = pTask->pPos + 3;
#endif
nStack[1] = pTask->nCur;
nStack[2] = pTask->nL;
nStack[3] = pTask->nR;
nStack[0] = g_nAll & ~(nStack[1] | nStack[2] | nStack[3]);
while(pStack > nStack)
{
#ifdef XOUT_RET
--pPos;
#endif
pStack -= 4;
while(pStack[0])
{
unsigned int p = pStack[0] & -pStack[0];
pStack[0] -= p;
#ifdef XOUT_RET
*pPos = p;
#endif
pStack[5] = pStack[1] + p;
if(pStack[5] != g_nAll)
{
pStack[6] = (pStack[2] + p) << 1;
pStack[7] = (pStack[3] + p) >> 1;
pStack[4] = g_nAll & ~(pStack[5] | pStack[6] | pStack[7]);
pStack += 4;
#ifdef XOUT_RET
++pPos;
#endif
}
else
{
#ifdef XOUT_RET
XOutRet(nThreadID, pTask->pPos);
#endif
++nCount;
}
}
}
return nCount;
}

unsigned int XRunTaskThread(int nThreadID, XTASKPARAM* pTask)
{
SetThreadAffinityMask(GetCurrentThread(), 1 << nThreadID);
unsigned int nCount = 0;

while(pTask)
{
nCount += XRunTask(nThreadID, pTask);
pTask = pTask->pNext;
}

return nCount;
}

int main(int argc, char* argv[])
{
int i;

SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);

#ifdef XTEST_TIME
__int64 nStart = XGetTickCount();
#endif

XTASKPARAM xTask[XMAXTHREAD]={0};
unsigned int nCount = 0,nQueen = 15,nThread = 2, nCnt[XMAXTHREAD] = {0};

#ifdef XOUT_RET
for(i = 0; i < nQueen; ++i)
{
g_pPosIndex[1 << i] = i + 1;
sprintf(g_szOutStr[i+1], "%02d ", i + 1);
}
for(i = 0; i < nThread; ++i)
{
char szFile[MAX_PATH];
sprintf(szFile, "ret%d.txt", i);
g_pFile[i] = fopen(szFile, "w");
}
#endif

XInitTask(xTask, nThread, nQueen);

#pragma omp parallel for
for(i = 0; i < nThread; ++i)
{
nCnt[i] = XRunTaskThread(i, xTask + i);
#ifdef XOUT_RET
XOutRet(i, NULL);
#endif
}

for(i = 0; i < nThread; ++i) nCount += nCnt[i];

#ifdef XOUT_RET
for(i = 0; i < nThread; ++i) fclose(g_pFile[i]);
#endif

#ifdef XTEST_TIME
printf("%d皇后共有%d种排列 %d 微妙\n", nQueen, nCount*2, (int)(XGetTickCount() - nStart));
#endif

return 0;
}
...全文
198 回复 打赏 收藏 转发到动态 举报
AI 作业
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复

567

社区成员

发帖
与我相关
我的任务
社区描述
英特尔® 边缘计算,聚焦于边缘计算、AI、IoT等领域,为开发者提供丰富的开发资源、创新技术、解决方案与行业活动。
社区管理员
  • 英特尔技术社区
  • shere_lin
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧