求一个快速分解bit位的方法

ai_3621 2008-02-17 01:23:13

已知32位中有20个1,12个0。

要求: 快速找处所有的1位索引数值。

void find(unsigned int code, int index[20])
{

}
...全文
1059 118 打赏 收藏 转发到动态 举报
写回复
用AI写文章
118 条回复
切换为时间正序
请发表友善的回复…
发表回复
ai_3621 2008-03-16
  • 打赏
  • 举报
回复
根据116楼要求,此处结贴。
非常感谢各位来此讨论
shines77 2008-03-16
  • 打赏
  • 举报
回复
今天才看到,写了个查表法,比yaos上面的查表法科学一些,详看新帖,不知道算不算快的,但至少是一种方法
无心人 2008-03-15
  • 打赏
  • 举报
回复
http://topic.csdn.net/u/20080311/14/d7a7e07c-f4db-4a5f-bc07-83cd7229582c.html?seed=1110609872

已转新帖
那里有汇编代码的11个版本的算法

iceheart 2008-03-14
  • 打赏
  • 举报
回复
贴个查表的方法
struct _NODE_VAL {
char data[8];
int count;
};
_NODE_VAL values[256] = {
{0,0,0,0,0,0,0,0,0},
{0,0,0,0,0,0,0,0,1},
{1,0,0,0,0,0,0,0,1},
{0,1,0,0,0,0,0,0,2},
{2,0,0,0,0,0,0,0,1},
{0,2,0,0,0,0,0,0,2},
{1,2,0,0,0,0,0,0,2},
{0,1,2,0,0,0,0,0,3},
{3,0,0,0,0,0,0,0,1},
{0,3,0,0,0,0,0,0,2},
{1,3,0,0,0,0,0,0,2},
{0,1,3,0,0,0,0,0,3},
{2,3,0,0,0,0,0,0,2},
{0,2,3,0,0,0,0,0,3},
{1,2,3,0,0,0,0,0,3},
{0,1,2,3,0,0,0,0,4},
{4,0,0,0,0,0,0,0,1},
{0,4,0,0,0,0,0,0,2},
{1,4,0,0,0,0,0,0,2},
{0,1,4,0,0,0,0,0,3},
{2,4,0,0,0,0,0,0,2},
{0,2,4,0,0,0,0,0,3},
{1,2,4,0,0,0,0,0,3},
{0,1,2,4,0,0,0,0,4},
{3,4,0,0,0,0,0,0,2},
{0,3,4,0,0,0,0,0,3},
{1,3,4,0,0,0,0,0,3},
{0,1,3,4,0,0,0,0,4},
{2,3,4,0,0,0,0,0,3},
{0,2,3,4,0,0,0,0,4},
{1,2,3,4,0,0,0,0,4},
{0,1,2,3,4,0,0,0,5},
{5,0,0,0,0,0,0,0,1},
{0,5,0,0,0,0,0,0,2},
{1,5,0,0,0,0,0,0,2},
{0,1,5,0,0,0,0,0,3},
{2,5,0,0,0,0,0,0,2},
{0,2,5,0,0,0,0,0,3},
{1,2,5,0,0,0,0,0,3},
{0,1,2,5,0,0,0,0,4},
{3,5,0,0,0,0,0,0,2},
{0,3,5,0,0,0,0,0,3},
{1,3,5,0,0,0,0,0,3},
{0,1,3,5,0,0,0,0,4},
{2,3,5,0,0,0,0,0,3},
{0,2,3,5,0,0,0,0,4},
{1,2,3,5,0,0,0,0,4},
{0,1,2,3,5,0,0,0,5},
{4,5,0,0,0,0,0,0,2},
{0,4,5,0,0,0,0,0,3},
{1,4,5,0,0,0,0,0,3},
{0,1,4,5,0,0,0,0,4},
{2,4,5,0,0,0,0,0,3},
{0,2,4,5,0,0,0,0,4},
{1,2,4,5,0,0,0,0,4},
{0,1,2,4,5,0,0,0,5},
{3,4,5,0,0,0,0,0,3},
{0,3,4,5,0,0,0,0,4},
{1,3,4,5,0,0,0,0,4},
{0,1,3,4,5,0,0,0,5},
{2,3,4,5,0,0,0,0,4},
{0,2,3,4,5,0,0,0,5},
{1,2,3,4,5,0,0,0,5},
{0,1,2,3,4,5,0,0,6},
{6,0,0,0,0,0,0,0,1},
{0,6,0,0,0,0,0,0,2},
{1,6,0,0,0,0,0,0,2},
{0,1,6,0,0,0,0,0,3},
{2,6,0,0,0,0,0,0,2},
{0,2,6,0,0,0,0,0,3},
{1,2,6,0,0,0,0,0,3},
{0,1,2,6,0,0,0,0,4},
{3,6,0,0,0,0,0,0,2},
{0,3,6,0,0,0,0,0,3},
{1,3,6,0,0,0,0,0,3},
{0,1,3,6,0,0,0,0,4},
{2,3,6,0,0,0,0,0,3},
{0,2,3,6,0,0,0,0,4},
{1,2,3,6,0,0,0,0,4},
{0,1,2,3,6,0,0,0,5},
{4,6,0,0,0,0,0,0,2},
{0,4,6,0,0,0,0,0,3},
{1,4,6,0,0,0,0,0,3},
{0,1,4,6,0,0,0,0,4},
{2,4,6,0,0,0,0,0,3},
{0,2,4,6,0,0,0,0,4},
{1,2,4,6,0,0,0,0,4},
{0,1,2,4,6,0,0,0,5},
{3,4,6,0,0,0,0,0,3},
{0,3,4,6,0,0,0,0,4},
{1,3,4,6,0,0,0,0,4},
{0,1,3,4,6,0,0,0,5},
{2,3,4,6,0,0,0,0,4},
{0,2,3,4,6,0,0,0,5},
{1,2,3,4,6,0,0,0,5},
{0,1,2,3,4,6,0,0,6},
{5,6,0,0,0,0,0,0,2},
{0,5,6,0,0,0,0,0,3},
{1,5,6,0,0,0,0,0,3},
{0,1,5,6,0,0,0,0,4},
{2,5,6,0,0,0,0,0,3},
{0,2,5,6,0,0,0,0,4},
{1,2,5,6,0,0,0,0,4},
{0,1,2,5,6,0,0,0,5},
{3,5,6,0,0,0,0,0,3},
{0,3,5,6,0,0,0,0,4},
{1,3,5,6,0,0,0,0,4},
{0,1,3,5,6,0,0,0,5},
{2,3,5,6,0,0,0,0,4},
{0,2,3,5,6,0,0,0,5},
{1,2,3,5,6,0,0,0,5},
{0,1,2,3,5,6,0,0,6},
{4,5,6,0,0,0,0,0,3},
{0,4,5,6,0,0,0,0,4},
{1,4,5,6,0,0,0,0,4},
{0,1,4,5,6,0,0,0,5},
{2,4,5,6,0,0,0,0,4},
{0,2,4,5,6,0,0,0,5},
{1,2,4,5,6,0,0,0,5},
{0,1,2,4,5,6,0,0,6},
{3,4,5,6,0,0,0,0,4},
{0,3,4,5,6,0,0,0,5},
{1,3,4,5,6,0,0,0,5},
{0,1,3,4,5,6,0,0,6},
{2,3,4,5,6,0,0,0,5},
{0,2,3,4,5,6,0,0,6},
{1,2,3,4,5,6,0,0,6},
{0,1,2,3,4,5,6,0,7},
{7,0,0,0,0,0,0,0,1},
{0,7,0,0,0,0,0,0,2},
{1,7,0,0,0,0,0,0,2},
{0,1,7,0,0,0,0,0,3},
{2,7,0,0,0,0,0,0,2},
{0,2,7,0,0,0,0,0,3},
{1,2,7,0,0,0,0,0,3},
{0,1,2,7,0,0,0,0,4},
{3,7,0,0,0,0,0,0,2},
{0,3,7,0,0,0,0,0,3},
{1,3,7,0,0,0,0,0,3},
{0,1,3,7,0,0,0,0,4},
{2,3,7,0,0,0,0,0,3},
{0,2,3,7,0,0,0,0,4},
{1,2,3,7,0,0,0,0,4},
{0,1,2,3,7,0,0,0,5},
{4,7,0,0,0,0,0,0,2},
{0,4,7,0,0,0,0,0,3},
{1,4,7,0,0,0,0,0,3},
{0,1,4,7,0,0,0,0,4},
{2,4,7,0,0,0,0,0,3},
{0,2,4,7,0,0,0,0,4},
{1,2,4,7,0,0,0,0,4},
{0,1,2,4,7,0,0,0,5},
{3,4,7,0,0,0,0,0,3},
{0,3,4,7,0,0,0,0,4},
{1,3,4,7,0,0,0,0,4},
{0,1,3,4,7,0,0,0,5},
{2,3,4,7,0,0,0,0,4},
{0,2,3,4,7,0,0,0,5},
{1,2,3,4,7,0,0,0,5},
{0,1,2,3,4,7,0,0,6},
{5,7,0,0,0,0,0,0,2},
{0,5,7,0,0,0,0,0,3},
{1,5,7,0,0,0,0,0,3},
{0,1,5,7,0,0,0,0,4},
{2,5,7,0,0,0,0,0,3},
{0,2,5,7,0,0,0,0,4},
{1,2,5,7,0,0,0,0,4},
{0,1,2,5,7,0,0,0,5},
{3,5,7,0,0,0,0,0,3},
{0,3,5,7,0,0,0,0,4},
{1,3,5,7,0,0,0,0,4},
{0,1,3,5,7,0,0,0,5},
{2,3,5,7,0,0,0,0,4},
{0,2,3,5,7,0,0,0,5},
{1,2,3,5,7,0,0,0,5},
{0,1,2,3,5,7,0,0,6},
{4,5,7,0,0,0,0,0,3},
{0,4,5,7,0,0,0,0,4},
{1,4,5,7,0,0,0,0,4},
{0,1,4,5,7,0,0,0,5},
{2,4,5,7,0,0,0,0,4},
{0,2,4,5,7,0,0,0,5},
{1,2,4,5,7,0,0,0,5},
{0,1,2,4,5,7,0,0,6},
{3,4,5,7,0,0,0,0,4},
{0,3,4,5,7,0,0,0,5},
{1,3,4,5,7,0,0,0,5},
{0,1,3,4,5,7,0,0,6},
{2,3,4,5,7,0,0,0,5},
{0,2,3,4,5,7,0,0,6},
{1,2,3,4,5,7,0,0,6},
{0,1,2,3,4,5,7,0,7},
{6,7,0,0,0,0,0,0,2},
{0,6,7,0,0,0,0,0,3},
{1,6,7,0,0,0,0,0,3},
{0,1,6,7,0,0,0,0,4},
{2,6,7,0,0,0,0,0,3},
{0,2,6,7,0,0,0,0,4},
{1,2,6,7,0,0,0,0,4},
{0,1,2,6,7,0,0,0,5},
{3,6,7,0,0,0,0,0,3},
{0,3,6,7,0,0,0,0,4},
{1,3,6,7,0,0,0,0,4},
{0,1,3,6,7,0,0,0,5},
{2,3,6,7,0,0,0,0,4},
{0,2,3,6,7,0,0,0,5},
{1,2,3,6,7,0,0,0,5},
{0,1,2,3,6,7,0,0,6},
{4,6,7,0,0,0,0,0,3},
{0,4,6,7,0,0,0,0,4},
{1,4,6,7,0,0,0,0,4},
{0,1,4,6,7,0,0,0,5},
{2,4,6,7,0,0,0,0,4},
{0,2,4,6,7,0,0,0,5},
{1,2,4,6,7,0,0,0,5},
{0,1,2,4,6,7,0,0,6},
{3,4,6,7,0,0,0,0,4},
{0,3,4,6,7,0,0,0,5},
{1,3,4,6,7,0,0,0,5},
{0,1,3,4,6,7,0,0,6},
{2,3,4,6,7,0,0,0,5},
{0,2,3,4,6,7,0,0,6},
{1,2,3,4,6,7,0,0,6},
{0,1,2,3,4,6,7,0,7},
{5,6,7,0,0,0,0,0,3},
{0,5,6,7,0,0,0,0,4},
{1,5,6,7,0,0,0,0,4},
{0,1,5,6,7,0,0,0,5},
{2,5,6,7,0,0,0,0,4},
{0,2,5,6,7,0,0,0,5},
{1,2,5,6,7,0,0,0,5},
{0,1,2,5,6,7,0,0,6},
{3,5,6,7,0,0,0,0,4},
{0,3,5,6,7,0,0,0,5},
{1,3,5,6,7,0,0,0,5},
{0,1,3,5,6,7,0,0,6},
{2,3,5,6,7,0,0,0,5},
{0,2,3,5,6,7,0,0,6},
{1,2,3,5,6,7,0,0,6},
{0,1,2,3,5,6,7,0,7},
{4,5,6,7,0,0,0,0,4},
{0,4,5,6,7,0,0,0,5},
{1,4,5,6,7,0,0,0,5},
{0,1,4,5,6,7,0,0,6},
{2,4,5,6,7,0,0,0,5},
{0,2,4,5,6,7,0,0,6},
{1,2,4,5,6,7,0,0,6},
{0,1,2,4,5,6,7,0,7},
{3,4,5,6,7,0,0,0,5},
{0,3,4,5,6,7,0,0,6},
{1,3,4,5,6,7,0,0,6},
{0,1,3,4,5,6,7,0,7},
{2,3,4,5,6,7,0,0,6},
{0,2,3,4,5,6,7,0,7},
{1,2,3,4,5,6,7,0,7},
{0,1,2,3,4,5,6,7,8 }

};

#include <windows.h>
int find_index(unsigned long code,int *index)
{

int j = 0;
_NODE_VAL &_the_val = values[code & 0xFF];
for (int k = 0; k < _the_val.count; k++ ) index[j++] = _the_val.data[k];
_the_val = values[(code << 8) & 0xFF];
for (int k = 0; k < _the_val.count; k++ ) index[j++] = _the_val.data[k] + 8;
_the_val = values[(code << 16) & 0xFF];
for (int k = 0; k < _the_val.count; k++ ) index[j++] = _the_val.data[k] + 16;
_the_val = values[(code << 24) & 0xFF];
for (int k = 0; k < _the_val.count; k++ ) index[j++] = _the_val.data[k] + 24;

return j;
}
yaos 2008-03-11
  • 打赏
  • 举报
回复
[code=C/C++]//修改GxQ代码得到find_yaos_2代码
//测试时间见后, 为测试最消耗时间的)xFFFFFFFF 100万次
//P4 XEON 2.0

#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

DWORD find_by_yaos(DWORD a, DWORD index[33])
{
__asm
{
mov edx, a;
mov edi, dword ptr index;
xor eax, eax;
loop1:
bsr ecx, edx;
je exit1;
btr edx, ecx;
mov dword ptr [edi + 4*eax], ecx;
inc eax;
jmp loop1;
exit1:
mov dword ptr [edi + 4*eax], -1;
}
}

void find_by_yaos_1(DWORD a, DWORD index[33])
{
__asm
{
mov eax, a
mov edi, dword ptr [index]
loop1:
bsr ebx, eax
je exit1
btr eax, ebx
mov dword ptr [edi], ebx
add edi, 4
jmp loop1
exit1:
mov dword ptr [edi], -1
}
}

void find_by_yaos_2(DWORD a, DWORD index[33])
{
__asm
{
mov eax, a
mov edi, dword ptr [index]
mov ecx, 0
loop1:
shr eax, 1
jnc loop2
mov dword ptr [edi], ecx
add edi, 4
loop2:
inc ecx
cmp ecx, 32
jne loop1
mov dword ptr [edi], -1

}
}

DWORD find_by_GxQ(DWORD code, DWORD index[33])
{
__asm
{
mov ecx, code;

xor eax, eax;

shr ecx, 1;
mov dword ptr[index + 4*eax], 0;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 1;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 2;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 3;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 4;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 5;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 6;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 7;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 8;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 9;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 10;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 11;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 12;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 13;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 14;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 15;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 16;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 17;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 18;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 19;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 20;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 21;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 22;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 23;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 24;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 25;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 26;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 27;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 28;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 29;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 30;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 31;
adc eax, 0;

// 休止符
mov dword ptr[index + 4*eax], -1;
}
}

DWORD find_by_GxQ_bt(DWORD code, DWORD index[33])
{
__asm
{
mov ecx, code;

xor eax, eax;

bt ecx, 0;
mov dword ptr[index + 4*eax], 0;
adc eax, 0;

bt ecx, 1;
mov dword ptr[index + 4*eax], 1;
adc eax, 0;

bt ecx, 2;
mov dword ptr[index + 4*eax], 2;
adc eax, 0;

bt ecx, 3;
mov dword ptr[index + 4*eax], 3;
adc eax, 0;

bt ecx, 4;
mov dword ptr[index + 4*eax], 4;
adc eax, 0;

bt ecx, 5;
mov dword ptr[index + 4*eax], 5;
adc eax, 0;

bt ecx, 6;
mov dword ptr[index + 4*eax], 6;
adc eax, 0;

bt ecx, 7;
mov dword ptr[index + 4*eax], 7;
adc eax, 0;

bt ecx, 8;
mov dword ptr[index + 4*eax], 8;
adc eax, 0;

bt ecx, 9;
mov dword ptr[index + 4*eax], 9;
adc eax, 0;

bt ecx, 10;
mov dword ptr[index + 4*eax], 10;
adc eax, 0;

bt ecx, 11;
mov dword ptr[index + 4*eax], 11;
adc eax, 0;

bt ecx, 12;
mov dword ptr[index + 4*eax], 12;
adc eax, 0;

bt ecx, 13;
mov dword ptr[index + 4*eax], 13;
adc eax, 0;

bt ecx, 14;
mov dword ptr[index + 4*eax], 14;
adc eax, 0;

bt ecx, 15;
mov dword ptr[index + 4*eax], 15;
adc eax, 0;

bt ecx, 16;
mov dword ptr[index + 4*eax], 16;
adc eax, 0;

bt ecx, 17;
mov dword ptr[index + 4*eax], 17;
adc eax, 0;

bt ecx, 18;
mov dword ptr[index + 4*eax], 18;
adc eax, 0;

bt ecx, 19;
mov dword ptr[index + 4*eax], 19;
adc eax, 0;

bt ecx, 20;
mov dword ptr[index + 4*eax], 20;
adc eax, 0;

bt ecx, 21;
mov dword ptr[index + 4*eax], 21;
adc eax, 0;

bt ecx, 22;
mov dword ptr[index + 4*eax], 22;
adc eax, 0;

bt ecx, 23;
mov dword ptr[index + 4*eax], 23;
adc eax, 0;

bt ecx, 24;
mov dword ptr[index + 4*eax], 24;
adc eax, 0;

bt ecx, 25;
mov dword ptr[index + 4*eax], 25;
adc eax, 0;

bt ecx, 26;
mov dword ptr[index + 4*eax], 26;
adc eax, 0;

bt ecx, 27;
mov dword ptr[index + 4*eax], 27;
adc eax, 0;

bt ecx, 28;
mov dword ptr[index + 4*eax], 28;
adc eax, 0;

bt ecx, 29;
mov dword ptr[index + 4*eax], 29;
adc eax, 0;

bt ecx, 30;
mov dword ptr[index + 4*eax], 30;
adc eax, 0;

bt ecx, 31;
mov dword ptr[index + 4*eax], 31;
adc eax, 0;

// 休止符
mov dword ptr[index + 4*eax], -1;
}
}

yaos 2008-03-11
  • 打赏
  • 举报
回复
//按GxQ提示修改代码
void find_by_yaos_3(DWORD a, DWORD index[33])
{
__asm
{
mov eax, a
mov edi, dword ptr [index]
mov ecx, 32
loop1:
shl eax, 1
jnc loop2
mov dword ptr [edi], ecx
dec dword ptr [edi]
add edi, 4
loop2:
sub ecx, 1
jne loop1
mov dword ptr [edi], -1

}
}

共6个函数
说下情况
1 find_by_yaos find_by_yaos_1一类
2 find_by_yaos_2 find_by_yaos_3一类
3 find_by_GxQ一类
4 find_by_Gxq_bt一类

测试了0xFFFFFFFF 0xAAAAAAAA 0三个函数
1类时间基本相等
2类时间基本相等
3时间少于1, 但bit=1越少,1越优秀, 2比3快,差距10%左右
4证明bit操作比通常操作慢, 慢于3, 但快于1

经过测试, 目前find_by_yaos_3最快

ai_3621 2008-03-10
  • 打赏
  • 举报
回复
105代码编译不过。
lxjlan 2008-03-10
  • 打赏
  • 举报
回复
用移位和位运算的方式最好。。。。可以借鉴一个经典的,计算一个整数中1的个数的算法。。。
yaos 2008-03-10
  • 打赏
  • 举报
回复
既然已经认68算比较快的了

那能用我105代码做下测试否?

如果测试和我机器情况一致

那只能说我们都错了

:)

还是CPU内置指令速度快

不排除做随机测试的可能

或者干脆做全Bit测试
执行2^32次

:)
yaos 2008-03-10
  • 打赏
  • 举报
回复
终于找到问题了
似乎我程序测试的是1000万次
你们测试的是100万次


附详细代码
并请终止任何当前占CPU程序

#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

DWORD bits[32];

void find(DWORD a, DWORD * b, DWORD *c)
{
__asm
{
mov eax, dword ptr [a]
xor esi, esi
loop1:
bsr edi, eax
je exit1
xor eax, dword ptr [bits + 4 * edi]
mov dword ptr [b + 4*esi], edi
inc esi
jmp loop1
exit1:
mov dword ptr [c], esi
}
}

DWORD find_by_GxQ(DWORD code, DWORD index[33])
{
__asm
{
mov ecx, code;

xor eax, eax;

shr ecx, 1;
mov dword ptr[index + 4*eax], 0;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 1;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 2;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 3;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 4;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 5;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 6;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 7;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 8;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 9;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 10;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 11;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 12;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 13;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 14;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 15;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 16;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 17;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 18;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 19;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 20;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 21;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 22;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 23;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 24;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 25;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 26;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 27;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 28;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 29;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 30;
adc eax, 0;

shr ecx, 1;
mov dword ptr[index + 4*eax], 31;
adc eax, 0;

// 休止符
mov dword ptr[index + 4*eax], -1;
}
}

int main(int argc, char * argv[])
{
::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
LARGE_INTEGER lTimestart, lTimeEnd, liFreq;

DWORD c, bitMask = 1, index[33];

for (DWORD i = 0; i < 31; i ++)
{
bits = bitMask;
bitMask <<= 1;
}

QueryPerformanceCounter(<imestart);
for(DWORD i = 0; i < 1000000; i++){
find(i, &index[0], &c);// 调用bit扫描函数
}
QueryPerformanceCounter(<imeEnd);
QueryPerformanceFrequency(&liFreq);

double dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart) * 1000) / (double)liFreq.QuadPart;
printf("yaos: %.3f\n", dbMs);

QueryPerformanceCounter(<imestart);
for(DWORD i = 0; i < 1000000; i++){
find_by_GxQ(i, index);// 调用bit扫描函数
}
QueryPerformanceCounter(<imeEnd);
QueryPerformanceFrequency(&liFreq);

dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart) * 1000) / (double)liFreq.QuadPart;
printf("GXQ: %.3f\n", dbMs);

system("Pause");

return 0;
}



[code=INIFile]
测试机器P4 XEON 2.0 X 2, 1G DDR
多次比较时间

E:\yaojialin\FindBits\Debug>findbits
yaos: 154.030
GXQ: 174.165
请按任意键继续. . .

E:\yaojialin\FindBits\Debug>findbits
yaos: 153.931
GXQ: 174.422
请按任意键继续. . .

E:\yaojialin\FindBits\Debug>findbits
yaos: 153.751
GXQ: 171.132
请按任意键继续. . .

E:\yaojialin\FindBits\Debug>findbits
yaos: 154.908
GXQ: 171.753
请按任意键继续. . .

E:\yaojialin\FindBits\Debug>findbits
yaos: 156.030
GXQ: 171.409
请按任意键继续. . .

E:\yaojialin\FindBits\Debug>findbits
yaos: 154.036
GXQ: 171.994
请按任意键继续. . .

E:\yaojialin\FindBits\Debug>findbits
yaos: 154.855
GXQ: 173.272
请按任意键继续. . .

[/code]
yaos 2008-03-10
  • 打赏
  • 举报
回复
总觉的存在问题, 问题在哪里呢?
哪里不对呢?

68楼执行100万次 假定700ms
一次0.7us = 700ns

假定是3G CPU
一个周期1/3000 000 000 = 0.3 ns

周期为 700 / 0.3 = 2000????

我的计算存在问题??????????????
yaos 2008-03-10
  • 打赏
  • 举报
回复
此函数有个极限的啊

考虑有32位

则总的CPU周期 > 32/2 + 8 = 24
考虑3G CPU, 运行10 000 000次
是 10 0000 000 / 3000 000 000 * 24 = 24 * 10 / 3000 = 30 / 1000 = 30毫秒

不会低于30豪秒的

似乎目前的成绩还差的远呢

下面是否用RDTSC指令测下执行周期????
无心人 2008-03-10
  • 打赏
  • 举报
回复
严正声明
无论如何之前在两个论坛的该问题代码全部声明作废
以下述代码为准
为统一测试方便,以GxQ函数样式声明


DWORD find_by_yaos(DWORD code, int index[33])
{
__asm
{
mov ecx, code;
mov edx, dword ptr index;
xor eax, eax
loop1:
bsr ebx, ecx
je exit1
btr ecx, ebx
mov dword ptr[edx + 4*eax], ebx
inc eax
jmp loop1
exit1:
mov dword ptr[edx + 4*eax], -1;
}
}
fire_woods 2008-03-10
  • 打赏
  • 举报
回复
我用的是正版的vc6.0,哈哈.
yaos 2008-03-10
  • 打赏
  • 举报
回复
讨论到现在
至于算法
我想已经清楚了
我的代码也不一定对和好

我只想知道为什么出现复杂代码反而快
你们代码条数远大于我的
按道理不会超我代码这么多时间的

刚也想了
是不是bsr每次都重新扫描造成的

在P4上谁知道bsr的详细周期公式啊?

毕竟BSR也不是没用处
====================
下列函数是用BSR/BSF比较快的
求等于大于k的最小的2^n
yaos 2008-03-10
  • 打赏
  • 举报
回复
VC2008Express编译通过
=======================
偏激一点
不要拿VC6说事
那是盗版的
除非你用的是正版
:)
ProtossBird 2008-03-08
  • 打赏
  • 举报
回复
参观
crystal_heart 2008-03-08
  • 打赏
  • 举报
回复
mark
ai_3621 2008-03-08
  • 打赏
  • 举报
回复
彻底绝望了,即使改为做到下面这样,还是不能有所提高,期待有高人解局:
下面代码中
D +=((NUM+BASE)>>OFFSET

对查找表 table[4] = {0,1,1,2)的优化,由 D += table(NUM >>INDEX)&0x3) 转换而来


#define BIT_CHECK_D(NUM, INDEX,BASE,OFFSET) {index[D] = INDEX; D +=((NUM+BASE)>>OFFSET)&0x3;}

#define BIT_CHECK_S(NUM, INDEX,BASE,OFFSET) { index[S] = INDEX; S +=((NUM+BASE)>>OFFSET)&0x3;}

DWORD find_3621(DWORD code, int index[32])
{
DWORD S=0,D=0;

BIT_CHECK_D(code,0,0X1,1);
BIT_CHECK_S(code,1,0X2,2);
BIT_CHECK_D(code,2,0X4,3);
BIT_CHECK_S(code,3,0X8,4);
BIT_CHECK_D(code,4,0X10,5);
BIT_CHECK_S(code,5,0X20,6);
BIT_CHECK_D(code,6,0X40,7);
BIT_CHECK_S(code,7,0X80,8);
BIT_CHECK_D(code,8,0X100,9);
BIT_CHECK_S(code,9,0X200,10);
BIT_CHECK_D(code,10,0X400,11);
BIT_CHECK_S(code,11,0X800,12);
BIT_CHECK_D(code,12,0X1000,13);
BIT_CHECK_S(code,13,0X2000,14);
BIT_CHECK_D(code,14,0X4000,15);
BIT_CHECK_S(code,15,0X8000,16);
BIT_CHECK_D(code,16,0X10000,17);
BIT_CHECK_S(code,17,0X20000,18);
BIT_CHECK_D(code,18,0X40000,19);
BIT_CHECK_S(code,19,0X80000,20);
BIT_CHECK_D(code,20,0X100000,21);
BIT_CHECK_S(code,21,0X200000,22);
BIT_CHECK_D(code,22,0X400000,23);
BIT_CHECK_S(code,23,0X800000,24);
BIT_CHECK_D(code,24,0X1000000,25);
BIT_CHECK_S(code,25,0X2000000,26);
BIT_CHECK_D(code,26,0X4000000,27);
BIT_CHECK_S(code,27,0X8000000,28);
BIT_CHECK_D(code,28,0X10000000,29);
BIT_CHECK_S(code,29,0X20000000,30);
BIT_CHECK_D(code,30,0X40000000,31);
BIT_CHECK_S(code,31,0X80000000,32);

return D;
}
euroman 2008-03-08
  • 打赏
  • 举报
回复
我建议用取模,然后shift right那么做32次就可以了
加载更多回复(98)

33,007

社区成员

发帖
与我相关
我的任务
社区描述
数据结构与算法相关内容讨论专区
社区管理员
  • 数据结构与算法社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧