拆数程序的后续

耙子 2006-01-27 03:21:14
应 alphax 的要求,提出我的最终代码

原贴 http://community.csdn.net/Expert/topic/4515/4515392.xml?temp=.245907

我的代码今天早上发现了些错误,所以重新改写了

iNumberCount 是数的个数
ArrNumbers 是存放数的数组

function ChaishuPlus: integer;
type
tByteArray = record
Bytes: array[0..3] of byte;
end;
var
i, j, k, t, s: DWORD;
aTemp: array [0..3] of array [0..$ff] of integer;
begin
//预计算
ZeroMemory(@aTemp, SizeOf(aTemp));
for i:= 0 to 3 do begin // 每个DWORD 4个字节
for j:= 0 to $ff do begin // 每个字节 256种可能
for k:= 0 to 7 do begin // 每个字节 8 位
if (1 shl k) and j <> 0 then begin
aTemp[i][j]:= aTemp[i][j] + ArrNumbers[i*8+k+1];
end;
end;
end;
end;
Result:= 0;
// 防止数组为空
if iNumberCount=0 then
s:= 0
else
s:= (1 shl iNumberCount-1) -1;
// 开始拆解计算
for i:= 1 to s do begin
t:= aTemp[0][tByteArray(i).Bytes[0]] + aTemp[1][tByteArray(i).Bytes[1]] +
aTemp[2][tByteArray(i).Bytes[2]] + aTemp[3][tByteArray(i).Bytes[3]] - sum; // (1)
if t=0 then begin // 这个写法是为了优化(2)
Result:= i;
break;
end;
end;
end;

Result 的各个bit位,如果为1 就代表 相应的ArrNumbers 是结果。
这个就是我的最终代码。

基本上抄袭了alphax 原来的算法,
只是稍微扩展了一下成了32个数的(原来写的64个数的有点问题),他使用空间换时间的思想,空间其实也不大,才4*256*4=4K字节,
但是alphax说 把预计算从8bit变成16bit能快1/5,参看汇编代码之后对此表示一点怀疑。

下面提一下 (1)和(2)部分
...全文
310 25 打赏 收藏 转发到动态 举报
写回复
用AI写文章
25 条回复
切换为时间正序
请发表友善的回复…
发表回复
耙子 2006-02-14
  • 打赏
  • 举报
回复
基本上看明白了些,
时间复杂度基本上从 O(n^2) 降到了 O(log2 N) 了
alphax 2006-02-14
  • 打赏
  • 举报
回复
呃,注释少了点是吧,

t_pack_searcher是用类,用法很简单,先create对象,然后exec()就行了,返回一个字节数组,表示对参数nums的选择,如果返回的字节数组全部为0,则表示没有找到解

t_pack_tester包含了一个类方法用来测试t_pack_searcher的正确性和性能

前面我说的问题我现在想明白了,估计就是预计算和排序占用的时间接近0.015秒,而搜索过程则不费什么时间,所以24个数和32个数的耗时非常接近
alphax 2006-02-13
  • 打赏
  • 举报
回复
type
t_perf_count_collection = record
expected_times, tested_times: integer;
used_times: array of double;
end;
p_perf_count_collection = ^t_perf_count_collection;

{ t_pack_tester }
class procedure t_pack_tester.full_test;


procedure _exec_(
const a_no_result: boolean;
const a_num_count: cardinal;
const a_min_selection_count: integer;
perf_cnt_collect: p_perf_count_collection;
const a_indent: integer
);
var
sample: t_data_sample;
begin
sample := t_data_sample.create(
a_no_result,
a_num_count,
a_min_selection_count
);
try
if a_indent > 0 then
indent(a_indent);

sample.exec(perf_cnt_collect <> nil);

if perf_cnt_collect <> nil then
begin
if perf_cnt_collect^.tested_times < perf_cnt_collect^.expected_times then
begin
perf_cnt_collect^.used_times[perf_cnt_collect^.tested_times] :=
sample.perf_cnt_period_to_ms();

inc(perf_cnt_collect^.tested_times);
end;
end;
finally
sample.destroy();
end;
end;

procedure do_test(
const no_result: boolean;
const num_count: cardinal;
perf_cnt_collect: p_perf_count_collection;
const indent: integer
);
begin
if no_result then
_exec_(true, num_count, 0, perf_cnt_collect, indent)
else
begin
if perf_cnt_collect = nil then
begin
_exec_(false, num_count, 1, perf_cnt_collect, indent);
if num_count > 1 then
_exec_(false, num_count, num_count, nil, indent);
if num_count > 2 then
_exec_(false, num_count, num_count-1, nil, indent);
end;

if num_count > 5 then
_exec_(false, num_count, num_count div 2, perf_cnt_collect, indent);

if num_count > 7 then
begin
_exec_(false, num_count, num_count div 2 - 1, perf_cnt_collect, indent);
_exec_(false, num_count, num_count div 2 + 1, perf_cnt_collect, indent);
end;

if num_count > 9 then
begin
_exec_(false, num_count, num_count div 2 - 2, perf_cnt_collect, indent);
_exec_(false, num_count, num_count div 2 + 2, perf_cnt_collect, indent);
end;
end;
end;

procedure init_pcc(const expected_times: integer; var pcc: t_perf_count_collection);
begin
pcc.expected_times := expected_times;
pcc.tested_times := 0;
if length(pcc.used_times) <> expected_times then
setlength(pcc.used_times, expected_times);
end;

procedure avg_of_pcc(
const pcc: t_perf_count_collection;
var max, min, avg: double);
var
i: integer;
t: double;
begin
max := 0;
min := MaxInt;
avg := 0;


for i := 0 to pcc.tested_times - 1 do
begin
t := pcc.used_times[i];
avg := avg + t;
if t > max then
max := t;
if t < min then
min := t;
end;

avg := avg / pcc.tested_times;
end;

function bool2str(b: boolean): string;
const b_str: array[boolean] of string = ('false', 'true');
begin
result := b_str[b];
end;

procedure print_task_title(const title: string);
begin
writeln('######################');
writeln('TASK: ', title);
writeln('######################');
end;

procedure test_correctness();
var
i: integer;

begin
print_task_title('test correctness');
for i := 1 to max_num_count do
begin
indent(2); write('num count: ', i:2, ', no result = ', bool2str(true));
do_test(true, i, nil, 0);
do_test(true, i, nil, 0);
do_test(true, i, nil, 0);
do_test(true, i, nil, 0);
writeln(' OK');

indent(2); write('num count: ', i:2, ', no result = ', bool2str(false));
do_test(false, i, nil, 0);
do_test(false, i, nil, 0);
do_test(false, i, nil, 0);
do_test(false, i, nil, 0);
writeln(' OK');
end;
writeln('');
end;

procedure test_performance();

procedure _p_(
const no_result: boolean;
const num_count: integer);
const
times = 4;
var
i: integer;
pcc: t_perf_count_collection;
max, min, avg: double;
begin
init_pcc(times * 32, pcc);

indent(4); writeln('no result = ', bool2str(no_result));

for i := 1 to times do
begin
do_test(no_result, num_count, @pcc, 6);
if no_result then
do_test(no_result, num_count, @pcc, 6);
end;

avg_of_pcc(pcc, max, min, avg);

indent(6); writeln('tested ', pcc.tested_times, ' pass(es)');
indent(6); writeln('max used time(ms):', formatfloat('0.0000', max));
indent(6); writeln('min used time(ms):', formatfloat('0.0000', min));
indent(6); writeln('avg used time(ms):', formatfloat('0.0000', avg));
writeln('');
end;

procedure p(const num_count: integer);
begin
indent(2); writeln('num count = ', num_count:2);
indent(2); writeln('---------------------');
_p_(true, num_count);
_p_(false, num_count);
writeln('');
end;

var
i: integer;
begin
print_task_title('test performance');
i := 8;
while i <= max_num_count do
begin
p(i-1);
p(i);
inc(i, 8);
end;
end;

begin
writeln('+=======================================+');
writeln(' pack FULL TEST');
writeln('+=======================================+');

t_pack_searcher.test_scan_proc();

test_correctness();
{$ifdef perf_meas}
test_performance();
{$endif}

writeln('*****************************************');
writeln('');
writeln('+=======================================+');
writeln(' FINISH');
writeln('+=======================================+');
writeln('');
writeln('');
pause();
end;
{$endif}

end.
alphax 2006-02-13
  • 打赏
  • 举报
回复
{$ifdef pack5_test}

type
t_bitmap_64 = record
lo, hi: cardinal;
end;
{ t_data_sample }
constructor t_data_sample.create(
const a_no_result: boolean;
const a_num_count: cardinal;
const a_min_selection_count: integer);

var
i, rr_lo, rr_hi, bit_count, selection_sz: integer;
bitmask_lo, bitmask_hi: cardinal;
bitmap: t_bitmap_64;
sum64, max_sum: int64;
begin
assert((a_num_count > 0) and (a_num_count <= 40));

no_result := a_no_result;
num_count := a_num_count;
min_selection_count := a_min_selection_count;

randomize();

setlength(nums, a_num_count);

//the allowed sum is high(cardinal)-1
rr_hi := maxint div integer(a_num_count);
rr_lo := rr_hi div 4;

for i := low(nums) to high(nums) do
nums[i] := randomrange(rr_lo, rr_hi);

bitmap.lo := 0;
bitmap.hi := 0;

if not no_result then
begin
max_sum := high(cardinal);
max_sum := max_sum - 1;

if num_count >= 32 then
bitmask_lo := (not cardinal(0))
else
bitmask_lo := (not cardinal(0)) shr (32 - num_count);

if num_count > 32 then
bitmask_hi := (not cardinal(0)) shr (8 - (num_count-32)) and $ff
else bitmask_hi := 0;

repeat
if min_selection_count < 31 then
bitmap.lo := bitmap.lo or randomrange(1, maxint) and bitmask_lo
else bitmap.lo := bitmask_lo;

if min_selection_count > 32 then
begin
if min_selection_count < 40 then
bitmap.hi := bitmap.hi or randomrange(1, maxint) and bitmask_hi
else bitmap.hi := bitmask_hi;
end;

sum64 := 0;
bit_count := 0;
for i := low(nums) to high(nums) do
begin
if test_bit(@bitmap, i) then
begin
inc(sum64, nums[i]);
inc(bit_count);
end;
end;
until ((sum64 < max_sum) and (bit_count >= min_selection_count));

sum := sum64;
end
else
//no result case
sum := high(cardinal);

selection_sz := calc_selection_size(num_count);
setlength(init_selection, selection_sz);

for i := low(nums) to high(nums) do
if test_bit(@bitmap, i) then
set_bit(@init_selection[0], i);
end;


procedure t_data_sample.exec(const print_found_selection: boolean);
var
searcher: t_pack_searcher;
selection: t_pack_selection;
begin
QueryPerformanceCounter(perf_cnt_begin);
searcher := t_pack_searcher.create(no_result, sum, nums);
try
selection := searcher.exec();
finally
searcher.destroy();
end;
QueryPerformanceCounter(perf_cnt_end);

if verify(selection, false) and print_found_selection then
begin
write('found selection: '); print_selection(selection); writeln('');
end;
end;

function t_data_sample.perf_cnt_period_to_ms: double;
var
freq: tlargeinteger;
begin
QueryPerformanceFrequency(freq);
result := (perf_cnt_end - perf_cnt_begin) * 1000;
result := result / freq;
end;



function t_data_sample.verify(
const selection: t_pack_selection;
const print_nums: boolean): boolean;
var
i, selected_count: integer;
sum_verify: cardinal;

procedure print_error();
begin
writeln('******************************');
writeln('ERROR: ');
writeln('******************************');
end;

begin
if is_empty_pack_selection(selection) then
begin
if no_result then
begin
result := true;
exit;
end
else
begin
writeln('original sum is :', sum);
write('original selection is: '); print_selection(init_selection); writeln('');
print_error();
pause();
abort();
end;
end;

sum_verify := 0;
selected_count := 0;
for i := low(nums) to high(nums) do
begin
if test_selection(selection, i) then
begin
inc(sum_verify, nums[i]);
inc(selected_count);

if print_nums then
writeln('select ', i:2, 'th num: ', nums[i]:12);
end;
end;
if print_nums then
writeln('selected count: ', selected_count);

result := sum_verify = sum;
if not result then
begin
print_error();
writeln('the selected sum is :', sum_verify);
writeln('original sum is :', sum);
write('original selection is: '); print_selection(init_selection); writeln('');
write('found selection is: '); print_selection(selection); writeln('');
pause();
abort();
end;
end;


alphax 2006-02-13
  • 打赏
  • 举报
回复

//二分查找
function t_pack_searcher.scan2(const base_value: cardinal): cardinal;
var
i, min_index, max_index: integer;
temp: cardinal;
begin
min_index := 0;
max_index := prec_result_list_sz - 1;

i := max_index shr 1;

while true do
begin
temp := prec_result_list[i].value - base_value;
{$ifdef trace}
inc(scan_hit_count, 1);
{$endif}

if temp = 0 then
begin
result := prec_result_list[i].bitmap;
exit;
end
else if integer(temp) < 0 then
begin
if i = max_index then
break;

min_index := i + 1;
i := ((i + 1) + max_index) shr 1;
end
else // if temp > 0 then
begin
if i = min_index then
break;

max_index := i - 1;
i := (min_index + (i - 1)) shr 1;
end;
end;

result := scan_not_found; //not found
end;

{
对nums_copy进行排序,并且记录他们原来的次序
}
procedure t_pack_searcher.sort_num_copies;
var
i: integer;
sort_temp: array[0..39] of t_prec_result;
pp: array[0..39] of p_prec_result;
begin
for i := 0 to num_count-1 do
begin
sort_temp[i].value := nums_copy[i];
sort_temp[i].bitmap := i;
pp[i] := @sort_temp[i];
end;

quicksort(
pointer(@pp),
0,
num_count-1,
prec_result_sort_compare_asc
);

for i := 0 to num_count-1 do
begin
nums_copy[i] := pp[i]^.value;
nums_order_map[i] := pp[i]^.bitmap;
end;

require_order_map := true;
end;

class procedure t_pack_searcher.test_scan_proc;
var
i: integer;
obj: t_pack_searcher;
begin
obj := t_pack_searcher(t_pack_searcher.newinstance());
try
for i := low(obj.prec_result_list) to high(obj.prec_result_list) do
begin
obj.prec_result_list[i].value := i;
obj.prec_result_list[i].bitmap := i;
end;

obj.prec_result_list_sz := length(obj.prec_result_list);

for i := low(obj.prec_result_list) to high(obj.prec_result_list) do
begin
obj.sum := i;
if obj.scan2(i) = scan_not_found then
raise exception.create('error');
end;
finally
obj.destroy();
end;

end;


procedure print_byte_bitmap(const b: byte);
var
i: integer;
begin
for i := 0 to bits_per_byte-1 do
begin
if (b and bits[i]) <> 0 then
write('1')
else write('0');
end;

write(' ');
end;

procedure print_selection(const selection: t_pack_selection);
var
i: integer;
begin
for i := low(selection) to high(selection) do
print_byte_bitmap(selection[i]);
end;


alphax 2006-02-13
  • 打赏
  • 举报
回复
procedure t_pack_searcher.prepare_large;
var
prec_results_temp_lo: t_prec_result_list256; //size=$100 * 8
prec_results_temp_hi: t_prec_result_list256; //size=$100 * 8;
//required stack size is $1 000

i, j: cardinal;
iter_lo, iter_hi: integer;
p_lo, p_hi, p_dest: p_prec_result;

prec_result_list_temp,
prec_result_list_src,
prec_result_list_dst,
swap: p_prec_result_list;

dest_is_temp: boolean;
begin
//对num_copy进行排序
sort_num_copies();


prec_result_list_sz := length(prec_result_list);

//现在, nums[0] <= nums[1] <= nums[2] ... <= nums[15]
//所以我们接下来的calc_prec_results_temp()调用的性能应该是很好的

//对 nums[0.. 7] 产生所有的组合的 sum 到 prec_results_temp_lo,
//对 nums[8..15] 产生所有的组合的 sum 到 prec_results_temp_hi,
//并对两个结果数组进行排序
//开销 = $100 * log($100) * 2
calc_prec_result_list256(@nums_copy[0], @prec_results_temp_lo); //quick sort
{$ifdef debug}
check_prec_result_bitmap(
nums_copy,
@prec_results_temp_lo[0],
length(prec_results_temp_lo)
);
{$endif}
calc_prec_result_list256(@nums_copy[8], @prec_results_temp_hi); //quick sort
{$ifdef debug}
check_prec_result_bitmap(
copy_nums(nums_copy, 8, 8),
@prec_results_temp_hi[0],
length(prec_results_temp_hi)
);
{$endif}

//至此,有
//with prec_results_temp_lo:
// [0].value <= [1].value <= [2].value ... <= [255].value
//对prec_results_temp_hi亦有:
// [0].value <= [1].value <= [2].value ... <= [255].value

getmem(prec_result_list_temp, sizeof(prec_result_list_temp^));
try
//接下来,我们再从这两个256个可能的值中组合产生65536个16位值
p_dest := @prec_result_list[0]; //结果放在这个大数组中

(*****************************************
* TODO:
* use xmm to accelerate the addition
* operation
*****************************************)
for iter_hi := low(prec_results_temp_hi) to high(prec_results_temp_hi) do
begin
p_hi := @prec_results_temp_hi[iter_hi];

for iter_lo := low(prec_results_temp_lo) to high(prec_results_temp_lo) do
begin
p_lo := @prec_results_temp_lo[iter_lo];

p_dest^.value := p_hi^.value + p_lo^.value;
p_dest^.bitmap := (p_hi^.bitmap shl 8) + p_lo^.bitmap;

{$ifdef debug}
check_bitmap(nums_copy, p_dest^.bitmap, p_dest^.value);
{$endif}

inc(p_dest);
end;
end;

//因为前面的prec_results_temp_lo和prec_results_temp_hi都是已经有序的
//所以
// for i := 0 to 255-1,恒有
// prec_result_list[i] <= prec_result_list[i+1]
//
// for i := 256 to 511-1, 有
// prec_result_list[i] <= prec_result_list[i+1]
//
//依次类推,整个prec_result_list以256个记录为组分组,都有这些规律
//
//那么,可以在这个基础上进行归并排序,每相邻的两个分组作一次归并,得到一个更大
//的512数组,再归并,直到最后,两个32768个记录的数组归并成一个65536个记录
//的数组,排序完毕
//也可以对折归并,对折时,左边的数据比右边数据小的情况比较多,merge过程的比
//较次数就会相对减少,所以对折归并比较有利

j := $200; //最开始,每次合并的覆盖面为两个$100
prec_result_list_src := @prec_result_list;
prec_result_list_dst := prec_result_list_temp; //第1次: merge的结果在prec_result_list_temp

dest_is_temp := true;

{
对折归并
以下j循环的总共开销为
8 * $10 000 = $80 000 次 (比较+存储)
+
8 * 128 = 1024 次 (函数调用)
}
while j <= $10000 do //合并的覆盖面小于等于$10000, 总共循环8次
begin

{
以下i循环开销为
($10000 div 2 div (j div 2)) * (j/2 + j/2) = $10000
}
i := 0;
while i < $10000 div 2 do
begin
merge(
@prec_result_list_src^[i],
@prec_result_list_src^[i+cardinal($10000 div 2)],
@prec_result_list_dst^[i*2],
j div 2
);
inc(i, j div 2);
end;

// //毗邻的数组两两归并
// i := 0;
// while i < $10000 do
// begin
// merge(
// @prec_result_list_src^[i],
// @prec_result_list_src^[i+j div 2],
// @prec_result_list_dst^[i],
// j div 2
// );
// inc(i, j);
// end;

j := j * 2; //覆盖面增大1倍

//交换源/目标数组
swap := prec_result_list_src;
prec_result_list_src := prec_result_list_dst;
prec_result_list_dst := swap;

if j <= $10000 then
dest_is_temp := not dest_is_temp;
end;

if dest_is_temp then
move(
prec_result_list_temp^,
prec_result_list,
sizeof(prec_result_list)
);


//第1次: merge的结果在: in prec_result_list_temp
//2nd: prec_result_list
//3: prec_result_list_temp
//4: prec_result_list
//5: prec_result_list_temp
//6: prec_result_list
//7: prec_result_list_temp
//8: prec_result_list <<最后一次

//所以,至此需要的结果已经准备好在prec_result_list了

//释放prec_result_list_temp
finally
freemem(prec_result_list_temp);
end;

{$ifdef debug}
//验证结果数组的次序性
verify_order_le(@prec_result_list[0], length(prec_result_list));
//验证结果数组的位组合是否与他的和相对应
check_prec_result_bitmap(nums_copy, @prec_result_list[0], length(prec_result_list));
{$endif}
end;

procedure t_pack_searcher.prepare_middle;
begin
prec_result_list_sz := $100;

calc_prec_result_list256(@nums_copy, @prec_result_list);
end;

{
pre-calculate 8 nums all posible sum of 8 nums combination
nums_copy_index: the first num index of nums_copy
prec_8_arr_index: the result sum will put in prec_8_arr[prec_8_arr_index][]
}
procedure t_pack_searcher.prepare_prec_8_arr(
const nums_copy_index: integer;
const prec_8_arr_index: integer
);
var
i, j: integer;
temp: cardinal;
begin
for i := low(byte) to high(byte) do
begin
temp := 0;
for j := 0 to bits_per_byte-1 do
if (i and bits[j]) <> 0 then
inc(temp, nums_copy[nums_copy_index + j]);

prec_8_arr[prec_8_arr_index][i] := temp;
end;
end;


alphax 2006-02-13
  • 打赏
  • 举报
回复
//for the case num_count >= 24
function t_pack_searcher.internal_exec_ge24: t_pack_selection;
var
i, j, k: integer;
bits: integer;
temp, value_i, value_i_j: cardinal;
scan_result, sum_0_15: cardinal;

procedure make_result(use_scan_result: boolean);
begin
setlength(result, selection_sz);
if use_scan_result then
begin
result[0] := scan_result and $00ff;
result[1] := (scan_result and $ff00) shr 8;
end
else
begin
result[0] := 0;
result[1] := 0;
end;

result[2] := k;
if selection_sz > 3 then
begin
result[3] := j;
if selection_sz > 4 then
result[4] := i;
end;

map_to_original_order(result);
end;

begin
//pre-calculate sums of 0..15
prepare_large();

//prec-calculate sums of 16..39
prepare_prec_8_arr( 16, 0);

if num_count > 24 then
prepare_prec_8_arr( 24, 1)
else prepare_dummy_prec_8_arr(1);

if num_count > 32 then
prepare_prec_8_arr( 32, 2)
else prepare_dummy_prec_8_arr(2);



bits := 1 shl (num_count - 16) - 1;
{ reserved 16 bits is processed by scan() procedure }



{$ifndef perf_meas}
sum_0_15 := 0;
for i := 0 to 15 do
sum_0_15 := sum_0_15 + nums_copy[i];
{$else}
//因为no_result时产生的sum为high(cardinal)-1
//所以下面的if temp > sum_0_15 then
//总是成立,这将影响性能测试的准确性
//所以应该设为
sum_0_15 := high(cardinal);
{$endif}


scan_result := scan_not_found;

for i := t_bits(bits)[2] downto 0 do
begin
value_i := prec_8_arr[2][i];
for j := t_bits(bits)[1] downto 0 do
begin
value_i_j := value_i + prec_8_arr[1][j];
for k := t_bits(bits)[0] downto 0 do
begin
temp := sum - (value_i_j + prec_8_arr[0][k]);

//即使 integer(temp) < 0
//也有 temp <= sum_0_15
if temp > sum_0_15 then
continue;

if (temp = 0) then
begin
make_result(false);
exit;
end
else if integer(temp) < 0 then
continue;

scan_result := scan2(temp);
if scan_result <> scan_not_found then
begin
make_result(true);
exit;
end;
end; // for k
end; //for j
end; //for i


make_empty_pack_selection(result);
end;

//for the case num_count < 16
function t_pack_searcher.internal_exec_smallcase: t_pack_selection;
var
i, j: integer;
bits: integer;
value_i: cardinal;
begin
prepare_prec_8_arr(0, 0);

if num_count > 8 then
prepare_prec_8_arr(8, 1)
else prepare_dummy_prec_8_arr(1);

bits := (1 shl num_count) - 1;

for i := t_bits(bits)[1] downto 0 do
begin
value_i := prec_8_arr[1][i];
for j := t_bits(bits)[0] downto 0 do
begin
if (value_i + prec_8_arr[0][j]) = sum then
begin
setlength(result, selection_sz);
result[0] := j;
if selection_sz > 1 then
result[1] := i;
exit;
end;
end;
end;

make_empty_pack_selection(result);
end;

procedure t_pack_searcher.make_empty_pack_selection(
var selection: t_pack_selection);
begin
if length(selection) <> selection_sz then
setlength(selection, selection_sz);

fillchar(
selection[0],
sizeof(selection[0]) * selection_sz,
0
);
end;

procedure t_pack_searcher.map_to_original_order(
var selection: t_pack_selection);
var
b: byte;
i, j, bit_index_base, org_index: integer;
temp: t_pack_selection;
begin
if not require_order_map then
exit;

setlength(temp, length(selection));

for i := low(selection) to high(selection) do
begin
b := selection[i];
bit_index_base := i * 8;
for j := 0 to bits_per_byte - 1 do
begin
if (b and bits[j]) <> 0 then
begin
org_index := nums_order_map[bit_index_base + j];
set_bit(@temp[0], org_index);
end;
end;
end;

move(
temp[0],
selection[0],
sizeof(temp[0]) * length(temp)
);
end;

procedure t_pack_searcher.prepare_dummy_prec_8_arr(
const prec_8_arr_index: integer);
begin
fillchar(
prec_8_arr[prec_8_arr_index],
sizeof(prec_8_arr[prec_8_arr_index]),
0
);
end;
alphax 2006-02-13
  • 打赏
  • 举报
回复
{$ifdef debug}
type
t_cardinal_arr = array of cardinal;


function copy_nums(
const nums: array of cardinal;
const copy_start_index: integer;
const count: integer): t_cardinal_arr;
begin
assert(copy_start_index >= 0);
assert((copy_start_index + count) <= length(nums));

setlength(result, count);
if count > 0 then
move(nums[copy_start_index], result[0], count * sizeof(result[0]));
end;
{$endif}



{ Copy from Borland.Classes.pas/implementation }
procedure QuickSort(SortList: PPointerList; L, R: Integer;
SCompare: TListSortCompare);
var
I, J: Integer;
P, T: Pointer;
begin
repeat
I := L;
J := R;
P := SortList^[(L + R) shr 1];
repeat
while SCompare(SortList^[I], P) < 0 do
Inc(I);
while SCompare(SortList^[J], P) > 0 do
Dec(J);
if I <= J then
begin
T := SortList^[I];
SortList^[I] := SortList^[J];
SortList^[J] := T;
Inc(I);
Dec(J);
end;
until I > J;
if L < J then
QuickSort(SortList, L, J, SCompare);
L := I;
until I >= R;
end;

procedure set_bit(p: pointer; bit_index: integer);
var
byte_index: integer;
begin
byte_index := bit_index div 8;
bit_index := bit_index mod 8;
pbytearray(p)[byte_index] := pbytearray(p)[byte_index] or bits[bit_index];
end;

function nums_copy_sort_compare_asc(a, b: pointer): integer;
begin
if cardinal(a) = cardinal(b) then
result := 0
else if cardinal(a) > cardinal(b) then
result := 1
else result := -1;
end;

function prec_result_sort_compare_asc(a, b: pointer): integer;
var
x, y: p_prec_result;
begin
x := a; y := b;
if x^.value = y^.value then
result := 0
else if x^.value > y^.value then
result := 1
else result := -1;
end;


procedure calc_prec_result_list256(
in_8nums_ptr: p_8nums;
out_prec_result_temp: p_prec_result_list256);
{
预计算8个数的各种可能的组合的和,并按和的大小升序排序
}
var
i, j: integer;
temp: cardinal;
p: p_prec_result;
ptr_array_ff: array[low(byte)..high(byte)] of p_prec_result;
buff: t_prec_result_list256;
begin
//预计算8个数的各种可能的组合的和
for i := low(byte) to high(byte) do
begin
temp := 0;
for j := 0 to bits_per_byte-1 do
if (i and bits[j]) <> 0 then
temp := temp + in_8nums_ptr[j];

p := @buff[i];
p^.value := temp;
p^.bitmap := i;

ptr_array_ff[i] := p;
end;


//排序
quicksort(
@ptr_array_ff,
low(ptr_array_ff),
high(ptr_array_ff),
prec_result_sort_compare_asc
);

//now data in buff is sorted, copy to out_prec_result_temp
for i := low(buff) to high(buff) do
out_prec_result_temp[i] := ptr_array_ff[i]^;
end;

procedure merge(x, y, result: p_prec_result; count: integer);
var
x_cnt, y_cnt: integer;
begin
x_cnt := count;
y_cnt := count;

repeat
if x^.value <= y^.value then
begin
//put x
result^ := x^;
inc(x);
dec(x_cnt);
end
else
begin
//put y
result^ := y^;
inc(y);
dec(y_cnt);
end;

inc(result);
until (x_cnt = 0) or (y_cnt = 0);

if (x_cnt > 0) then
move(x^, result^, sizeof(x^) * x_cnt)
else if (y_cnt > 0) then
move(y^, result^, sizeof(y^) * y_cnt);

{$if defined(trace)}
inc(merge_move_count, x_cnt + y_cnt);
{$ifend}
end;

{$ifdef debug}
procedure verify_order_le(first_prec_result: p_prec_result; count: integer);
var
value: cardinal;
begin
value := 0;
while count > 0 do
begin
if value > first_prec_result^.value then
begin
asm int 3 end;
touch(first_prec_result); touch(count);
writeln('verify_order_le failed');
pause();
end;

value := first_prec_result^.value;
inc(first_prec_result);
dec(count);
end;
end;
{$endif}


{$ifdef debug}
procedure t_pack_searcher.check_bitmap(
const nums: array of cardinal;
const bitmap, value: cardinal);
var
s: cardinal;
begin
s := nums_sum(nums, bitmap, false);
if s <> value then
begin
nums_sum(nums, bitmap, true);
asm
int 3
end;
writeln('check_bitmap failed, sum is ', s, ', expected is ', value);
pause();
end;
end;

procedure t_pack_searcher.check_prec_result_bitmap(
const nums: array of cardinal;
p: p_prec_result;
count: integer);
begin
while count > 0 do
begin
check_bitmap(nums, p^.bitmap, p^.value);
dec(count);
inc(p);
end;
end;
{$endif debug}

constructor t_pack_searcher.create(
const a_no_result: boolean;
const a_sum: cardinal;
const a_nums: array of cardinal);
var
i: integer;
sum_64: int64;
begin
sum := a_sum;

assert(not a_no_result or (sum > 0));

num_count := length(a_nums);

assert(
(num_count > 0)
and
(num_count <= max_num_count)
);

no_result := a_no_result;

sum_64 := 0;
for i := low(a_nums) to high(a_nums) do
inc(sum_64, a_nums[i]);

if sum_64 > (high(sum)-1) then //high(cardinal) is reserved for no_result test
raise exception.create('Invalid parameter, sum of nums is too large');



selection_sz := calc_selection_size(num_count);

move(a_nums[0], nums_copy[0], length(nums_copy) * sizeof(a_nums[0]));
end;


function t_pack_searcher.exec(): t_pack_selection;
begin

{$ifdef trace}
scan_hit_count := 0;
merge_move_count := 0;
{$endif}

if num_count >= 24 then
result := internal_exec_ge24()
else if num_count >= 16 then
result := internal_exec_ge16()
else
result := internal_exec_smallcase();

{$ifdef trace}
writeln('scan hit count: ', scan_hit_count);
writeln('merge_move_count: ', merge_move_count);
{$endif}
end;

//for the case num_count >= 16
function t_pack_searcher.internal_exec_ge16: t_pack_selection;
var
i, j: integer;
bits: integer;
temp, value_i: cardinal;
scan_result, sum_0_7: cardinal;

procedure make_result(use_scan_result: boolean);
begin
setlength(result, selection_sz);
if use_scan_result then
result[0] := scan_result and $00ff
else
Result[0] := 0;

result[1] := j;
if selection_sz > 2 then
result[2] := i;

//map_to_original_order(result);
end;

begin
prepare_middle();

prepare_prec_8_arr( 8, 0);

if num_count > 16 then
prepare_prec_8_arr( 16, 1)
else prepare_dummy_prec_8_arr(1);

bits := 1 shl (num_count - 8) - 1;
{ reserved 8 bits is processed by scan() procedure }

scan_result := scan_not_found;

{$ifndef perf_meas}
sum_0_7 := 0;
for i := 0 to 7 do
sum_0_7 := sum_0_7 + nums_copy[i];
{$else}
//因为no_result时产生的sum为high(cardinal)-1
//所以下面的if temp > sum_0_7 then
//总是成立,这将影响性能测试的准确性
//所以应该设为
sum_0_7 := high(cardinal);
{$endif}


for i := t_bits(bits)[1] downto 0 do
begin
value_i := prec_8_arr[1][i];
for j := t_bits(bits)[0] downto 0 do
begin
temp := sum - (value_i + prec_8_arr[0][j]);

//即使 integer(temp) < 0
//也有 temp <= sum_0_7
if temp > sum_0_7 then
Continue;

if (temp = 0) then
begin
make_result(false);
exit;
end
else if (integer(temp) < 0) then
continue;


scan_result := scan2(temp);
if scan_result <> scan_not_found then
begin
make_result(true);
exit;
end;
end;
end;

make_empty_pack_selection(result);
end;
biao_227 2006-02-13
  • 打赏
  • 举报
回复
学习中..............
alphax 2006-02-13
  • 打赏
  • 举报
回复
unit pack5;

{.$define debug}
{.$define trace}
{$define pack5_test}

{$ifdef pack5_test}
{$define perf_meas} //正式使用时,应关闭perf_meas
{$endif}


interface
uses
SysUtils, Classes, Math
{$ifdef pack5_test }
, Windows
{$endif}
;

const
max_num_count = 40;
bits_per_byte = 8;
bits: array[0..bits_per_byte-1] of cardinal = (
1 shl 0,
1 shl 1,
1 shl 2,
1 shl 3,
1 shl 4,
1 shl 5,
1 shl 6,
1 shl 7
);
scan_not_found = $ffffffff;

type
t_pack_selection = array of byte;

type
t_prec_result = record
value: cardinal;
bitmap: cardinal;
end; //8 bytes
p_prec_result = ^t_prec_result;

t_prec_result_list = array[0..($100 * $100-1)] of t_prec_result;
p_prec_result_list = ^t_prec_result_list;

t_prec_result_list256 = array[low(byte)..high(byte)] of t_prec_result;
p_prec_result_list256 = ^t_prec_result_list256;

t_8nums = array[0..bits_per_byte-1] of cardinal;
p_8nums = ^t_8nums;

t_prec_8 = array[byte] of cardinal;
p_prec_8 = ^t_prec_8;
t_bits = array[0..3] of byte;
p_bits = ^t_bits;

{$ifdef pack5_test}
t_data_sample = class
public
no_result: boolean;
init_selection: t_pack_selection;
sum: cardinal;
num_count: cardinal;
min_selection_count: integer;
nums: array of cardinal;
perf_cnt_begin, perf_cnt_end: tlargeinteger;

constructor create(
const a_no_result: boolean;
const a_num_count: cardinal;
const a_min_selection_count: integer
);

procedure exec(const print_found_selection: boolean);

function perf_cnt_period_to_ms(): double;

function verify(const selection: t_pack_selection; const print_nums: boolean): boolean;

end;

t_pack_tester = class
public
class procedure full_test;
end;

{$endif pack5_test}

t_pack_searcher = class
private (*** common private part ***)
num_count: integer;
sum: cardinal;
no_result: boolean;
nums_copy: array[0..39] of cardinal;
require_order_map: boolean;

//for middle or large case
prec_result_list: t_prec_result_list;
prec_result_list_sz: cardinal;

//指出原来的顺序,取nums_order_map[i],可以得到当前num_copy中第i个数的原来的序号
//仅当require_order_map时有意义
nums_order_map: array[0..39] of cardinal;

prec_8_arr: array[0..2] of t_prec_8;
selection_sz: integer;

procedure sort_num_copies();

{$ifdef debug}
procedure check_bitmap(
const nums: array of cardinal;
const bitmap: cardinal;
const value: cardinal);
procedure check_prec_result_bitmap(
const nums: array of cardinal;
p: p_prec_result;
count: integer
);
{$endif}

procedure map_to_original_order(var selection: t_pack_selection);

procedure make_empty_pack_selection(var selection: t_pack_selection);

{
pre-calculate 8 nums all posible sum of 8 nums combination
nums_copy_index: the first num index of nums_copy
prec_8_arr_index: the result sum will put in prec_8_arr[prec_8_arr_index][]
}
procedure prepare_prec_8_arr(
const nums_copy_index: integer;
const prec_8_arr_index: integer
);

procedure prepare_dummy_prec_8_arr(const prec_8_arr_index: integer);

private (*** private for small case ***)
//for the case num_count < 16
function internal_exec_smallcase(): t_pack_selection;

private (*** private for middle case ***)
//pre-calcuate the results for all posible nums[0]..nums[8] combination
//sort and store in prec_result_list
procedure prepare_middle;


//for the case num_count >= 16
function internal_exec_ge16(): t_pack_selection;

private (*** private for large case ***)
//pre-calcuate the results for all posible nums[0]..nums[15] combination
//sort and store in prec_result_list
procedure prepare_large;

//return value: low 16 bit selection
//return $ffffffff for not found
function scan2(const base_value: cardinal): cardinal;

//for the case num_count >= 24
function internal_exec_ge24: t_pack_selection;

public
constructor create(
const a_no_result: boolean;
const a_sum: cardinal;
const a_nums: array of cardinal
);

class procedure test_scan_proc;

function exec(): t_pack_selection;
end;

function is_empty_pack_selection(const selection: t_pack_selection): boolean;
function calc_selection_size(const num_count: integer): integer;
procedure print_selection(const selection: t_pack_selection);

function test_selection(
const selection: t_pack_selection;
const index: integer): boolean;


implementation



procedure pause();
var
s: string;
begin
Writeln('press ENTER to continue...');
Readln(s);
end;

{$ifdef debug}
//print nums, and return the sum
function nums_sum(
const nums: array of cardinal;
const bitmap: cardinal;
const print_nums: boolean): cardinal;
var
i, max: integer;
begin
result := 0;
max := high(nums);
if max > 31 then
max := 31;
for i := low(nums) to max do
if bitmap and (1 shl i) <> 0 then
begin
inc(result, nums[i]);
if print_nums then
writeln('has_num(', i:2, '): ', nums[i]:12);
end;
end;

procedure touch(var x);
begin
//do nothing
end;
{$endif}

{$ifdef pack5_test}
procedure indent(spc_cnt: integer);
var
s: string;
begin
if spc_cnt > 0 then
begin
setlength(s, spc_cnt);
fillchar(s[1], spc_cnt, ' ');
write(s);
end;
end;
{$endif}



function is_empty_pack_selection(
const selection: t_pack_selection
): boolean;
var
i: integer;
begin
result := false;
for i := low(selection) to high(selection) do
if selection[i] <> 0 then
exit;

result := true;
end;




function calc_selection_size(const num_count: integer): integer;
begin
result := (num_count + (bits_per_byte-1)) div bits_per_byte;
end;

function test_bit(const p: pointer; const index: integer): boolean;
var
byte_index, bit_index: integer;
begin
byte_index := index div 8;
bit_index := index mod 8;

result := (pbytearray(p)[byte_index] and bits[bit_index]) <> 0;
end;

function test_selection(
const selection: t_pack_selection;
const index: integer): boolean;
begin
{$ifdef debug}
assert(length(selection) > 0);
{$endif}

result := test_bit(@selection[0], index);
end;


{$ifdef trace}
var
merge_move_count: integer;
scan_hit_count: integer;
{$endif}

alphax 2006-02-13
  • 打赏
  • 举报
回复
有点怪,昨晚我改了一下,
现在40个数无解情况下竟然只要0.2秒不到,而32个数,24个数都要0.015,有点不解,不知道是否代码错了,但是测试又没发现问题


project文件
============================================
program Project15;

{$APPTYPE CONSOLE}

uses
SysUtils,
pack5 in 'pack5.pas';

var
e: tobject;
begin
try
t_pack_tester.full_test();
except
e := ExceptObject();
if e is Exception then
writeln('ERROR: ', exception(e).Message)
else writeln('ERROR: (Unknown error)');
end;
end.
yyjzsl 2006-02-13
  • 打赏
  • 举报
回复
偶也来学习学习,^_^...
耙子 2006-02-13
  • 打赏
  • 举报
回复
吐血了,这都干啥呢呀!!!
alphax 2006-02-12
  • 打赏
  • 举报
回复
抱歉,上面的计算错了,算漏了bit_count > n的情形


简单的,撇开预计算和排序部分的开销,单就循环主体的开销来计算


主循环主体循环次数 = $100 0000

如果在第三层进行处理, n = 11 时

命中的次数(即bit_count > n)为 = C(12,24) + C(13,24) + ... + C(24,24)
不命中的次数为 = C(11,24) + C(10, 24) + ... + C(1,24)
= C(13,24) + C(14,24) + .. + C(24,24)

命中-不命中=C(12,24)
c(12,24) = $29 431C

需要执行的(不命中的)= ($100 0000 - C(12,24)) / 2 = $6B 5E72

也就是循环主体的循环次数下降到原来的42% ($6b 5e72 / $100 0000)


第二层,n = 7 时

命中的次数为 = C(8,16) + C(9,16) + ... + C(16,16)
不命中次数为 = C(7,16) + C(6,16) + ... + C(1,16)
= C(9,16) + ....
命中-不命中=C(8,16)
C(8,16) = 12870
不命中=($1 0000 - 12870) / 2 = 26333
循环次数下降到原来的40%


依此看来, 针对这些特定情形作带位计算处理的版本,还是能够得到2倍的性能提高的


我想了一下,

>>3. 在循环的适当深度计算bit位上1的个数,如果大于n,
>>或者即使下层循环全是1,1的个数也小于m,
>>则往下循环的工作不用作了

for i ...
for j ...
...
sum_i_j := prec_arr[2][i] + prec_arr[1][j];
for k ...
begin
...
sum_i_j_k := sum_i_j + prec_arr[0][k];

对于n的判断,
实际上直接比较sum和sum_i_j_k就等价于判断i/j/k的位数数否大于n了,即
if sum_i_j_k > sum then
continue;

而对于m的判断,可以预先计算nums[0..15]的和,得到sum_0_15,
将sum减去sum_0_15得到一个sum2

然后同样在k层
if sum_i_j_k < sum2 then
continue;

这样也等价于判断m,这样其实位计数和检查就简化了
alphax 2006-02-11
  • 打赏
  • 举报
回复
pazee大哥,你说得我有点不好意思了,哈哈

上面提到的问题很难笼统说,依赖于循环的层数和最内层的开销,

我对我当前的实现(我目前实现的是40个数,以下说的都是40位的情形)大致估算了一下:

假设最内层的开销为300时钟(pentium3, 65536个结果中折二查找, p4为270左右),
位计数和检查的开销为5时钟,

并假设位计数和检查放在第3层(针对n >= 11 and n <= 13的情形,
对于n = 8的情况可以放在第2层,不过道理是一样的),即k循环层,
for i ...
for j ...
...
bit_count_j := bit_count_map[i] + bit_count_map[j];
for k ...
begin
...
bit_count_k := bit_count_j + bit_count_map[k];
if bit_count_k > n then
continue;

scan(65536个结果中折二查找)
...
end //k
...

增加的开销 > ($100 0000 * 5 = $500 0000)

当n=12时,节省的开销 C(12, 24) * 300 = ($29 431C) * 300 = $305A A4D0
当n=11 or 13时, 节省 $2CA2 70C0

原来的整体开销 ($100 0000 * 300) 加上排序后约为 $1 7000 0000
改进后为 $1 7000 0000 + $500 0000 - $3000 0000 = $1 4500 0000

也就是,如果位计数和检查的开销为5的估计准的话,n=12时时间开销可能只能
降低到原来的88%,也就是1.13倍的提高,
而成本是必须为n>=11 and n<=13的情形维护另外一个程序版本。
但位计数和检查的开销为5的估计是非常乐观的,
k层内增加的操作有:访存一次,加法一次,减法一次,测试一次,这些操作
都假设寄存器不是很紧张,而且访存命中,但实际上在外3层循环里,
每层里都有计算,实际上很难有这么好的情形,
并且一直以来delphi的对循环体的寄存器分配是比较弱的,
增加位计数累加和检查,会明显增加delphi分配寄存器的压力,
上次你也看到了,就是你前面说的delphi怎么优化的那一段,
他为了腾出寄存器,竟然在循环的最内层push/pop,其实在外层腾出寄存器,
最内层就不必push/pop了,他却不知道他所产生的push/pop是G数量级的,
还有一个问题就是增加的判断:
if bit_count > n then
...
当bit_count > n成立时,就等于pentium的指令预取判断错误,执行就停顿,
必须重新取指,所以平均增加的开销很可能不是5,可能达到8甚至更多,
所以,在外层只有3层而最内层操作开销不是很大的情况下,
加插这个位数检查暂时可能不会有太大的益处,
如果外层有4层(48个数的情形),那么在第3层做这个处理是非常有必要的



另外,我又改进了一下,原来考虑排序的算法是quick sort,
因为要确保程序在比较确定的时间内返回,所以没敢作16位排序,只做了8位的,
后来想到,可以在预计算时想办法确保结果值有序,然后用归并排序归并,
归并主体的复杂度约为 8n,即$8 0000次比较,这样就可以把最内2层,用二分查找来做了,

我的最新测试,32位数无解情况下,p4 1.8,所花的时间0.02秒多一点
现已通过32位的正确性测试,现在作一下40位的测试并整理一下代码
耙子 2006-02-11
  • 打赏
  • 举报
回复
强呀!
考虑的情况比较全面,我要仔细看看,消化吸收一下。
再次感谢。

我感觉写代码这么研究是很有意思的,呵呵。
耙子 2006-02-09
  • 打赏
  • 举报
回复
十分感谢。
和我的相反有一些地方类似。
我也在考虑把拆数的大循环改称几个嵌套的循环,来减少4个数做加法消耗的时间,
我的想法:
1.上一个帖子有人提出过排序,然后计算出来至少和至多需要多少个数m和n。
2. 采用你的预计算方式,计算的同时统计与计算结果中每个结果中bit位1的个数;
3. 在循环的适当深度计算bit位上1的个数,如果大于n,或者即使下层循环全是1,1的个数也小于m,则往下循环的工作不用作了,直接Continue.

这个可以理解为是剪枝的工作。
我没试过,不知道他增加开销和剪枝的效果是否值。

不论如何,32个数无解的情况能降低到1.x秒,已经完全满足实际应用了。

alphaX你看看这个想法可行不?
alphax 2006-02-08
  • 打赏
  • 举报
回复
上面的代码有点问题,主要是没有考虑到length(nums) < 32的情况
改成:
procedure precalc();
var
bits: array[0..7] of byte;
b: byte;
i, j, temp: Integer;
pIntArr: PIntegerArray;
temp_nums: array[0..31] of Integer;
begin
for j := 0 to 7 do
bits[j] := 1 shl j;

fillchar(temp_nums[0], sizeof(temp_nums), 0);

for i := 0 to count-1 do
temp_nums[i] := nums[i];

pIntArr := @temp_nums[0];
for i := 0 to 3 do
begin
for b := low(b) to high(b) do
begin
temp := 0;
for j := 0 to 7 do
begin
if (b and bits[j]) > 0 then
inc(temp, pIntArr[j]);
end;

precarr[i][b] := temp;
end;

inc(PInteger(pIntArr), 8);
end;
end;
应该可以了



另外,我刚才下班后又测试了另外一个思路,应该可以

思路是:
把最内层的预计算排序一下
然后从预计算的结果中进行二分查找

//最内层的预计算结果
t_prec_result = class
value: integer; //计算的值
bitmask: byte; //L的值
end;

t_scan_table = class
public
sum: integer;
list: tlist;

procedure sortASC(); //排序

function Scan(const aBaseValue: integer): t_prec_result; //二分查找
end;

precarr: array[0..3] of tprec8;
变成
precarr: array[0..2] of tprec8;


预计算变成

var
temp_nums: array[0..31] of integer;

pIntArr := @temp_nums[8];

//8..31, 放在precarr[0..2]
for i := 0 to 2 do
begin
for b := low(b) to high(b) do
begin
temp := 0;
for j := 0 to 7 do
begin
if (b and bits[j]) <> 0 then
inc(temp, pIntArr[j]);
end;

precarr[i][b] := temp;
end;

inc(PInteger(pIntArr), 8);
end;

//0..7, 放在scan_table内
for b := low(b) to high(b) do
begin
temp := 0;
for j := 0 to 7 do
begin
if (b and bits[j]) <> 0 then
inc(temp, temp_nums[j]);
end;

pr := t_prec_result.Create();
pr.value := temp;
pr.bitmask := b;
scan_table.list.Add(pr);
end;

scan_table.sortASC(); //按照pr.value进行升序排序

主要循环就变成
for i := tbits(bitmask).bytearray[3] downto 0 do
begin
si := precarr[2][i];
for j := tbits(bitmask).bytearray[2] downto 0 do
begin
sj := si + precarr[1][j];
for k := tbits(bitmask).bytearray[1] downto 0 do
begin
sk := sj + precarr[0][k];

prec_result := scan_table.scan(sk);
if prec_result <> nil then
begin
result := prec_result.bitmask or (i shl 24) or (j shl 16) or (k shl 8);
exit;
end;
end;
end;
end;

Scan的代码,

function t_scan_table.Scan(const aBaseValue: integer): t_prec_result;
var
i, min_index, max_index, temp: integer;
{$ifdef debug_and_trace}
next_index: integer;
{$endif}
ptrlist: PPointerList;
begin
max_index := list.count-1;
min_index := 0;

ptrlist := list.List;

i := max_index shr 1;

while true do
begin
Result := t_prec_result(ptrlist[i]);
{$ifdef debug_and_trace}
_index_[i] := true;
inc(_count_);
{$endif}

temp := (Result.value + aBaseValue);

if (temp = sum) then
Exit //ok
else if temp > sum then
begin
if i = 0 then
Break; //exit

max_index := i-1;

{$ifdef debug_and_trace}
next_index := (min_index + max_index) shr 1;
Assert(next_index < i);
i := next_index;
{$else}
i := (min_index + max_index) shr 1;
{$endif}
if min_index > i then
Break; //exit
end
else //if (temp < sum) then
begin
if i = max_index then
Break; //exit

min_index := i + 1;

{$ifdef debug_and_trace}
next_index := (min_index + max_index+1) shr 1;
Assert(next_index > i);
i := next_index;
{$else}
i := (min_index + max_index + 1) shr 1;
{$endif}
end;
end;

Result := nil;
end;



这样,比原来多了个排序开销,
但原来最内层的256次操作,通过二分查找法降低到最多8次,即使算上二分查找和调用的开销,
也应该低于原来的256次操作

我在这台P4 1.8G上测得

32个数,无解, 1.3秒返回

本来想把最内两层的65536次操作,都通过二分查找法来作的,这样可以把操作从65536次降低到最多16次,
就远远快于原来的操作了,但是考虑到预先的排序过程确是比较慢的,65536个数排序最坏时复杂度
是$FFFFFFFF,这就得不偿失了


耙子 2006-02-08
  • 打赏
  • 举报
回复
跑哪儿过年去了?
我再看看,谢谢。

D2006的确能拷贝CPU VIEW了。D2005以前的还是不行。
alphax 2006-02-07
  • 打赏
  • 举报
回复
你这段
if sum=t then begin

|. 8B45 F8 |MOV EAX,DWORD PTR SS:[EBP-8]
|. 8B80 5C040000 |MOV EAX,DWORD PTR DS:[EAX+45C] //取sum
|. 99 |CDQ //带符号将sum扩展成双字
|. 52 |PUSH EDX
|. 50 |PUSH EAX //将双字推到stack

|. 8BC6 |MOV EAX,ESI //扩展t, 低位在eax
|. 33D2 |XOR EDX,EDX //高位在edx, 为0

|. 3B5424 04 |CMP EDX,DWORD PTR SS:[ESP+4] //比较先前push的edx
|. 75 03 |JNZ SHORT TakeApar.004595B7
|. 3B0424 |CMP EAX,DWORD PTR SS:[ESP] //比较先前push的eax
|> 5A |POP EDX //平衡stack
|. 58 |POP EAX //平衡stack
|. 75 05 |JNZ SHORT TakeApar.004595C0 //如果同则....

看起来应该是一个integer和cardinal,进行比较,很可能你的t是cardinal,而sum是integer,在这种情况下,编译是正确的

后来,你先减sum然后在测试是否为0,代码就简化了,因为加减法实质上是无符号的,而测试0就一条指令
加载更多回复(5)

16,749

社区成员

发帖
与我相关
我的任务
社区描述
Delphi 语言基础/算法/系统设计
社区管理员
  • 语言基础/算法/系统设计社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧