/* using two 256M array to record the existing state is the best way. */
int duplicated(int all[], int size)
{
bit st1[256M] = {0}, st2[256M] = {0};
int cnt = 0;
for(dat : all)// for each data in all-data set
{
if (dat < 2^31) {
if(st1[dat] == 0) st1[dat] = 1;
else if(st2[dat] == 0) { st2[dat] = 1; cnt++; }
}
}
reset st1 and st2;
for(dat : all)// for each data in all-data set
{
if (dat >= 2^31) {
dat -= 2^31;
if(st1[dat] == 0) st1[dat] = 1;
else if(st2[dat] == 0) { st2[dat] = 1; cnt++; }
}
}
return cnt;
}
/* the time complexity is not a constant, O(1), it's O(2 * size/2), it means O(n). */
/* space complexity is O(1), a constant size depending on your processing division.*/
/* if we need to process one billion data with type int, this algorithm can also */
/* work well. because the precondition: 0 <= all[i] < 2^32. */
/* Whitout real implementation and full test, if any bug, any reply will be */
/* appreciated. */