3,882
社区成员
发帖
与我相关
我的任务
分享
//...continued from the previous post
/*
 * Entry point: fqsort filename.ext width [start len]
 *
 * Sorts, in place, a file made of fixed-width records.
 *   argv[1]  file to sort (opened for update in binary mode)
 *   argv[2]  record width in bytes, must be > 0 and divide the file size
 *   argv[3]  optional offset of the sort key inside a record (default 0)
 *   argv[4]  optional length of the sort key (default: rest of the record)
 *
 * Returns 0 on success, 1 on bad usage, 2 if the full path cannot be
 * resolved, 3 if the file cannot be opened, 4 if its size cannot be read,
 * 5 if the size is not a multiple of the width, 6 if a record buffer cannot
 * be allocated, 7 if closing (flushing) the file fails.
 */
int main(int argc,char **argv) {
    int w;
    __int64 fl;

    /* A missing or non-positive width is a usage error. */
    w = (argc >= 3) ? atoi(argv[2]) : 0;
    if (w <= 0) {
        printf("Usage: fqsort filename.ext width [start len]\n");
        return 1;
    }
    if (NULL==_fullpath(fullpath,argv[1],_MAX_PATH)) {
        printf("Can not get fullpath of %s!\n",argv[1]);
        return 2;
    }
    fb=fopen(fullpath,"rb+");
    if (NULL==fb) {
        printf("Can not open file [%s]!\n",argv[1]);
        return 3;
    }
    fl=_filelengthi64(fileno(fb));
    if (-1i64==fl) {
        fclose(fb);
        printf("Can not get size of file [%s]!\n",argv[1]);
        return 4;
    }
    if (fl%w) {
        fclose(fb);
        printf("The size of file [%s] %I64d %% width %d != 0\n",argv[1],fl,w);
        return 5;
    }

    /* Key window inside each record; out-of-range values fall back to the
       defaults. */
    start=0;len=w;
    if (argc>=4) {
        start=atoi(argv[3]);
        if (start<0 || w<=start) start=0;
    }
    if (argc>=5) {
        len=atoi(argv[4]);
        if (len<1 || w<len) len=w;
    }
    /* Always keep the key inside the record, also when only start was given;
       otherwise the comparison would read past the w-byte buffers.
       Written as a subtraction so start+len cannot overflow. */
    if (len > w-start) len=w-start;

    A=(char *)malloc(w);
    if (NULL==A) {
        fclose(fb);
        printf("Can not A=malloc(%d)!\n",w);
        return 6;
    }
    B=(char *)malloc(w);
    if (NULL==B) {
        free(A);
        fclose(fb);
        printf("Can not B=malloc(%d)!\n",w);
        return 6;
    }

    /* An empty file has nothing to sort; do not hand num==0 to fqsort. */
    if (fl > 0) {
        fqsort(0,fl/w,w,cmp);
    }

    free(B);
    free(A);
    /* fclose flushes the pending writes, so its result matters here. */
    if (0!=fclose(fb)) {
        printf("\nCan not close file [%s]!\n",argv[1]);
        return 7;
    }
    printf("\nfqsort file [%s] width %d OK.\n",argv[1],w);
    return 0;
}
#pragma warning(disable:4996 4244)
#include <stdio.h>
#include <io.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <assert.h>
/* Not referenced anywhere in the visible part of this file. */
int MAXW;
/* Two record buffers, each allocated with the record width in main();
   swap() and cmp() read records into them. */
char *A,*B;
/* The file being sorted; main() opens it with mode "rb+". */
FILE *fb;
/* Receives the absolute path produced by _fullpath() in main(). */
char fullpath[_MAX_PATH];
/* Offset and length of the key compared by cmp() inside each record;
   main() sets them from the optional 3rd and 4th arguments. */
int start,len;
/* Number of swaps done so far; swap() prints a progress dot every 1000. */
long long int n=0;
/* Sub-files of CUTOFF elements or fewer are handed to shortsort(). */
#define CUTOFF 8 /* testing shows that this is a good value */
/* Note: the theoretical number of stack entries required is
no more than 1 + log2(num). But we switch to shortsort
for CUTOFF elements or less, so we really only need
1 + log2(num) - log2(CUTOFF) stack entries. For a CUTOFF
of 8, that means we need no more than 62 stack entries for
64-bit platforms. */
/* Pointer-size-based variant, kept for reference only: */
//#define STKSIZ (8*sizeof(void*) - 2)
/* Size of the lostk/histk arrays in fqsort(). */
#define STKSIZ (8*8 - 2)
/***
*swap(a, b, w) - swap two records of the file
*
*Purpose:
*       exchanges the two records of size w stored at byte offsets a and b
*       of the global file fb, using the global buffers A and B
*
*Entry:
*       long long int a, b = byte offsets of the two records to swap
*       long long int w    = width in bytes of each record
*
*Exit:
*       returns void; the global swap counter n is incremented and a
*       progress dot is printed every 1000 swaps
*
*Exceptions:
*       any failed seek, read or write terminates the program with
*       EXIT_FAILURE, because continuing would write stale buffer contents
*       into the file or leave the sort silently wrong
*
*******************************************************************************/
static void swap(
    long long int a,
    long long int b,
    long long int w
    )
{
    if ( a == b ) {
        return;                 /* same record, nothing to exchange */
    }

    /* Read both records before writing anything, so that a failed read
       cannot cause garbage to be written back. */
    if (_fseeki64(fb,a,SEEK_SET) != 0 || fread(A,w,1,fb) != 1 ||
        _fseeki64(fb,b,SEEK_SET) != 0 || fread(B,w,1,fb) != 1) {
        fprintf(stderr,"\nCan not read records at offsets %lld and %lld!\n",a,b);
        exit(EXIT_FAILURE);
    }

    /* The seek between each read and write is also what makes switching
       direction on an update stream valid. */
    if (_fseeki64(fb,a,SEEK_SET) != 0 || fwrite(B,w,1,fb) != 1 ||
        _fseeki64(fb,b,SEEK_SET) != 0 || fwrite(A,w,1,fb) != 1) {
        fprintf(stderr,"\nCan not write records at offsets %lld and %lld!\n",a,b);
        exit(EXIT_FAILURE);
    }

    n++;
    if (n%1000==0) {printf(".");fflush(stdout);}
}
/***
*shortsort(lo, hi, width, comp) - simple O(n^2) sort for a short run of records
*
*Purpose:
*       sorts the records between offsets lo and hi (inclusive) in place
*       by repeatedly locating the largest remaining record and swapping it
*       to the end of the unsorted range
*
*Entry:
*       long long int lo    = offset of the first record to sort
*       long long int hi    = offset of the last record to sort
*       long long int width = width in bytes of each record
*       int (*comp)()       = comparison callback taking two record offsets
*                             and the width; returns neg if the first record
*                             is smaller, 0 if equal, pos if it is larger
*
*Exit:
*       returns void
*
*Exceptions:
*
*******************************************************************************/
static void shortsort(
    long long int lo,
    long long int hi,
    long long int width,
    int (*comp)(long long int, long long int,long long int)
    )
{
    long long int last;     /* last record of the still-unsorted range */
    long long int cur;      /* record currently examined */
    long long int best;     /* largest record seen in this pass */

    /* Everything after 'last' is already in its final position. */
    for (last = hi; last > lo; last -= width) {
        best = lo;
        cur = lo + width;
        while (cur <= last) {
            /* strictly greater only: on ties the earliest record stays */
            if (comp(cur, best, width) > 0) {
                best = cur;
            }
            cur += width;
        }
        /* move the maximum of [lo, last] to the end of the range */
        swap(best, last, width);
    }
}
/***
*fqsort(base, num, width, comp) - quicksort for fixed-width records of a file
*
*Purpose:
*       sorts num records of size width, starting at byte offset base, in
*       place. Records are identified by their byte offsets, which are
*       handed to comp() and swap(). Uses a median-of-three quicksort with
*       an explicit stack and switches to shortsort() for sub-files of
*       CUTOFF records or fewer.
*
*Entry:
*       long long int base  = byte offset of the first record
*       long long int num   = number of records; 0 and 1 are accepted and
*                             return immediately
*       long long int width = width in bytes of each record
*       int (*comp)()       = comparison callback taking two record offsets
*                             and the width; returns neg, 0 or pos like
*                             strcmp
*
*Exit:
*       returns void
*
*Exceptions:
*       asserts on negative base or num, non-positive width, or NULL comp
*
*******************************************************************************/
void fqsort(
    long long int base,
    long long int num,
    long long int width,
    int (*comp)(long long int, long long int, long long int)
    )
{
    long long int lo, hi;           /* ends of sub-file currently sorting */
    long long int mid;              /* points to middle of sub-file */
    long long int loguy, higuy;     /* traveling offsets for partition step */
    long long int size;             /* size of the sub-file */
    long long int lostk[STKSIZ], histk[STKSIZ];
    int stkptr;                     /* stack for saving sub-file to be processed */

    /* validation section; num == 0 is legal (empty input) and is handled by
       the early return below rather than by an assertion failure */
    assert(base >= 0);
    assert(num >= 0);
    assert(width > 0);
    assert(comp != NULL);

    if (num < 2)
        return;                     /* nothing to do */

    stkptr = 0;                     /* initialize stack */

    lo = base;
    hi = base + width * (num-1);    /* initialize limits */

    /* this entry point is for pseudo-recursion calling: setting
       lo and hi and jumping to here is like recursion, but stkptr is
       preserved, locals aren't, so we preserve stuff on the stack */
recurse:

    size = (hi - lo) / width + 1;   /* number of el's to sort */

    /* below a certain size, it is faster to use a O(n^2) sorting method */
    if (size <= CUTOFF) {
        shortsort(lo, hi, width, comp);
    }
    else {
        /* First we pick a partitioning element. The efficiency of the
           algorithm demands that we find one that is approximately the median
           of the values, but also that we select one fast. We choose the
           median of the first, middle, and last elements, to avoid bad
           performance in the face of already sorted data, or data that is made
           up of multiple sorted runs appended together. Testing shows that a
           median-of-three algorithm provides better performance than simply
           picking the middle element for the latter case. */

        mid = lo + (size / 2) * width;      /* find middle element */

        /* Sort the first, middle, last elements into order */
        if (comp(lo, mid, width) > 0) {
            swap(lo, mid, width);
        }
        if (comp(lo, hi, width) > 0) {
            swap(lo, hi, width);
        }
        if (comp(mid, hi, width) > 0) {
            swap(mid, hi, width);
        }

        /* We now wish to partition the file into three pieces, one consisting
           of elements <= partition element, one of elements equal to the
           partition element, and one of elements greater than it. This is
           done below; comments indicate conditions established at every
           step. */

        loguy = lo;
        higuy = hi;

        /* Note that higuy decreases and loguy increases on every iteration,
           so loop must terminate. */
        for (;;) {
            /* lo <= loguy < hi, lo < higuy <= hi,
               A[i] <= A[mid] for lo <= i <= loguy,
               A[i] > A[mid] for higuy <= i < hi,
               A[hi] >= A[mid] */

            /* The doubled loop is to avoid calling comp(mid,mid,width), since
               some existing comparison funcs don't work when passed the same
               value for both pointers. */

            if (mid > loguy) {
                do  {
                    loguy += width;
                } while (loguy < mid && comp(loguy, mid, width) <= 0);
            }
            if (mid <= loguy) {
                do  {
                    loguy += width;
                } while (loguy <= hi && comp(loguy, mid, width) <= 0);
            }

            /* lo < loguy <= hi+1, A[i] <= A[mid] for lo <= i < loguy,
               either loguy > hi or A[loguy] > A[mid] */

            do  {
                higuy -= width;
            } while (higuy > mid && comp(higuy, mid, width) > 0);

            /* lo <= higuy < hi, A[i] > A[mid] for higuy < i < hi,
               either higuy == lo or A[higuy] <= A[mid] */

            if (higuy < loguy)
                break;

            /* if loguy > hi or higuy == lo, then we would have exited, so
               A[loguy] > A[mid], A[higuy] <= A[mid],
               loguy <= hi, higuy > lo */

            swap(loguy, higuy, width);

            /* If the partition element was moved, follow it. Only need
               to check for mid == higuy, since before the swap,
               A[loguy] > A[mid] implies loguy != mid. */

            if (mid == higuy)
                mid = loguy;

            /* A[loguy] <= A[mid], A[higuy] > A[mid]; so condition at top
               of loop is re-established */
        }

        /*     A[i] <= A[mid] for lo <= i < loguy,
               A[i] > A[mid] for higuy < i < hi,
               A[hi] >= A[mid]
               higuy < loguy
           implying:
               higuy == loguy-1
               or higuy == hi - 1, loguy == hi + 1, A[hi] == A[mid] */

        /* Find adjacent elements equal to the partition element. The
           doubled loop is to avoid calling comp(mid,mid,width), since some
           existing comparison funcs don't work when passed the same value
           for both pointers. */

        higuy += width;
        if (mid < higuy) {
            do  {
                higuy -= width;
            } while (higuy > mid && comp(higuy, mid, width) == 0);
        }
        if (mid >= higuy) {
            do  {
                higuy -= width;
            } while (higuy > lo && comp(higuy, mid, width) == 0);
        }

        /* OK, now we have the following:
              higuy < loguy
              lo <= higuy <= hi
              A[i]  <= A[mid] for lo <= i <= higuy
              A[i]  == A[mid] for higuy < i < loguy
              A[i]  >  A[mid] for loguy <= i < hi
              A[hi] >= A[mid] */

        /* We've finished the partition, now we want to sort the sub-files
           [lo, higuy] and [loguy, hi].
           We do the smaller one first to minimize stack usage.
           We only sort sub-files of length 2 or more. */

        if ( higuy - lo >= hi - loguy ) {
            if (lo < higuy) {
                lostk[stkptr] = lo;
                histk[stkptr] = higuy;
                ++stkptr;
            }                           /* save big recursion for later */

            if (loguy < hi) {
                lo = loguy;
                goto recurse;           /* do small recursion */
            }
        }
        else {
            if (loguy < hi) {
                lostk[stkptr] = loguy;
                histk[stkptr] = hi;
                ++stkptr;               /* save big recursion for later */
            }

            if (lo < higuy) {
                hi = higuy;
                goto recurse;           /* do small recursion */
            }
        }
    }

    /* We have sorted the file, except for any pending sorts on the stack.
       Check if there are any, and do them. */

    --stkptr;
    if (stkptr >= 0) {
        lo = lostk[stkptr];
        hi = histk[stkptr];
        goto recurse;                   /* pop sub-file from stack */
    }
    else
        return;                         /* all sub-files done */
}
/***
*cmp(a, b, w) - compare two records of the file by their key field
*
*Purpose:
*       reads the records of size w at byte offsets a and b of the global
*       file fb into the global buffers A and B, then compares len bytes
*       starting at offset start inside each record with strncmp (so the
*       comparison also stops at a NUL byte)
*
*Entry:
*       long long int a, b = byte offsets of the two records to compare
*       long long int w    = width in bytes of each record
*
*Exit:
*       returns neg if record a sorts before record b, 0 if the keys are
*       equal, pos if record a sorts after record b
*
*Exceptions:
*       a failed seek or read terminates the program with EXIT_FAILURE;
*       comparing stale buffer contents would silently mis-order the file
*
*******************************************************************************/
int cmp(long long int a, long long int b, long long int w) {
    if (_fseeki64(fb,a,SEEK_SET) != 0 || fread(A,w,1,fb) != 1 ||
        _fseeki64(fb,b,SEEK_SET) != 0 || fread(B,w,1,fb) != 1) {
        fprintf(stderr,"\nCan not read records at offsets %lld and %lld!\n",a,b);
        exit(EXIT_FAILURE);
    }
    return strncmp(A+start,B+start,len);
}
//...to be continued