数据量大的快速排序,内存不够?

zkw261123 2017-03-21 08:28:45
我现在在写一个有关快速排序的程序,准备研究怎么选支点的问题。
但是遇到一个问题,当数据量超过1万个时,程序要么直接“按任意键退出”,要么输出有问题(前面正确,到后面大约第6000个以上后开始输出-85xxxx,貌似是溢出?),检查代码发现并不是代码的问题。
百度了一下发现是程序的内存不够,于是我按照百度的方法,在调试的时候给程序分配了10M的空间,上面的状况还是偶有发生。
我的编译器是VC++ 6.0,我想问一下有没有更好的解决办法?
...全文
962 10 打赏 收藏 转发到动态 举报
写回复
用AI写文章
10 条回复
切换为时间正序
请发表友善的回复…
发表回复
赵4老师 2017-03-28
  • 打赏
  • 举报
回复

//...接上帖
/*
 * fqsort - sort the fixed-width records of a file in place.
 *
 * Usage: fqsort filename.ext width [start len]
 *   width  record size in bytes; the file size must be a multiple of it
 *   start  offset of the sort key inside a record (default 0; a value
 *          outside [0,width) falls back to 0)
 *   len    length of the sort key (default: the rest of the record; a value
 *          outside [1,width] falls back to the rest of the record)
 *
 * Returns 0 on success, 1 on bad arguments, 2 if the full path cannot be
 * resolved, 3 if the file cannot be opened, 4 if its size cannot be read,
 * 5 if the size is not a multiple of width, 6 if a record buffer cannot be
 * allocated and 7 if the file cannot be closed after sorting.
 */
int main(int argc,char **argv) {
	int	w;
	__int64	fl;

	/* A missing width and a non-positive width are both usage errors. */
	w=(argc>=3)?atoi(argv[2]):0;
	if (w<=0) {
		printf("Usage: fqsort filename.ext width [start len]\n");
		return 1;
	}
	if (NULL==_fullpath(fullpath,argv[1],_MAX_PATH)) {
		printf("Can not get fullpath of %s!\n",argv[1]);
		return 2;
	}
	fb=fopen(fullpath,"rb+");
	if (NULL==fb) {
		printf("Can not open file [%s]!\n",argv[1]);
		return 3;
	}
	fl=_filelengthi64(fileno(fb));
	if (-1==fl) {
		fclose(fb);
		printf("Can not get size of file [%s]!\n",argv[1]);
		return 4;
	}
	if (fl%w) {
		fclose(fb);
		printf("The size of file [%s] %I64d %% width %d != 0\n",argv[1],fl,w);
		return 5;
	}
	start=0;len=w;
	if (argc>=4) {
		start=atoi(argv[3]);
		if (start<0	|| w<=start) start=0;
	}
	if (argc>=5) {
		len=atoi(argv[4]);
		if (len<1 || w<len)	len=w;
	}
	/* Keep the key inside the record even when only start was given;
	   otherwise cmp() would compare bytes beyond the w-byte buffers.
	   Written as a subtraction so that start+len cannot overflow. */
	if (len>w-start) len=w-start;
	A=(char	*)malloc(w);
	if (NULL==A) {
		fclose(fb);
		printf("Can not A=malloc(%d)!\n",w);
		return 6;
	}
	B=(char	*)malloc(w);
	if (NULL==B) {
		free(A);				/* do not leak the first buffer */
		fclose(fb);
		printf("Can not B=malloc(%d)!\n",w);
		return 6;
	}
	/* An empty file has no records, so there is nothing to sort. */
	if (fl/w>0) fqsort(0,fl/w,w,cmp);
	free(B);
	free(A);
	/* fclose flushes pending writes; a failure means the sorted data may
	   not have reached the file. */
	if (0!=fclose(fb)) {
		printf("Can not close file [%s]!\n",argv[1]);
		return 7;
	}
	printf("\nfqsort file [%s] width %d OK.\n",argv[1],w);
	return 0;
}
赵4老师 2017-03-28
  • 打赏
  • 举报
回复
引用 8 楼 zhao4zhong1 的回复:
[quote=引用 5 楼 zkw261123 的回复:] [quote=引用 4 楼 zhao4zhong1 的回复:] 用文件读写模拟内存读写。
先读一部分到内存,然后排完后写到另一个文件,在读一部分,再排,是这样吗?[/quote] 不是。[/quote] 仅供参考:
#pragma	warning(disable:4996 4244)
#include <stdio.h>
#include <io.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <assert.h>
/* File-level state shared by main(), swap() and cmp(). */
int	MAXW;					/* not referenced by the code visible in this file */
char *A,*B;					/* two record buffers of `width` bytes each, allocated in main() */
FILE *fb;					/* the file being sorted, opened in main() with mode "rb+" */
char fullpath[_MAX_PATH];	/* absolute path of the file, filled in by _fullpath() in main() */
int	start,len;				/* offset and length of the sort key inside a record, used by cmp() */
long long int n=0;			/* number of swaps performed so far; swap() prints a dot every 1000 */
#define	CUTOFF 8			/* sub-files of at most this many records are sorted by shortsort() */

/* Note: the theoretical number of stack entries required is
   no more than 1 + log2(num).  But we switch to shortsort()
   for CUTOFF elements or less, so we really only need
   1 + log2(num) - log2(CUTOFF) stack entries.  For a CUTOFF
   of 8, that means we need no more than 62 stack entries for
   64-bit platforms. */
/* STKSIZ is the length of the lostk[]/histk[] arrays in fqsort(). */
//#define STKSIZ (8*sizeof(void*) -	2)
#define	STKSIZ	 (8*8			  -	2)

/***
*swap(a, b, w) - swap two records of the file fb
*
*Purpose:
*		exchanges the w-byte records stored at byte offsets a and b of
*		the file fb, using the global buffers A and B as scratch space.
*		every real swap increments the global counter n, and a progress
*		dot is printed to stdout after each 1000 swaps.
*
*Entry:
*		long long int a, b = byte offsets of the two records to swap
*		long long int w = width in bytes of each record
*
*Exit:
*		returns void; does nothing when a == b
*
*Exceptions:
*		if a seek, read or write fails, a message is printed to stderr
*		and the program exits with EXIT_FAILURE.  continuing would write
*		stale buffer contents back into the file.
*
*******************************************************************************/

static void	swap(
	long long int a,
	long long int b,
	long long int w
	)
{
	if ( a == b )
		return;

	/* Read both records before touching the file, so that a failed read
	   can never lead to a write. */
	if (_fseeki64(fb,a,SEEK_SET)!=0 || fread(A,w,1,fb)!=1 ||
		_fseeki64(fb,b,SEEK_SET)!=0 || fread(B,w,1,fb)!=1) {
		fprintf(stderr,"\nswap: can not read records at offsets %lld and %lld!\n",a,b);
		exit(EXIT_FAILURE);
	}

	/* The seek before each write also satisfies the rule that an update
	   stream needs a positioning call between a read and a write. */
	if (_fseeki64(fb,a,SEEK_SET)!=0 || fwrite(B,w,1,fb)!=1 ||
		_fseeki64(fb,b,SEEK_SET)!=0 || fwrite(A,w,1,fb)!=1) {
		fprintf(stderr,"\nswap: can not write records at offsets %lld and %lld!\n",a,b);
		exit(EXIT_FAILURE);
	}

	n++;
	if (n%1000==0) {printf(".");fflush(stdout);}
}

/***
*shortsort(lo, hi, width, comp) - selection sort for a short run of records
*
*Purpose:
*		sorts the records between offsets lo and hi (inclusive) in place.
*		each pass finds the largest record of the unsorted range, moves
*		it to the end of that range, and shrinks the range by one record.
*
*Entry:
*		long long int lo = offset of the first record to sort
*		long long int hi = offset of the last record to sort
*		long long int width = width in bytes of each record
*		int (*comp)() = comparison function called as comp(p1, p2, width)
*				with two record offsets.  returns neg if p1<p2,
*				0 if p1==p2, pos if p1>p2.
*
*Exit:
*		returns void
*
*Exceptions:
*
*******************************************************************************/

static void	shortsort(
	long long int lo,
	long long int hi,
	long long int width,
	int	(*comp)(long long int, long	long int,long long int)
	)
{
	long long int last;

	/* Records after `last` are already in their final positions. */
	for (last = hi; last > lo; last -= width) {
		long long int biggest = lo;
		long long int cur = lo + width;

		/* Scan [lo, last] for the largest record; on ties the earliest
		   one wins, because only a strictly greater record replaces it. */
		while (cur <= last) {
			if (comp(cur, biggest, width) > 0)
				biggest = cur;
			cur += width;
		}

		/* Put the largest record at the end of the unsorted range. */
		swap(biggest, last, width);
	}
}

/***
*fqsort(base, num, width, comp) - quicksort the records of a file in place
*
*Purpose:
*		sorts num records of width bytes each, starting at offset base.
*		records are only accessed through comp(), swap() and shortsort().
*		the pivot is the median of the first, middle and last records.
*		sub-files of CUTOFF records or fewer are handed to shortsort().
*		recursion is replaced by an explicit stack of STKSIZ entries: the
*		larger partition is pushed and the smaller one is sorted first.
*
*Entry:
*		long long int base = offset of the first record (>= 0)
*		long long int num = number of records (>= 0)
*		long long int width = width in bytes of each record (> 0)
*		int (*comp)() = comparison function called as comp(p1, p2, width)
*				with two record offsets.  returns neg if p1<p2,
*				0 if p1==p2, pos if p1>p2.
*
*Exit:
*		returns void; returns at once when num < 2
*
*Exceptions:
*		invalid arguments trip an assert() in debug builds
*
*******************************************************************************/

void fqsort(
	long long int base,
	long long int num,
	long long int width,
	int	(*comp)(long long int, long	long int, long long	int)
	)
{
	long long int lo, hi;				/* ends of sub-file currently sorting */
	long long int mid;					/* points to middle of sub-file */
	long long int loguy, higuy;			/* traveling pointers for partition step */
	long long int size;					/* size of the sub-file */
	long long int lostk[STKSIZ], histk[STKSIZ];
	int	stkptr;							/* stack for saving sub-file to be processed */

	/* validation section.  num == 0 (an empty file) is a valid request
	   and is handled by the "nothing to do" return below, so it must not
	   trip the assertion. */
	assert(base >= 0 && num >= 0);
	assert(width > 0);
	assert(comp != NULL);

	if (num < 2)
		return;					/* nothing to do */

	stkptr = 0;					/* initialize stack */

	lo = base;
	hi = base + width * (num-1);		/* initialize limits */

	/* this entry point is for pseudo-recursion calling: setting
	   lo and hi and jumping to here is like recursion, but stkptr is
	   preserved, locals aren't, so we preserve stuff on the stack */
recurse:

	size = (hi - lo) / width + 1;		/* number of el's to sort */

	/* below a certain size, it is faster to use a O(n^2) sorting method */
	if (size <= CUTOFF) {
		shortsort(lo, hi, width, comp);
	}
	else {
		/* First we pick a partitioning element.  The efficiency of the
		   algorithm demands that we find one that is approximately the median
		   of the values, but also that we select one fast.  We choose the
		   median of the first, middle, and last elements, to avoid bad
		   performance in the face of already sorted data, or data that is made
		   up of multiple sorted runs appended together.  Testing shows that a
		   median-of-three algorithm provides better performance than simply
		   picking the middle element for the latter case. */

		mid = lo + (size / 2) * width;		/* find middle element */

		/* Sort the first, middle, last elements into order */
		if (comp(lo, mid, width) > 0) {
			swap(lo, mid, width);
		}
		if (comp(lo, hi, width) > 0) {
			swap(lo, hi, width);
		}
		if (comp(mid, hi, width) > 0) {
			swap(mid, hi, width);
		}

		/* We now wish to partition the file into three pieces, one consisting
		   of elements <= partition element, one of elements equal to the
		   partition element, and one of elements > than it.  This is done
		   below; comments indicate conditions established at every step. */

		loguy = lo;
		higuy = hi;

		/* Note that higuy decreases and loguy increases on every iteration,
		   so loop must terminate. */
		for (;;) {
			/* lo <= loguy < hi, lo < higuy <= hi,
			   A[i] <= A[mid] for lo <= i <= loguy,
			   A[i] > A[mid] for higuy <= i < hi,
			   A[hi] >= A[mid] */

			/* The doubled loop is to avoid calling comp(mid,mid,width), since some
			   existing comparison funcs don't work when passed the same
			   value for both pointers. */

			if (mid > loguy) {
				do {
					loguy += width;
				} while (loguy < mid && comp(loguy, mid, width) <= 0);
			}
			if (mid <= loguy) {
				do {
					loguy += width;
				} while (loguy <= hi && comp(loguy, mid, width) <= 0);
			}

			/* lo < loguy <= hi+1, A[i] <= A[mid] for lo <= i < loguy,
			   either loguy > hi or A[loguy] > A[mid] */

			do {
				higuy -= width;
			} while (higuy > mid && comp(higuy, mid, width) > 0);

			/* lo <= higuy < hi, A[i] > A[mid] for higuy < i < hi,
			   either higuy == lo or A[higuy] <= A[mid] */

			if (higuy < loguy)
				break;

			/* if loguy > hi or higuy == lo, then we would have exited, so
			   A[loguy] > A[mid], A[higuy] <= A[mid],
			   loguy <= hi, higuy > lo */

			swap(loguy, higuy, width);

			/* If the partition element was moved, follow it.  Only need
			   to check for mid == higuy, since before the swap,
			   A[loguy] > A[mid] implies loguy != mid. */

			if (mid == higuy)
				mid = loguy;

			/* A[loguy] <= A[mid], A[higuy] > A[mid]; so condition at top
			   of loop is re-established */
		}

		/*     A[i] <= A[mid] for lo <= i < loguy,
			   A[i] > A[mid] for higuy < i < hi,
			   A[hi] >= A[mid]
			   higuy < loguy
		   implying:
			   higuy == loguy-1
			   or higuy == hi - 1, loguy == hi + 1, A[hi] == A[mid] */

		/* Find adjacent elements equal to the partition element.  The
		   doubled loop is to avoid calling comp(mid,mid,width), since some
		   existing comparison funcs don't work when passed the same value
		   for both pointers. */

		higuy += width;
		if (mid < higuy) {
			do {
				higuy -= width;
			} while (higuy > mid && comp(higuy, mid, width) == 0);
		}
		if (mid >= higuy) {
			do {
				higuy -= width;
			} while (higuy > lo && comp(higuy, mid, width) == 0);
		}

		/* OK, now we have the following:
			  higuy < loguy
			  lo <= higuy <= hi
			  A[i]  <= A[mid] for lo <= i <= higuy
			  A[i]  == A[mid] for higuy < i < loguy
			  A[i]  >  A[mid] for loguy <= i < hi
			  A[hi] >= A[mid] */

		/* We've finished the partition, now we want to sort the sub-files
		   [lo, higuy] and [loguy, hi].
		   We do the smaller one first to minimize stack usage.
		   We only sort sub-files of length 2 or more. */

		if ( higuy - lo >= hi - loguy ) {
			if (lo < higuy) {
				lostk[stkptr] = lo;
				histk[stkptr] = higuy;
				++stkptr;
			}							/* save big recursion for later */

			if (loguy < hi) {
				lo = loguy;
				goto recurse;			/* do small recursion */
			}
		}
		else {
			if (loguy < hi) {
				lostk[stkptr] = loguy;
				histk[stkptr] = hi;
				++stkptr;				/* save big recursion for later */
			}

			if (lo < higuy) {
				hi = higuy;
				goto recurse;			/* do small recursion */
			}
		}
	}

	/* We have sorted the file, except for any pending sorts on the stack.
	   Check if there are any, and do them. */

	--stkptr;
	if (stkptr >= 0) {
		lo = lostk[stkptr];
		hi = histk[stkptr];
		goto recurse;			/* pop sub-file from stack */
	}
	else
		return;					/* all sub-files done */
}
/*
 * cmp - compare the sort keys of two records of the file fb.
 *
 * a, b  byte offsets of the two records
 * w     width in bytes of each record
 *
 * Reads record a into the global buffer A and record b into B, then
 * compares len bytes starting at offset start of each buffer.  Returns
 * neg if a<b, 0 if a==b, pos if a>b.  The comparison uses strncmp, so it
 * stops at the first NUL byte of a key and ignores anything after it.
 *
 * If a seek or read fails, a message is printed to stderr and the program
 * exits with EXIT_FAILURE; comparing stale buffer contents would silently
 * produce a wrong order.
 */
int	cmp(long long int a, long long int b, long long	int	w) {
	if (_fseeki64(fb,a,SEEK_SET)!=0 || fread(A,w,1,fb)!=1 ||
		_fseeki64(fb,b,SEEK_SET)!=0 || fread(B,w,1,fb)!=1) {
		fprintf(stderr,"\ncmp: can not read records at offsets %lld and %lld!\n",a,b);
		exit(EXIT_FAILURE);
	}
	return strncmp(A+start,B+start,len);
}
//...未完待续
赵4老师 2017-03-27
  • 打赏
  • 举报
回复
引用 5 楼 zkw261123 的回复:
[quote=引用 4 楼 zhao4zhong1 的回复:] 用文件读写模拟内存读写。
先读一部分到内存,然后排完后写到另一个文件,在读一部分,再排,是这样吗?[/quote] 不是。
ri_aje 2017-03-26
  • 打赏
  • 举报
回复
10M 都搞不定?放在堆上老老实实排就行了,太大了内存放不下用外排序。
zkw261123 2017-03-25
  • 打赏
  • 举报
回复
引用 4 楼 zhao4zhong1 的回复:
用文件读写模拟内存读写。
先读一部分到内存,然后排完后写到另一个文件,再读一部分,再排,是这样吗?
GKatHere 2017-03-25
  • 打赏
  • 举报
回复
数据量大,不要用递归(比如二分法递归)。 如是递归,改下吧
paschen 2017-03-23
  • 打赏
  • 举报
回复
引用 2 楼 zkw261123 的回复:
[quote=引用 1 楼 paschen 的回复:] 你的情况应该在堆上分配数组,而不是在栈上
那我可以通过减小数据量,增加排序次数来解决吗?因为数据量减少了数组大小也会减少[/quote] 不仅仅是排序的过程,数据太多,其本身存储在栈上的数组就不够;其次这样反而得不偿失,排序效率也会变低。
赵4老师 2017-03-23
  • 打赏
  • 举报
回复
用文件读写模拟内存读写。
paschen 2017-03-22
  • 打赏
  • 举报
回复
你的情况应该在堆上分配数组,而不是在栈上
zkw261123 2017-03-22
  • 打赏
  • 举报
回复
引用 1 楼 paschen 的回复:
你的情况应该在堆上分配数组,而不是在栈上
那我可以通过减小数据量,增加排序次数来解决吗?因为数据量减少了数组大小也会减少

3,882

社区成员

发帖
与我相关
我的任务
社区描述
C/C++ 其它技术问题
社区管理员
  • 其它技术问题社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧