关于LZW的实现

yuanwh 2002-05-10 06:17:20

我根据书上的内容自己实现LZW算法，但速度慢得不得了。后来将查字典部分用二叉树加Hash的办法实现，但还是很慢。压缩一个24bit，512*512位图(大小约700多k)的时间是用winzip的10倍以上。请问用什么方法可提高其速度？请不吝赐教！

...全文

141 6 打赏收藏转发到动态举报

写回复

用AI写文章

6 条回复

切换为时间正序

请发表友善的回复…

发表回复

yuanwh 2002-05-15

打赏
举报

真的不好意思，又麻烦你。hash那段程序真的看不明白。到底设计思路是怎样的，能给我讲讲吗？如果嫌麻烦，告诉我一个URL让我自己看也行。
谢谢了！

coldcrane 2002-05-14

打赏
举报

这是lzw的一个相当简单的实现办法：
file到file的编码，解码
编码的位长自行确定。
hash部分(find_match())可供参考,其它没什么

一个例子:
#include <stdio.h>
#include "lzw.h"

/*
** Example driver: compresses the named file into "test.lzw", then expands
** "test.lzw" into "test.out".
**
** The input file name is argv[1] when given, otherwise it is read from
** standard input.  Returns 0 on success and 1 on a fatal error (allocation
** failure, unreadable file name, or a file that cannot be opened).
*/
int main(int argc, char *argv[])
{
    FILE *input_file;
    FILE *output_file;
    FILE *lzw_file;
    char input_file_name[81];
    const char *input_name;

    /*
    ** The three buffers are needed for the compression phase.
    */
    code_value=malloc(TABLE_SIZE*sizeof(*code_value));
    prefix_code=malloc(TABLE_SIZE*sizeof(*prefix_code));
    append_character=malloc(TABLE_SIZE*sizeof(*append_character));
    if (code_value==NULL || prefix_code==NULL || append_character==NULL)
    {
        printf("Fatal error allocating table space!\n");
        return 1;
    }

    /*
    ** Get the file name, open it up, and open up the lzw output file.
    ** argv[1] is used in place (no copy, so no length limit); a name typed
    ** at the prompt is limited to the 80 characters the buffer can hold.
    */
    if (argc>1)
        input_name=argv[1];
    else
    {
        printf("Input file name? ");
        if (scanf("%80s",input_file_name) != 1)
        {
            printf("Fatal error opening files.\n");
            return 1;
        }
        input_name=input_file_name;
    }
    input_file=fopen(input_name,"rb");
    lzw_file=fopen("test.lzw","wb");
    if (input_file==NULL || lzw_file==NULL)
    {
        printf("Fatal error opening files.\n");
        if (input_file!=NULL)
            fclose(input_file);
        if (lzw_file!=NULL)
            fclose(lzw_file);
        return 1;
    }

    /*
    ** Compress the file.  code_value is only used by the compressor, so
    ** it can be released as soon as compression is done.
    */
    compress(input_file,lzw_file);
    fclose(input_file);
    fclose(lzw_file);
    free(code_value);

    /*
    ** Now open the files for the expansion.
    */
    lzw_file=fopen("test.lzw","rb");
    output_file=fopen("test.out","wb");
    if (lzw_file==NULL || output_file==NULL)
    {
        printf("Fatal error opening files.\n");
        if (lzw_file!=NULL)
            fclose(lzw_file);
        if (output_file!=NULL)
            fclose(output_file);
        return 1;
    }

    /*
    ** Expand the file.
    */
    expand(lzw_file,output_file);
    fclose(lzw_file);
    fclose(output_file);

    free(prefix_code);
    free(append_character);
    return 0;
}

coldcrane 2002-05-13

打赏
举报

/*
** LZW configuration.  BITS is the fixed width of every code in the
** compressed stream; setting it to 12, 13 or 14 affects several of the
** constants below.  Note that MS-DOS machines need to compile their code
** in large model if 14 bits are selected.
**
** Every macro that expands to an expression is fully parenthesized so it
** can be used safely inside a larger expression (for example
** MAX_CODE * 2 or x << HASHING_SHIFT).
*/
#define BITS 13
#define HASHING_SHIFT (BITS - 8)
#define MAX_VALUE ((1 << BITS) - 1)  /* all-ones code, used as the end-of-data marker */
#define MAX_CODE (MAX_VALUE - 1)     /* largest code that may be assigned to a string */

/*
** The string table size needs to be a prime number that is somewhat
** larger than 2**BITS.
*/
#if BITS == 14
#define TABLE_SIZE 18041
#endif
#if BITS == 13
#define TABLE_SIZE 9029
#endif
#if BITS <= 12
#define TABLE_SIZE 5021
#endif
#ifndef TABLE_SIZE
#error "No TABLE_SIZE is defined for this value of BITS (use 14 or less)"
#endif

void *malloc();

int *code_value; /* This is the code value array */
unsigned int *prefix_code; /* This array holds the prefix codes */
unsigned char *append_character; /* This array holds the appended chars */
/*
** This array holds the decoded string.  decode_string() stops after 4094
** appended characters and may be handed decode_stack+1, so the last slot
** it can touch is decode_stack[4095]; the array must hold 4096 bytes.
*/
unsigned char decode_stack[4096];

/*
** This is the compression routine.  It reads bytes from 'input' and writes
** fixed-width LZW codes to 'output' through output_code().
**
** The stream always ends with the MAX_VALUE end-of-data code followed by a
** zero code that pushes the remaining bits out of the output buffer.  An
** empty input produces just that trailer.
**
** The global code_value, prefix_code and append_character tables must have
** been allocated (TABLE_SIZE entries each) before the call.
** Always returns 0.
*/
int find_match(int hash_prefix,unsigned int hash_character);
int output_code(FILE *output,unsigned int code);

int compress(FILE *input,FILE *output)
{
    unsigned int next_code = 256; /* Next code is the next available string code */
    unsigned int character;
    unsigned int string_code;
    unsigned int index;
    int byte;
    int i;

    for (i=0;i<TABLE_SIZE;i++) /* Clear out the string table before starting */
        code_value[i]=-1;

    i=0;
    printf("Compressing...\n");
    byte=getc(input); /* Get the first code */
    if (byte != EOF) /* Nothing to encode for an empty input */
    {
        string_code=(unsigned int)byte;
        /*
        ** This is the main loop where it all happens.  This loop runs until
        ** all of the input has been exhausted.  Note that it stops adding
        ** codes to the table after all of the possible codes have been
        ** defined.
        */
        while ((byte=getc(input)) != EOF)
        {
            character=(unsigned int)byte;
            if (++i==1000) /* Print a * every 1000 input characters. */
            {              /* This is just a pacifier.               */
                i=0;
                printf("*");
            }
            /* See if the string is in the table. */
            index=find_match(string_code,character);
            if (code_value[index] != -1)
                string_code=code_value[index]; /* It is: keep extending it. */
            else
            {
                /* It is not: try to add it, then output the last string. */
                if (next_code <= (MAX_CODE))
                {
                    code_value[index]=next_code++;
                    prefix_code[index]=string_code;
                    append_character[index]=character;
                }
                output_code(output,string_code);
                string_code=character;
            }
        }
        output_code(output,string_code); /* Output the last code */
    }
    output_code(output,(MAX_VALUE)); /* Output the end of buffer code */
    output_code(output,0); /* This code flushes the output buffer */
    printf("\n");
    return 0;
}

/*
** This is the hashing routine.  It tries to find a match for the
** prefix+char string in the string table.  If it finds it, the index is
** returned.  If the string is not found, the first available index in the
** string table is returned instead.
**
** The starting slot is (hash_character << HASHING_SHIFT) ^ hash_prefix.
** On a collision the search steps backwards through the table by a fixed
** offset (1 when the starting slot is 0, otherwise TABLE_SIZE - slot),
** wrapping around at the start of the table.  The loop only ends when it
** reaches a free slot or a match, so the table must never be completely
** full.
*/
int find_match(int hash_prefix,unsigned int hash_character)
{
    int index;
    int offset;

    /* Parentheses keep the shift correct however HASHING_SHIFT is defined. */
    index = (int)((hash_character << (HASHING_SHIFT)) ^ (unsigned int)hash_prefix);
    offset = (index == 0) ? 1 : TABLE_SIZE - index;

    for (;;)
    {
        if (code_value[index] == -1) /* free slot: string is not present */
            return index;
        if (prefix_code[index] == (unsigned int)hash_prefix &&
            append_character[index] == hash_character)
            return index;
        index -= offset;
        if (index < 0)
            index += TABLE_SIZE;
    }
}

/*
** This is the expansion routine.  It takes an LZW format file, and expands
** it to an output file.
**
** Codes are read with input_code() until the MAX_VALUE end-of-data code is
** seen.  A stream whose very first code is MAX_VALUE is treated as empty
** and produces no output.
**
** Returns 0 on success, or 1 after printing a message when the stream
** contains a code that cannot be valid at that point (a first code that is
** not a plain byte, or a code beyond the next one to be defined).
*/
int input_code(FILE *input);

int expand(FILE *input,FILE *output)
{
    unsigned int next_code = 256; /* This is the next available code to define */
    unsigned int new_code;
    unsigned int old_code;
    int character;
    int counter = 0; /* Counter is used as a pacifier. */
    unsigned char *string;
    char *decode_string(unsigned char *buffer,unsigned int code);

    printf("Expanding...\n");

    old_code=input_code(input); /* Read in the first code */
    if (old_code == (MAX_VALUE)) /* Empty stream: nothing to expand */
    {
        printf("\n");
        return 0;
    }
    if (old_code > 255) /* The first code is always a literal byte */
    {
        printf("Fatal error during code expansion.\n");
        return 1;
    }
    character=old_code; /* Initialize the character variable, and send */
    putc(old_code,output); /* the first code to the output file */

    /*
    ** This is the main expansion loop.  It reads in characters from the LZW
    ** file until it sees the special code used to indicate the end of the
    ** data.
    */
    while ((new_code=input_code(input)) != (MAX_VALUE))
    {
        if (++counter==1000) /* This section of code prints out an asterisk */
        {                    /* every 1000 characters.  It is just a pacifier. */
            counter=0;
            printf("*");
        }
        /*
        ** A code can be at most one ahead of the table; anything larger would
        ** index table entries that were never written.
        */
        if (new_code > next_code)
        {
            printf("Fatal error during code expansion.\n");
            return 1;
        }
        /*
        ** This code checks for the special
        ** STRING+CHARACTER+STRING+CHARACTER+STRING case which generates an
        ** undefined code.  It handles it by decoding the last code, and
        ** adding a single character to the end of the decode string.
        */
        if (new_code>=next_code)
        {
            *decode_stack=character;
            string=(unsigned char *)decode_string(decode_stack+1,old_code);
        }
        /*
        ** Otherwise we do a straight decode of the new code.
        */
        else
            string=(unsigned char *)decode_string(decode_stack,new_code);
        /*
        ** Now we output the decoded string in reverse order.  The loop stops
        ** on decode_stack[0] itself so the pointer never moves below the
        ** start of the array.
        */
        character=*string;
        for (;;)
        {
            putc(*string,output);
            if (string == decode_stack)
                break;
            string--;
        }
        /*
        ** Finally, if possible, add a new code to the string table.
        */
        if (next_code <= (MAX_CODE))
        {
            prefix_code[next_code]=old_code;
            append_character[next_code]=character;
            next_code++;
        }
        old_code=new_code;
    }
    printf("\n");
    return 0;
}

/*
** This routine simply decodes a string from the string table, storing
** it in a buffer.  The buffer can then be output in reverse order by
** the expansion program.
**
** Starting from 'code', the appended character of each table entry is
** written to *buffer and the chain is followed through prefix_code until a
** plain byte (a code of 255 or less) is reached; that byte is stored last.
** The returned pointer addresses that last byte, i.e. the first character
** of the decoded string.
**
** The expansion routine passes decode_stack or decode_stack+1 as the
** buffer, and that array holds at least 4000 bytes.  At most
** MAX_APPENDED characters are stored before the final byte, so the
** highest slot touched is decode_stack[3999].  Longer chains (or a cycle
** in a damaged table) abort the program with exit status 1.
*/
char *decode_string(unsigned char *buffer,unsigned int code)
{
    enum { MAX_APPENDED = 3998 };
    int i;

    i=0;
    while (code > 255)
    {
        /* Check before writing so the bound is never overrun. */
        if (i++ >= MAX_APPENDED)
        {
            printf("Fatal error during code expansion.\n");
            exit(1);
        }
        *buffer++ = append_character[code];
        code=prefix_code[code];
    }
    *buffer=(unsigned char)code;
    return (char *)buffer;
}

/*
** The following two routines are used to read and write the codes.
** They are written strictly for clarity, and are not particularly
** efficient.
*/

/*
** Reads the next BITS-wide code from 'input' and returns it.
**
** Bytes are collected most-significant-bit first in a 32-bit window that
** is kept in static storage between calls, so the routine can serve only
** one stream at a time and is not reset between streams.
**
** The window is explicitly masked to 32 bits: 'unsigned long' is wider
** than that on many platforms, and without the mask bits of codes already
** returned would leak into later results.
**
** Once the file is exhausted the window is padded with 1 bits, so a
** stream that ends early decodes as the all-ones end-of-data code.
*/
int input_code(FILE *input)
{
    unsigned int return_value;
    static int input_bit_count=0;
    static unsigned long input_bit_buffer=0L;

    while (input_bit_count <= 24)
    {
        int byte=getc(input);

        if (byte == EOF) /* pad with ones; never OR -1 over pending bits */
            byte=0xFF;
        input_bit_buffer |= (unsigned long) byte << (24-input_bit_count);
        input_bit_count += 8;
    }
    return_value=(unsigned int)(input_bit_buffer >> (32-(BITS)));
    input_bit_buffer=(input_bit_buffer << (BITS)) & 0xFFFFFFFFUL;
    input_bit_count -= (BITS);
    return (int)return_value;
}

/*
** Appends one BITS-wide code to 'output', most significant bit first.
**
** Bits are gathered in a 32-bit window kept in static storage between
** calls; every complete byte is written immediately and up to 7 bits stay
** pending until a later call supplies the rest.  Because of that static
** state the routine serves one stream at a time.
**
** Only the low BITS bits of 'code' are used, and the window is masked to
** 32 bits so the result does not depend on the width of 'unsigned long'.
** Always returns 0.
*/
int output_code(FILE *output,unsigned int code)
{
    static int output_bit_count=0;
    static unsigned long output_bit_buffer=0L;
    unsigned long bits=(unsigned long) code & ((1UL << (BITS)) - 1UL);

    output_bit_buffer |= bits << (32-(BITS)-output_bit_count);
    output_bit_count += (BITS);
    while (output_bit_count >= 8)
    {
        putc((int)((output_bit_buffer >> 24) & 0xFFUL),output);
        output_bit_buffer=(output_bit_buffer << 8) & 0xFFFFFFFFUL;
        output_bit_count -= 8;
    }
    return 0;
}

yuanwh 2002-05-13

打赏
举报

我的代码如下，还请高手们赐教：（不过恐怕很难看得明白，所以最好能给我介绍一下基本的思路）

// One entry of the encoder's dictionary. An entry does not store the string
// itself; it records where the string lies in the source buffer. Entries that
// start with the same byte are linked through nLess / nGreater, and -1 in
// either link means "no entry".
struct _DictionaryLZW{
long nPosition;// start offset of the string within the source data
short nLen;// length of the string
short nLess;// dictionary index where strings that compare less than this one continue
short nGreater;// dictionary index where strings that compare greater than this one continue
};

// First dictionary index handed out for a multi-byte string. The values
// below it are single bytes (0-255) plus the two codes the encoder emits
// itself: 256 when it resets the dictionary and 257 at the end of the data.
#define FIRSTINDEX 258
// Number of dictionary slots; the encoder resets the dictionary when the
// next free index reaches this value. Codes are packed as 12-bit values
// (two codes per three bytes), so no index above 4095 may ever be emitted.
#define DICSIZE 4096

static void InitLZWDic(struct _DictionaryLZW* lpDic)
{
int i;
for (i = FIRSTINDEX; i < DICSIZE; i++)
lpDic[i].nGreater = lpDic[i].nLess = lpDic[i].nPosition = lpDic[i].nLen = -1;
}

// Resets the 256-element table of per-first-byte entry points: every
// element becomes -1, meaning no string starting with that byte is stored.
static void InitEntry(short* lpEntrys)
{
    short* cursor = lpEntrys;
    short* const stop = lpEntrys + 256;

    while (cursor != stop)
        *cursor++ = -1;
}

static int FindLZWDic(const LPBYTE lpSource, /*int nSourceLen,*/ const struct _DictionaryLZW* lpDic,
const LPBYTE szPrefix_C, int nPrefix, const short* lpEntrys)
{
int i, j;
BYTE c = szPrefix_C[0];
if (nPrefix == 0)
return c;
if (lpEntrys[c] != -1) {
i = lpEntrys[c];
do {
if (nPrefix < lpDic[i].nLen) {
j = memcmp(szPrefix_C, lpSource + lpDic[i].nPosition, nPrefix + 1);
if (j == 0)
return i;
} else
j = memcmp(szPrefix_C, lpSource + lpDic[i].nPosition, lpDic[i].nLen);
if (j > 0)
i = lpDic[i].nGreater;
else
i = lpDic[i].nLess;
} while (i != -1);
}
return -1;
}

// Stores a new string in dictionary slot nEntry and links it in.
//
// The string is the nLen bytes of lpSource starting at nPosition. If no
// string with the same first byte exists yet, the slot becomes the entry
// point for that byte. Otherwise the existing entries are walked from that
// entry point: at each one the new string is compared (memcmp over the
// shorter of the two lengths) and the walk continues through nGreater when
// the result is positive and through nLess otherwise, until a free link is
// found; the new slot is attached there.
static void AddDic(int nEntry, struct _DictionaryLZW* lpDic, long nPosition, short nLen, const LPBYTE lpSource,
short* lpEntrys)
{
    struct _DictionaryLZW* added = &lpDic[nEntry];
    added->nPosition = nPosition;
    added->nLen = nLen;
    added->nLess = -1;
    added->nGreater = -1;

    short* root = &lpEntrys[lpSource[nPosition]];
    if (*root == -1) {
        *root = nEntry;
        return;
    }

    short node = *root;
    for (;;) {
        short span = (nLen < lpDic[node].nLen) ? nLen : lpDic[node].nLen;
        int order = memcmp(lpSource + nPosition, lpSource + lpDic[node].nPosition, span);
        short* link = (order > 0) ? &lpDic[node].nGreater : &lpDic[node].nLess;

        if (*link == -1) {
            *link = nEntry;
            return;
        }
        node = *link;
    }
}

// Writes codes to lpMem, packing two codes into three bytes.
//
// The first code of a pair is only stored (low byte in byte 0, high byte in
// byte 1). The second code adds its low 4 bits to the upper half of byte 1
// and its bits 4-11 to byte 2, and the three bytes are written. The
// half-filled state is kept in static storage between calls.
//
// With bDirectWrite set and no code pending, the code is written at once
// as two bytes instead of waiting for a partner.
static void OutputStream(WORD nCode, CMemFile* lpMem, BOOL bDirectWrite = FALSE)
{
    static BYTE Packed[3];
    static BOOL bHalfFilled = FALSE;

    if (bHalfFilled) {
        // Second code of the pair: complete the triplet and write it out.
        Packed[1] |= ((nCode & 0xF) << 4);
        Packed[2] = (nCode & 0x0FF0) >> 4;
        lpMem->Write(Packed, sizeof(Packed));
        bHalfFilled = FALSE;
        return;
    }

    // First code of a pair.
    Packed[0] = LOBYTE(nCode);
    Packed[1] = HIBYTE(nCode);
    if (bDirectWrite)
        lpMem->Write(Packed, 2);
    else
        bHalfFilled = TRUE;
}

// Encodes nSourceLen bytes at lpSource and hands the result back through
// *lpDest / *nDestLen (the buffer is detached from an internal CMemFile).
//
// The loop extends the current string one byte at a time for as long as
// FindLZWDic still finds it (and it is shorter than 256 bytes). When it
// cannot be extended, the code of the last match is emitted, the longer
// string is added to the dictionary, and matching restarts at the byte
// that broke the match. When the dictionary is full it is cleared and
// code 256 is emitted. The stream ends with the code of the final match
// followed by code 257.
//
// For nSourceLen <= 0 there is no match to emit, so only code 257 is
// written.
void EncodeLZW(const LPBYTE lpSource, int nSourceLen, LPBYTE *lpDest, int *nDestLen)
{
    CMemFile memDest;
    BYTE szPrefix[256];

    long i, j, k;
    short DicEntry[256];
    short nPrefix, nCurCode;
    short nLastCode = -1; // -1: no match has been recorded yet
    struct _DictionaryLZW DicIndex[DICSIZE];
    InitLZWDic(DicIndex);
    InitEntry(DicEntry);
    j = FIRSTINDEX; // next free dictionary index
    nPrefix = 0;    // number of bytes already matched in szPrefix

    // k is the source offset where the string currently being matched starts.
    for (k = 0, i = 0; i < nSourceLen; i++) {
        szPrefix[nPrefix] = lpSource[i];
        nCurCode = FindLZWDic(lpSource, /*nSourceLen,*/ DicIndex, szPrefix, nPrefix, DicEntry);
        if (nCurCode != -1 && nPrefix < 255) {
            // Still a known string: remember its code and keep extending.
            nPrefix++;
            nLastCode = nCurCode;
        } else {
            OutputStream(nLastCode, &memDest);

            AddDic(j, DicIndex, k, nPrefix + 1, lpSource, DicEntry);

            k = i;
            nPrefix = 0;
            i--; // process the current byte again as the start of a new string
            j++;
            if (j == DICSIZE) {
                // Dictionary full: start over and tell the decoder with code 256.
                j = FIRSTINDEX;
                InitLZWDic(DicIndex);
                InitEntry(DicEntry);
                OutputStream(256, &memDest);
            }
        }
    }
    // nLastCode is only unset when the loop never ran (empty input).
    if (nLastCode != -1)
        OutputStream(nLastCode, &memDest);
    OutputStream(257, &memDest, TRUE);
    *nDestLen = memDest.GetLength();
    *lpDest = memDest.Detach();
}

yuanwh 2002-05-13