1 程序由五个模块组成。
(1) lzw.h 定义了一些基本的数据结构,常量,还有变量的初始化等。
#ifndef __LZW_H__ #define __LZW_H__ //------------------------------------------------------------------------------ #include <stdio.h> #include <stdlib.h> #include <windows.h> #include <memory.h> //------------------------------------------------------------------------------ #define LZW_BASE 0x102// The code base #define CODE_LEN 12 // Max code length #define TABLE_LEN 4099 // It must be prime number and bigger than 2^CODE_LEN=4096. // Such as 5051 is also ok. #define BUFFERSIZE 1024 //------------------------------------------------------------------------------ typedef struct { HANDLE h_sour; // Source file handle. HANDLE h_dest; // Destination file handle. HANDLE h_suffix; // Suffix table handle. HANDLE h_prefix; // Prefix table handle. HANDLE h_code; // Code table handle. LPWORD lp_prefix; // Prefix table head pointer. LPBYTE lp_suffix; // Suffix table head pointer. LPWORD lp_code; // Code table head pointer.
WORD code; WORD prefix; BYTE suffix;
BYTE cur_code_len; // Current code length.[ used in Dynamic-Code-Length mode ]
}LZW_DATA,*PLZW_DATA;
typedef struct { WORD top; WORD index;
LPBYTE lp_buffer; HANDLE h_buffer; BYTE by_left; DWORD dw_buffer;
BOOL end_flag;
}BUFFER_DATA,*PBUFFER_DATA;
typedef struct //Stack used in decode { WORD index; HANDLE h_stack; LPBYTE lp_stack;
}STACK_DATA,*PSTACK_DATA; //------------------------------------------------------------------------------ VOID stack_create( PSTACK_DATA stack ) { stack->h_stack = GlobalAlloc( GHND , TABLE_LEN*sizeof(BYTE) ); stack->lp_stack = GlobalLock( stack->h_stack ); stack->index = 0; } //------------------------------------------------------------------------------ VOID stack_destory( PSTACK_DATA stack ) { GlobalUnlock( stack->h_stack ); GlobalFree ( stack->h_stack ); } //------------------------------------------------------------------------------ VOID buffer_create( PBUFFER_DATA buffer ) { buffer->h_buffer = GlobalAlloc( GHND, BUFFERSIZE*sizeof(BYTE) ); buffer->lp_buffer = GlobalLock( buffer->h_buffer ); buffer->top = 0; buffer->index = 0; buffer->by_left = 0; buffer->dw_buffer = 0; buffer->end_flag = FALSE; } //------------------------------------------------------------------------------ VOID buffer_destory( PBUFFER_DATA buffer ) { GlobalUnlock( buffer->h_buffer ); GlobalFree ( buffer->h_buffer ); } //------------------------------------------------------------------------------ VOID re_init_lzw( PLZW_DATA lzw ) //When code table reached its top it should { //be reinitialized. memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) ); lzw->code = LZW_BASE; lzw->cur_code_len = 9; } //------------------------------------------------------------------------------ VOID lzw_create(PLZW_DATA lzw, HANDLE h_sour, HANDLE h_dest) { WORD i; lzw->h_code = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) ); lzw->h_prefix = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) ); lzw->h_suffix = GlobalAlloc( GHND, TABLE_LEN*sizeof(BYTE) ); lzw->lp_code = GlobalLock( lzw->h_code ); lzw->lp_prefix = GlobalLock( lzw->h_prefix ); lzw->lp_suffix = GlobalLock( lzw->h_suffix ); lzw->code = LZW_BASE; lzw->cur_code_len = 9; lzw->h_sour = h_sour; lzw->h_dest = h_dest; memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );
} //------------------------------------------------------------------------------ VOID lzw_destory(PLZW_DATA lzw) { GlobalUnlock( lzw->h_code ); GlobalUnlock( lzw->h_prefix ); GlobalUnlock( lzw->h_suffix ); GlobalFree( lzw->h_code ); GlobalFree( lzw->h_prefix ); GlobalFree( lzw->h_suffix ); } //------------------------------------------------------------------------------ #endif
(2) fileio.h 定义了一些文件操作
#ifndef __FILEIO_H__ #define __FILEIO_H__ //------------------------------------------------------------------------------ #include <stdio.h> #include <stdlib.h> #include <windows.h> //------------------------------------------------------------------------------ HANDLE file_handle(CHAR* file_name) { HANDLE h_file; h_file = CreateFile(file_name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL ); return h_file; } //------------------------------------------------------------------------------ WORD load_buffer(HANDLE h_sour, PBUFFER_DATA buffer) // Load file to buffer { DWORD ret; ReadFile(h_sour,buffer->lp_buffer,BUFFERSIZE,&ret,NULL); buffer->index = 0; buffer->top = (WORD)ret; return (WORD)ret; } //------------------------------------------------------------------------------ WORD empty_buffer( PLZW_DATA lzw, PBUFFER_DATA buffer)// Output buffer to file { DWORD ret; if(buffer->end_flag) // The flag mark the end of decode { if( buffer->by_left ) { buffer->lp_buffer[ buffer->index++ ] = (BYTE)( buffer->dw_buffer >> 32-buffer->by_left )<<(8-buffer->by_left); } } WriteFile(lzw->h_dest, buffer->lp_buffer,buffer->index,&ret,NULL); buffer->index = 0; buffer->top = ret; return (WORD)ret; } //------------------------------------------------------------------------------ #endif
(3) hash.h 定义了压缩时所用的码表操作函数,为了快速查找使用了hash算法,还有处理hash冲突的函数
#ifndef __HASH_H__ #define __HASH_H__ //------------------------------------------------------------------------------ #include <stdio.h> #include <stdlib.h> #include <windows.h> //------------------------------------------------------------------------------ #define DIV TABLE_LEN #define HASHSTEP 13 // It should bigger than 0. //------------------------------------------------------------------------------ WORD get_hash_index( PLZW_DATA lzw ) { DWORD tmp; WORD result; DWORD prefix; DWORD suffix; prefix = lzw->prefix; suffix = lzw->suffix; tmp = prefix<<8 | suffix; result = tmp % DIV; return result; } //------------------------------------------------------------------------------ WORD re_hash_index( WORD hash ) // If hash conflict occured we must recalculate { // hash index . WORD result; result = hash + HASHSTEP; result = result % DIV; return result; } //------------------------------------------------------------------------------ BOOL in_table( PLZW_DATA lzw ) // To find whether current code is already in table. { BOOL result; WORD hash;
hash = get_hash_index( lzw ); if( lzw->lp_code[ hash ] == 0xFFFF ) { result = FALSE; } else { if( lzw->lp_prefix[ hash ] == lzw->prefix && lzw->lp_suffix[ hash ] == lzw->suffix ) { result = TRUE; } else { result = FALSE; while( lzw->lp_code[ hash ] != 0xFFFF ) { if( lzw->lp_prefix[ hash ] == lzw->prefix && lzw->lp_suffix[ hash ] == lzw->suffix ) { result = TRUE; break; } hash = re_hash_index( hash ); } } } return result; } //------------------------------------------------------------------------------ WORD get_code( PLZW_DATA lzw ) { WORD hash; WORD code; hash = get_hash_index( lzw ); if( lzw->lp_prefix[ hash ] == lzw->prefix && lzw->lp_suffix[ hash ] == lzw->suffix ) { code = lzw->lp_code[ hash ]; } else { while( lzw->lp_prefix[ hash ] != lzw->prefix || lzw->lp_suffix[ hash ] != lzw->suffix ) { hash = re_hash_index( hash ); } code = lzw->lp_code[ hash ]; } return code; } //------------------------------------------------------------------------------ VOID insert_table( PLZW_DATA lzw ) { WORD hash; hash = get_hash_index( lzw ); if( lzw->lp_code[ hash ] == 0xFFFF ) { lzw->lp_prefix[ hash ] = lzw->prefix; lzw->lp_suffix[ hash ] = lzw->suffix; lzw->lp_code[ hash ] = lzw->code; } else { while( lzw->lp_code[ hash ] != 0xFFFF ) { hash = re_hash_index( hash ); } lzw->lp_prefix[ hash ] = lzw->prefix; lzw->lp_suffix[ hash ] = lzw->suffix; lzw->lp_code[ hash ] = lzw->code; }
} //------------------------------------------------------------------------------
#endif
(4) encode.h 压缩程序主函数
#ifndef __ENCODE_H__ #define __ENCODE_H__ //------------------------------------------------------------------------------ #include <stdio.h> #include <stdlib.h> #include <windows.h>
//------------------------------------------------------------------------------ VOID output_code( DWORD code ,PBUFFER_DATA out, PLZW_DATA lzw) { out->dw_buffer |= code << ( 32 - out->by_left - lzw->cur_code_len ); out->by_left += lzw->cur_code_len;
while( out->by_left >= 8 ) { if( out->index == BUFFERSIZE ) { empty_buffer( lzw,out); }
out->lp_buffer[ out->index++ ] = (BYTE)( out->dw_buffer >> 24 ); out->dw_buffer <<= 8; out->by_left -= 8; } } //------------------------------------------------------------------------------ VOID do_encode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw) { WORD prefix; while( in->index != in->top ) { if( !in_table(lzw) ) { // current code not in code table // then add it to table and output prefix
insert_table(lzw); prefix = lzw->suffix; output_code( lzw->prefix ,out ,lzw ); lzw->code++;
if( lzw->code == (WORD)1<< lzw->cur_code_len ) { // code reached current code top(1<<cur_code_len) // then current code length add one lzw->cur_code_len++; if( lzw->cur_code_len == CODE_LEN + 1 ) { re_init_lzw( lzw ); }
} } else { // current code already in code table // then output nothing prefix = get_code(lzw);
} lzw->prefix = prefix; lzw->suffix = in->lp_buffer[ in->index++ ]; } }
//------------------------------------------------------------------------------ VOID encode(HANDLE h_sour,HANDLE h_dest) { LZW_DATA lzw; BUFFER_DATA in ; BUFFER_DATA out; BOOL first_run = TRUE;
lzw_create( &lzw ,h_sour,h_dest ); buffer_create( &in ); buffer_create( &out );
while( load_buffer( h_sour, &in ) ) { if( first_run ) {// File length should be considered but here we simply // believe file length bigger than 2 bytes. lzw.prefix = in.lp_buffer[ in.index++ ]; lzw.suffix = in.lp_buffer[ in.index++ ]; first_run = FALSE; } do_encode(&in , &out, &lzw); } output_code(lzw.prefix, &out , &lzw); output_code(lzw.suffix, &out , &lzw); out.end_flag = TRUE; empty_buffer( &lzw,&out);
lzw_destory( &lzw ); buffer_destory( &in ); buffer_destory( &out ); }
//------------------------------------------------------------------------------
#endif
(5) decode.h 解压函数主函数
#ifndef __DECODE_H__ #define __DECODE_H__ //------------------------------------------------------------------------------ #include <stdio.h> #include <stdlib.h> #include <windows.h> //------------------------------------------------------------------------------ VOID out_code( WORD code ,PBUFFER_DATA buffer,PLZW_DATA lzw,PSTACK_DATA stack) { WORD tmp; if( code < 0x100 ) { stack->lp_stack[ stack->index++ ] = code; } else { stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ code ]; tmp = lzw->lp_prefix[ code ]; while( tmp > 0x100 ) { stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ tmp ]; tmp = lzw->lp_prefix[ tmp ]; } stack->lp_stack[ stack->index++ ] = (BYTE)tmp;
}
while( stack->index ) { if( buffer->index == BUFFERSIZE ) { empty_buffer(lzw,buffer); } buffer->lp_buffer[ buffer->index++ ] = stack->lp_stack[ --stack->index ] ; } } //------------------------------------------------------------------------------ VOID insert_2_table(PLZW_DATA lzw ) {
lzw->lp_code[ lzw->code ] = lzw->code; lzw->lp_prefix[ lzw->code ] = lzw->prefix; lzw->lp_suffix[ lzw->code ] = lzw->suffix; lzw->code++;
if( lzw->code == ((WORD)1<<lzw->cur_code_len)-1 ) { lzw->cur_code_len++; if( lzw->cur_code_len == CODE_LEN+1 ) lzw->cur_code_len = 9; } if(lzw->code >= 1<<CODE_LEN ) { re_init_lzw(lzw); }
} //------------------------------------------------------------------------------ WORD get_next_code( PBUFFER_DATA buffer , PLZW_DATA lzw ) {
BYTE next; WORD code; while( buffer->by_left < lzw->cur_code_len ) { if( buffer->index == BUFFERSIZE ) { load_buffer( lzw->h_sour, buffer ); } next = buffer->lp_buffer[ buffer->index++ ]; buffer->dw_buffer |= (DWORD)next << (24-buffer->by_left); buffer->by_left += 8; } code = buffer->dw_buffer >> ( 32 - lzw->cur_code_len ); buffer->dw_buffer <<= lzw->cur_code_len; buffer->by_left -= lzw->cur_code_len;
return code; } //------------------------------------------------------------------------------ VOID do_decode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw, PSTACK_DATA stack) { WORD code; WORD tmp; while( in->index != in->top ) { code = get_next_code( in ,lzw );
if( code < 0x100 ) { // code already in table // then simply output the code lzw->suffix = (BYTE)code; } else { if( code < lzw->code ) { // code also in table // then output code chain tmp = lzw->lp_prefix[ code ]; while( tmp > 0x100 ) { tmp = lzw->lp_prefix[ tmp ]; } lzw->suffix = (BYTE)tmp; } else { // code == lzw->code // code not in table // add code into table // and out put code tmp = lzw->prefix; while( tmp > 0x100 ) { tmp = lzw->lp_prefix[ tmp ]; } lzw->suffix = (BYTE)tmp; } } insert_2_table( lzw ); out_code(code,out,lzw,stack);
lzw->prefix = code;
}
} //------------------------------------------------------------------------------ VOID decode( HANDLE h_sour, HANDLE h_dest ) { LZW_DATA lzw; BUFFER_DATA in ; BUFFER_DATA out; STACK_DATA stack; BOOL first_run;
first_run = TRUE;
lzw_create( &lzw ,h_sour,h_dest ); buffer_create( &in ); buffer_create( &out ); stack_create(&stack );
while( load_buffer( h_sour, &in ) ) { if( first_run ) { lzw.prefix = get_next_code( &in, &lzw ); lzw.suffix = lzw.prefix; out_code(lzw.prefix, &out, &lzw , &stack); first_run = FALSE; } do_decode(&in , &out, &lzw, &stack); }
empty_buffer( &lzw,&out);
lzw_destory( &lzw ); buffer_destory( &in ); buffer_destory( &out ); stack_destory( &stack); }
#endif
2 下面给出一个应用上面模块的简单例子
#include <stdio.h> #include <stdlib.h> //------------------------------------------------------------------------------
#include "lzw.h" #include "hash.h" #include "fileio.h" #include "encode.h" #include "decode.h"
//------------------------------------------------------------------------------ HANDLE h_file_sour; HANDLE h_file_dest; HANDLE h_file; CHAR* file_name_in = "d:\\code.c"; CHAR* file_name_out= "d:\\encode.e"; CHAR* file_name = "d:\\decode.d";
//------------------------------------------------------------------------------ int main(int argc, char *argv[]) { h_file_sour = file_handle(file_name_in); h_file_dest = file_handle(file_name_out); h_file = file_handle(file_name);
encode(h_file_sour, h_file_dest); // decode(h_file_dest,h_file);
CloseHandle(h_file_sour); CloseHandle(h_file_dest); CloseHandle(h_file);
return 0; }
3 后语
之前研究gif文件格式时偶然接触了lzw压缩算法,于是就想自己动手实现。从一开始看人家的原码,然后跟着模仿,到现在用自己的语言表达出来,从理解原理到代码的实现花费了不少时间与精力,但是真正的快乐也就在这里,现在把她拿出来跟大家分享也就是分享快乐。
|