Skip to content

Commit

Permalink
fixed bugs reported by m^3
Browse files Browse the repository at this point in the history
  • Loading branch information
inikep committed Jan 4, 2016
1 parent c9bf5df commit 2815b94
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 6 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ The improvement in compression ratio is caused mainly because of:

**In my experiments there is no open-source bytewise compressor that gives a better ratio than lz5hc.**

[LZ4]: https://github.com/Cyan4973/lz4


The codewords description
-------------------------

LZ5 uses different output codewords and is not compatible with LZ4. LZ4 output codewords are 3 bytes long (24-bit) and look as follows:
- LLLL_MMMM OOOOOOOO OOOOOOOO - 16-bit offset, 4-bit match length, 4-bit literal length

Expand All @@ -25,7 +31,6 @@ So we can encode values 0-7 (3-bits) for matches (what means length of 3-10 for
that 10 bytes. So e.g. 30 is encoded as a flag 7 (match length=10) and a next byte 30-10=20. I tried many different variants (e.g. separate match lengths and literal lengths)
but these codewords were the best.

[LZ4]: https://github.com/Cyan4973/lz4

Benchmarks
-------------------------
Expand Down
6 changes: 4 additions & 2 deletions lib/lz5.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,12 +743,12 @@ int LZ5_loadDict (LZ5_stream_t* LZ5_dict, const char* dictionary, int dictSize)
if ((dict->initCheck) || (dict->currentOffset > 1 GB)) /* Uninitialized structure, or reuse overflow */
LZ5_resetStream(LZ5_dict);

if (dictSize < (int)HASH_UNIT)
/* if (dictSize < (int)HASH_UNIT)
{
dict->dictionary = NULL;
dict->dictSize = 0;
return 0;
}
}*/

if ((dictEnd - p) > LZ5_DICT_SIZE) p = dictEnd - LZ5_DICT_SIZE;
dict->currentOffset += LZ5_DICT_SIZE;
Expand Down Expand Up @@ -863,6 +863,8 @@ int LZ5_compress_forceExtDict (LZ5_stream_t* LZ5_dict, const char* source, char*
int LZ5_saveDict (LZ5_stream_t* LZ5_dict, char* safeBuffer, int dictSize)
{
LZ5_stream_t_internal* dict = (LZ5_stream_t_internal*) LZ5_dict;
if (!dict->dictionary)
return 0;
const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;

if ((U32)dictSize > LZ5_DICT_SIZE) dictSize = LZ5_DICT_SIZE; /* useless to define a dictionary > LZ5_DICT_SIZE */
Expand Down
2 changes: 1 addition & 1 deletion lib/lz5hc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1627,7 +1627,7 @@ int LZ5_saveDictHC (LZ5_streamHC_t* LZ5_streamHCPtr, char* safeBuffer, int dictS
LZ5HC_Data_Structure* streamPtr = (LZ5HC_Data_Structure*)LZ5_streamHCPtr;
int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
if (dictSize > LZ5_DICT_SIZE) dictSize = LZ5_DICT_SIZE;
if (dictSize < 4) dictSize = 0;
// if (dictSize < 4) dictSize = 0;
if (dictSize > prefixSize) dictSize = prefixSize;
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
{
Expand Down
4 changes: 2 additions & 2 deletions programs/fullbench.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ static int local_LZ5_decompress_safe_partial(const char* in, char* out, int inSi
/* frame functions */
static int local_LZ5F_compressFrame(const char* in, char* out, int inSize)
{
return (int)LZ5F_compressFrame(out, 2*inSize + 16, in, inSize, NULL);
return (int)LZ5F_compressFrame(out, LZ5F_compressFrameBound(inSize, NULL), in, inSize, NULL);
}

static LZ5F_decompressionContext_t g_dCtx;
Expand Down Expand Up @@ -731,7 +731,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
chunkP[0].origSize = (int)benchedSize; nbChunks=1;
break;
case 40: compressionFunction = local_LZ5_saveDict; compressorName = "LZ5_saveDict";
LZ5_loadDict(&LZ5_stream, chunkP[0].origBuffer, chunkP[0].origSize);
if (!LZ5_loadDict(&LZ5_stream, chunkP[0].origBuffer, chunkP[0].origSize)) continue;
break;
case 41: compressionFunction = local_LZ5_saveDictHC; compressorName = "LZ5_saveDictHC";
if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0))
Expand Down

0 comments on commit 2815b94

Please sign in to comment.