Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TIFF PackBits compression algorithm #137

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ MAN5 = \

OBJS = \
lprint.o \
tiff-packbits.o \
lprint-brother.o \
lprint-common.o \
lprint-cpcl.o \
Expand Down Expand Up @@ -194,6 +195,7 @@ resheaders:
# Dependencies...
$(OBJS) $(TESTOBJS): config.h lprint.h Makefile
lprint.o: \
tiff-packbits.h \
lprint-brother.h \
lprint-cpcl.h \
lprint-dymo.h \
Expand Down
90 changes: 90 additions & 0 deletions tiff-packbits.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//
// TIFF PackBits algorithm
//
// Copyright © 2024 by Andreas Grünbacher.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.

#include <string.h>
#include "tiff-packbits.h"

// tiff_packbits - pack a sequence of bytes
//
// Encodes sequences of repeating and non-repeating bytes as the length of each
// sequence, followed by the contents of the sequence: a length byte between 0
// and 127 indicates a sequence of N + 1 non-repeating bytes (1 to 128); a
// length byte between -127 and -1 indicates a byte that is repeated 1 - N
// times (2 to 128).  The length byte -128 is defined as a no-op by the TIFF
// PackBits scheme and is therefore never emitted.  For example,
//
//   abcdef => 5 "abcdef"              (7 bytes)
//   abbbbc => 0 "a" -3 "b" 0 "c"      (6 bytes)
//   aabbcc => -1 "a" -1 "b" -1 "c"    (6 bytes)
//   abbccd => 5 "abbccd"              (7 bytes)
//
// Parameters:
//   out - destination buffer; must have room for tiff_packbits_bufsize(len)
//         bytes and must not overlap the input
//   in  - bytes to encode
//   len - number of bytes at "in"
//
// The output will be at most tiff_packbits_bufsize(len) bytes in size.
//
// Returns the size of the encoded byte sequence (0 when len is 0).
//
unsigned tiff_packbits(unsigned char *out, const unsigned char *in, unsigned len)
{
  enum {
    MAX_LITERAL = 128,  // header 0..127     => 1..128 literal bytes
    MAX_RUN = 128       // header -1..-127   => 2..128 repetitions
  };

  unsigned char *orig_out = out;
  unsigned start = 0;  // start of the literal to emit
  unsigned pos = 0;    // current position
  unsigned rlen = 0;   // length of the run starting at pos

  while (start < len) {
    while (pos < len) {
      // extend the literal as long as nothing repeats
      while (pos + 1 < len && in[pos] != in[pos + 1])
        pos++;

      // find the next run
      rlen = 1;
      while (pos + rlen < len && in[pos] == in[pos + rlen])
        rlen++;

      // Require at least three repetitions to start a run if we already have a
      // literal (start != pos), and two otherwise: if we already have a
      // literal, extending it by two bytes is as cheap as starting a run, but
      // we can still extend that literal later at no extra cost.  If we start
      // a run, starting another literal will require an extra byte.
      if (rlen >= 2 + (start != pos))
        break;

      // append to the literal instead
      pos += rlen;
      rlen = 0;
    }

    // emit the literal in[start .. pos), split into chunks of at most 128
    while (start < pos) {
      unsigned chunk = pos - start;

      if (chunk > MAX_LITERAL)
        chunk = MAX_LITERAL;
      *out++ = (unsigned char)(chunk - 1);
      memcpy(out, in + start, chunk);
      out += chunk;
      start += chunk;
    }

    // emit the run; each chunk is capped at 128 repetitions so that the
    // header never becomes -128 (0x80), which decoders treat as a no-op
    while (rlen > 1) {
      unsigned chunk = rlen;

      if (chunk > MAX_RUN)
        chunk = MAX_RUN;
      // 1 - chunk wraps in unsigned arithmetic; the low byte is the
      // two's-complement encoding of the negative header
      *out++ = (unsigned char)(1 - chunk);
      *out++ = in[pos];
      pos += chunk;
      rlen -= chunk;
    }

    start = pos;
    // convert a potential remaining run of 1 into a literal
    pos += rlen;
    rlen = 0;
  }

  return (unsigned)(out - orig_out);
}
11 changes: 11 additions & 0 deletions tiff-packbits.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef TIFF_PACKBITS_H
#define TIFF_PACKBITS_H

// tiff_packbits_bufsize - upper bound on the encoded size of len bytes
//
// Returns len plus one header byte for every started group of 128 input
// bytes, i.e. len + ceil(len / 128).  Use this to size the output buffer
// passed to tiff_packbits().
static inline unsigned tiff_packbits_bufsize(unsigned len)
{
  return len + (len + 127) / 128;
}

// tiff_packbits - PackBits-encode len bytes from "in" into "out"
//
// "out" must have room for at least tiff_packbits_bufsize(len) bytes.
// Returns the number of bytes written to "out".
unsigned tiff_packbits(unsigned char *out, const unsigned char *in, unsigned len);

#endif /* TIFF_PACKBITS_H */