-
Notifications
You must be signed in to change notification settings - Fork 12
/
braid.c
422 lines (392 loc) · 15 KB
/
braid.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
// braid.c
// Copyright (C) 2016 Mark Adler
// For conditions of distribution and use, see the accompanying LICENSE file.
//
// Merge a series of .br streams into a single stream. The names of files with
// the streams are provided on the command line, and the combined stream is
// written to stdout. The input streams are scanned backwards using the
// distances to the previous headers in the stream, and then read forwards to
// write out. Any input streams that do not have a complete set of distances
// are skipped, with that is noted as a warning. If all of the input .br files
// have a total uncompressed size, then the output trailer contains a total
// uncompressed size. If there is more than one embedded brotli stream, then
// the output trailer contains a check value of the individual check values.
#include <stdio.h>
#include <stdint.h>
#include "br.h"
#include "xxhash.h"
#include "try.h"
#define local static
// Return the parity of the low 8 bits of n in the 8th bit. If this is
// exclusive-or'ed with n, then the result has even (zero) parity.
local inline unsigned parity(unsigned n) {
return (0x34cb00 >> ((n ^ (n >> 4)) & 0xf)) & 0x80;
}
// Read bytes from a file backwards. The byte returned is the one that
// precedes the current file position. The file position is left pointing at
// the byte returned, so that the next call returns the byte before that.
// Throw an error if at the start of the file or if there is an I/O error.
local inline unsigned rget1(FILE *in) {
int ch;
if (ftello(in) == 0 ||
fseeko(in, -1, SEEK_CUR) ||
(ch = getc(in)) == EOF ||
fseeko(in, -1, SEEK_CUR))
throw(1, "premature arrival at start of file");
return ch;
}
// Get a bidirectional variable-length number from in, reading backwards.
local inline uintmax_t getrbvar(FILE *in) {
unsigned ch = rget1(in);
if ((ch & 0x80) == 0)
throw(3, "high bit not set (end of bidirectional variable length)");
uintmax_t val = ch & 0x7f;
do {
ch = rget1(in);
val = (val << 7) | (ch & 0x7f);
} while ((ch & 0x80) == 0);
return val;
}
// Get one byte in the forward direction. Throw an error on failure.
local inline unsigned get1(FILE *in) {
int ch = getc(in);
if (ch == EOF)
throw(1, "premature end of file");
return ch;
}
// Get a forward variable-length unsigned integer from in.
local inline uintmax_t getvar(FILE *in) {
uintmax_t val = 0;
unsigned ch;
unsigned shift = 0;
do {
ch = get1(in);
val |= (uintmax_t)(ch & 0x7f) << shift;
shift += 7;
} while ((ch & 0x80) == 0);
return val;
}
// Get a bidirectional variable-length number from in.
local inline uintmax_t getbvar(FILE *in) {
unsigned ch = get1(in);
if ((ch & 0x80) == 0)
throw(3, "invalid bidirectional integer");
uintmax_t val = ch & 0x7f;
unsigned shift = 0;
do {
ch = get1(in);
shift += 7;
val |= (uintmax_t)(ch & 0x7f) << shift;
} while ((ch & 0x80) == 0);
return val;
}
// Write out a bi-directional variable sized integer, where the first and last
// bytes have a high bit of 1, and the intermediate bytes have a high bit of 0.
local void bvar(uintmax_t num, FILE *out) {
putc(0x80 | (num & 0x7f), out);
while ((num >>= 7) > 0x7f)
putc(num & 0x7f, out);
putc(0x80 | num, out);
}
// Write out k bytes of an integer in little-endian order. k must be at least
// one.
local void little(uintmax_t num, size_t k, FILE *out) {
do {
putc(num, out);
num >>= 8;
} while (--k);
}
// Type for a linked list used as a stack of offsets.
typedef struct pos_s pos_t;
struct pos_s {
off_t off;
pos_t *next;
};
// Push an offset on to the stack.
local inline void push(off_t off, pos_t **pos)
{
pos_t *entry = malloc(sizeof(pos_t));
if (entry == NULL)
throw(-1, "out of memory");
entry->off = off;
entry->next = *pos;
*pos = entry;
}
// Pop and return an offset from the stack, or -1 if the stack is empty.
local inline off_t pop(pos_t **pos)
{
pos_t *entry = *pos;
if (entry == NULL)
return -1;
off_t off = entry->off;
*pos = entry->next;
free(entry);
return off;
}
// Scan a .br stream backwards for the positions of the headers and trailer.
// Return the file offsets of the headers after the first as well as of the
// trailer, all as an allocated linked list starting at *pos. The offset of
// the first header (4) is the first item in the list, and the offset of the
// trailer is the last item in the list. Any errors, including missing
// distances, are thrown.
local void scan(FILE *in, pos_t **pos) {
// verify .br signature
{
rewind(in);
unsigned char buf[4];
if (fread(buf, 1, 4, in) != 4 || memcmp(buf, BR_SIG, 4))
throw(2, "signature mismatch -- not a .br file");
}
// verify the trailer, push the file offset of the trailer, and get the
// file offset of the last header.
off_t at;
{
fseeko(in, 0, SEEK_END);
unsigned trail;
while ((trail = rget1(in)) == 0) // get final trailer mask
; // bypass any zero padding
if (parity(trail) || (trail & BR_CONTENT_TRAIL) == 0 ||
(trail & BR_CONTENT_EXTRA_MASK))
throw(3, "invalid final trailer");
if ((trail & BR_CONTENT_CHECK) != 7)
fseeko(in, -(1 << (trail & 3)), SEEK_CUR); // skip check of checks
if (trail & BR_CONTENT_LEN)
getrbvar(in); // skip total uncompressed length
off_t dist = 0;
if (trail & BR_CONTENT_OFF)
dist = getrbvar(in); // get distance to last header
if (trail != (BR_CONTENT_TRAIL | 7))
if (rget1(in) != trail) // get leading trailer mask
throw(3, "invalid trailer mask");
at = ftello(in); // file offset of start of trailer
if (at > 4 && (trail & BR_CONTENT_OFF) == 0)
throw(4, "no final distance to previous header");
*pos = NULL; // empty the stack
push(at, pos); // save trailer offset on stack
if (dist) {
at -= dist; // compute offset of last header
push(at, pos); // save last header offset
}
}
// go through the string of distances until at the first header
while (at > 4) { // first header is at offset 4
fseeko(in, at, SEEK_SET); // go to previous header
unsigned mask = get1(in); // get header content mask
if (parity(mask) || (mask & BR_CONTENT_TRAIL))
throw(3, "invalid header content mask");
if ((mask & BR_CONTENT_OFF) == 0)
throw(4, "missing intermediate distance");
at -= getvar(in); // offset of previous header
push(at, pos); // save header position on stack
}
if (at != 4)
throw(3, "invalid distance");
// now at offset 4, so there is indeed a complete set of distances
}
// Write one byte to out, updating offset and check.
local inline unsigned put1(unsigned val, FILE *out, off_t *off,
XXH32_state_t *check) {
putc(val, out);
(*off)++;
if (check != NULL) {
unsigned char buf[1];
buf[0] = val;
XXH32_update(check, buf, 1);
}
return val;
}
// Write the variable-length integer n to out, updating off and check.
local inline void putvar(uintmax_t n, FILE *out, off_t *off,
XXH32_state_t *check) {
while (n > 0x7f) {
put1(n & 0x7f, out, off, check);
n >>= 7;
}
put1(n | 0x80, out, off, check);
}
// Copy len bytes from in to out, updating off and check.
local void copyn(FILE *in, uintmax_t len, FILE *out, off_t *off,
XXH32_state_t *check) {
*off += len;
unsigned char buf[16384];
while (len) {
size_t n = len > sizeof(buf) ? sizeof(buf) : len;
fread(buf, 1, n, in);
fwrite(buf, 1, n, out);
if (ferror(in) || ferror(out))
throw(1, "input/output error");
if (check != NULL)
XXH32_update(check, buf, n);
len -= n;
}
}
// Copy the segment pointed to by the front of the *pos list from in to out.
// Update the check of check values. Pull the offset of the segment copied
// from the stack. It is assured that there will be at least two elements on
// the stack when copy() is called: the offset of the next header, and the
// offset of the header after that or the trailer. *last is the offset of the
// last header (zero for the first segment), and is updated to the offset of
// the header written here. *off is the current offset of the output file, and
// is updated per the data written.
local void copy(FILE *in, FILE *out, off_t *off, off_t *last, pos_t **pos,
XXH32_state_t *check) {
// read header content mask and skip distance back if present
fseeko(in, pop(pos), SEEK_SET);
unsigned mask = getc(in);
if (mask & BR_CONTENT_OFF)
getvar(in); // skip old distance
// header check state (set head to NULL to stop calculation)
XXH32_state_t xxh32;
XXH32_state_t *head = &xxh32;
XXH32_reset(head, 0);
// peek ahead to get id and extra if present, strip extra of mod and/or
// name if this isn't the first segment
unsigned id = 0;
if ((mask & BR_CONTENT_CHECK) == 7)
id = getc(in);
unsigned extra = 0, strip = 0;
if (mask & BR_CONTENT_EXTRA_MASK) {
strip = extra = getc(in);
if (*last)
strip &= ~(BR_EXTRA_MOD | BR_EXTRA_NAME);
if ((extra & BR_EXTRA_CHECK) == 0) // header check if in original
head = NULL;
}
// write header content mask, and offset if not first segment
{
if (*last)
mask |= BR_CONTENT_OFF;
unsigned newmask = mask;
if ((strip & BR_EXTRA_ANY) == 0)
newmask &= ~BR_CONTENT_EXTRA_MASK;
off_t here = *off;
put1(newmask ^ parity(newmask), out, off, head);
if (*last)
putvar(here - *last, out, off, head);
*last = here;
}
// write check ID, if present
if ((mask & BR_CONTENT_CHECK) == 7)
put1(id, out, off, head);
// copy extra contents, leaving out mod and name if not first segment
if (mask & BR_CONTENT_EXTRA_MASK) {
if (strip & BR_EXTRA_ANY)
put1(strip ^ parity(strip), out, off, head); // extra mask
if (extra & BR_EXTRA_MOD) {
uintmax_t mod = getvar(in); // get mod time
if (strip & BR_EXTRA_MOD)
putvar(mod, out, off, head); // write mod time
}
if (extra & BR_EXTRA_NAME) {
uintmax_t len = getvar(in); // get name length
if (strip & BR_EXTRA_NAME) {
putvar(len, out, off, head); // write name length
copyn(in, len, out, off, head); // copy name
}
else
fseeko(in, len, SEEK_CUR); // skip name
}
if (extra & BR_EXTRA_EXTRA) {
uintmax_t len = getvar(in);
putvar(len, out, off, head); // copy extra field length
copyn(in, len, out, off, head); // copy extra field
}
if (extra & BR_EXTRA_COMPRESSION_MASK)
put1(getc(in), out, off, head); // copy method mask
if (extra & BR_EXTRA_CHECK) {
getc(in); getc(in); // skip old header check
unsigned x = XXH32_digest(head) & 0xffff;
put1(x & 0xff, out, off, NULL); // write new header check
put1(x >> 8, out, off, NULL);
}
}
// copy the rest of the segment, updating check with the check value
{
// length of remainder
uintmax_t len = (*pos)->off - ftello(in);
// length of check value
unsigned n = (mask & BR_CONTENT_CHECK) == 7 ? 32 : 1 << (mask & 3);
copyn(in, len - n, out, off, NULL); // copy brotli stream
copyn(in, n, out, off, check); // copy check value
}
}
int main(int argc, char **argv) {
int ret = 0;
unsigned count = 0; // count of brotli streams, up to 2
intmax_t len = 0; // total uncompressed length or -1
XXH32_state_t check; // check of check values
XXH32_reset(&check, 0);
fwrite(BR_SIG, 1, 4, stdout); // write signature
off_t off = 4; // offset for stdout
off_t last = 0; // last header offset (none yet)
while (--argc) {
FILE *in = fopen(*++argv, "rb");
pos_t *pos;
int any;
ball_t err;
try {
if (in == NULL)
throw(1, "could not open %s", *argv);
scan(in, &pos);
any = pos->next != NULL;
}
preserve { // preserve pos
// copy segments
while (pos->next != NULL) {
copy(in, stdout, &off, &last, &pos, &check);
if (count < 2)
count++;
}
}
preserve { // preserve pos
// look at trailer to update uncompressed length -- if there is no
// length, then give up and don't try this again
if (len != -1) {
unsigned trail = getc(in);
if (trail & BR_CONTENT_LEN) {
if (trail & BR_CONTENT_OFF)
getbvar(in); // skip offset
len += getbvar(in); // add uncompressed length
}
else if (any)
len = -1;
}
// check for any i/o errors
if (ferror(in) || ferror(stdout))
throw(1, "input/output error");
}
always {
while (pos != NULL) // clean up any leftover mallocs
pop(&pos);
fclose(in);
}
catch (err) {
fprintf(stderr, "braid: %s in %s -- skipping\n", err.why, *argv);
drop(err);
ret = 1;
}
}
// write trailer
unsigned trail = BR_CONTENT_TRAIL | (count > 1 ? BR_CHECK_XXH32_4 : 7);
if (count == 0)
len = -1;
if (len != -1)
trail |= BR_CONTENT_LEN;
if (last)
trail |= BR_CONTENT_OFF;
trail ^= parity(trail);
putchar(trail); // write trailer mask
if (last)
bvar(off - last, stdout); // write offset to last header
if (len != -1)
bvar(len, stdout); // write total length
if (count > 1)
little(XXH32_digest(&check), 4, stdout); // write check
if (trail != (BR_CONTENT_TRAIL | 7))
putchar(trail); // write final trailer mask
if (ferror(stdout) || fclose(stdout)) {
fprintf(stderr, "braid: output error\n");
ret = 1;
}
return ret;
}