-
Notifications
You must be signed in to change notification settings - Fork 0
/
jpegrip.c
287 lines (247 loc) · 8.9 KB
/
jpegrip.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
/*
JPEG the Ripper
Copyright (c) 2005,2022 github.com/rusq
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compat.h"
#include "jpeg.h"
#include "log.h"
/* Size, in bytes, of the heap buffers used for reading the input file. */
#define BUF_SIZE (16384)

/* Status codes.  search_file returns ERROR on failure.
   NOTE(review): NOT_FOUND is not referenced in the visible part of this
   file - confirm whether it is still needed. */
#define ERROR (-2)
#define NOT_FOUND (-3)

/* Byte signatures we are looking for.  jpeg_begin is the start-of-image
   marker (FF D8) followed by an APP0 marker (FF E0) with segment length 16,
   i.e. the beginning of a JFIF-style header; jpeg_end is the end-of-image
   marker (FF D9). */
const unsigned char jpeg_begin[6] = {
    0xff, 0xd8, /* SOI */
    0xff, 0xe0, /* APP0 */
    0x00, 0x10  /* APP0 segment length */
};
const unsigned char jpeg_end[2] = {
    0xff, 0xd9 /* EOI */
};
/* search_buf looks for the first occurrence of the seq_sz-byte sequence seq
   inside the buf_sz-byte buffer buf.

   Returns the zero-based offset of the first match (which may be 0), or -1
   when the sequence does not occur or is longer than the buffer. */
int search_buf(const unsigned char *buf, const size_t buf_sz, const unsigned char *seq,
               const size_t seq_sz) {
    size_t pos = 0;
    size_t last_start;

    if (seq_sz > buf_sz) {
        return -1;
    }

    /* last position at which a whole sequence still fits into the buffer */
    last_start = buf_sz - seq_sz;
    while (pos <= last_start) {
        if (memcmp(&buf[pos], seq, seq_sz) == 0) {
            return (int)pos;
        }
        pos++;
    }
    return -1;
}
/* search_file searches the hFile, starting at start_pos, for the presense of
seq, that has seq_sz length. It will return the offset of the first byte of the
sequence in the file. If it is unsuccessful, it will return an ERROR, or EOF,
if end of file is encountered while searching for the sequence */
long search_file(FILE *hFile, long start_pos, const unsigned char *seq, const size_t seq_sz) {
unsigned char *buf;
if (seq_sz == 0) {
llog("search_file: zero sequence size");
return EOF;
}
if (fseek(hFile, start_pos, SEEK_SET) == -1) {
perror("search_file: seek failed");
return ERROR;
}
if ((buf = malloc(BUF_SIZE)) == 0) {
perror("memory allocation error");
return ERROR;
}
for (;;) {
size_t bytes_read = 0;
int buf_offset = 0;
int file_pos = ftell(hFile);
if ((bytes_read = fread(buf, sizeof(unsigned char), BUF_SIZE, hFile)) == 0) {
if (feof(hFile) || (bytes_read < seq_sz)) {
free(buf);
return EOF;
} else {
perror("search_file: read error");
goto looser;
}
}
buf_offset = search_buf(buf, bytes_read, seq, seq_sz);
if (buf_offset != -1) {
free(buf);
/* return the offset of the finding */
return file_pos + buf_offset;
}
if (bytes_read == seq_sz) {
/* we have scanned the last seq_sz bytes of the file, and did not
find the sequence we were looking for, it does not make sense to
continue doing this */
free(buf);
return EOF;
}
/* reversing the file pointer for seq_sz bytes to make sure that
there's a buffer overlap, in case the sequence is on the border
between buffers */
if (fseek(hFile, -(int)seq_sz, SEEK_CUR) == -1) {
perror("search_file: seek error");
goto looser;
}
}
looser:
free(buf);
return ERROR;
}
/* fmt_string builds the printf-style format string that format_name consumes
   and stores it in buf, which must provide room for buf_sz bytes.  The result
   has the form "<prefix>%0<digits>ld.<ext>"; for example prefix "jpg",
   digits 8 and ext "jpg" produce "jpg%08ld.jpg".

   Returns the length of the generated string (not counting the terminating
   NUL), or -1 if buf is NULL, buf_sz is not positive, or the string does not
   fit into buf.  On a fit failure buf is set to the empty string so that a
   truncated, malformed format can never be used by accident. */
int fmt_string(char *buf, const int buf_sz, const char *prefix, const int digits, const char *ext) {
    int written;

    if (buf == NULL || buf_sz <= 0) {
        llog("filename size can't be zero");
        return -1;
    }
    memset(buf, 0, (size_t)buf_sz);

    /* create format string; "%%" emits the literal '%' of the result */
    written = snprintf(buf, (size_t)buf_sz, "%s%%0%dld.%s", prefix, digits, ext);
    if (written < 0 || written >= buf_sz) {
        buf[0] = '\0';
        return -1;
    }
    return written;
}
/* format_name writes the output file name for the given sequence number into
   output (at most output_sz bytes, including the terminating NUL).  fmt is a
   format string as produced by fmt_string, i.e. it contains a single "%ld"
   style conversion, such as "jpg%08ld.jpg".

   Returns the value of snprintf: the length the complete name has (excluding
   the terminating NUL), or a negative value on error.  Returns -1 when
   output_sz is negative. */
int format_name(char *output, const int output_sz, const char *fmt, const int sequence) {
    if (output_sz < 0) {
        return -1;
    }
    /* fmt consumes a long ("%ld"); passing a plain int through the variadic
       call would be undefined behaviour, so widen it explicitly */
    return snprintf(output, (size_t)output_sz, fmt, (long)sequence);
}
/* min yields the smaller of the two ints x and y.  It is only defined when
   no macro called min is already in effect. */
#ifndef min
int min(const int x, const int y) {
    if (x < y) {
        return x;
    }
    return y;
}
#endif
/* extract copies size bytes of hFile, starting at start_offset, into a newly
   created file called filename (opened with "wb+", so an existing file of
   that name is truncated).

   Returns size when every byte was copied and the output file was closed
   successfully.  Returns 0 when size is not positive (nothing to do) or when
   seeking, creating the output file, allocating memory, reading, writing or
   closing fails.  Returns EOF when the input file ends before size bytes
   could be read; callers should treat this as an error as well.  After a
   failure a partially written output file may remain on disk. */
long extract(FILE *hFile, const char *filename, long start_offset, long size) {
    long result = 0;
    size_t remain;
    unsigned char *buf = NULL; /* temporary buffer */
    FILE *f;                   /* output file */

    if (size <= 0) {
        /* a negative size would otherwise wrap to a huge unsigned count */
        llog("extract: nothing to do");
        return 0;
    }
    remain = (size_t)size;

    if (fseek(hFile, start_offset, SEEK_SET) == -1) {
        perror("extract: failed to reposition the file to start offset");
        return 0;
    }
    if ((f = fopen(filename, "wb+")) == NULL) {
        perror("failed to create the output file");
        return 0;
    }
    if ((buf = malloc(BUF_SIZE)) == NULL) {
        perror("extract: failed to allocate memory");
        goto done;
    }

    while (remain > 0) {
        /* compute the chunk size in size_t so that large remainders are not
           truncated by a conversion to int */
        size_t chunk = remain < (size_t)BUF_SIZE ? remain : (size_t)BUF_SIZE;
        size_t bytes_read = fread(buf, sizeof(unsigned char), chunk, hFile);
        size_t bytes_written;

        if (bytes_read == 0) {
            if (feof(hFile) != 0) {
                result = EOF; /* attempted to read past the file end */
            } else {
                perror("extract: read error");
            }
            goto done;
        }

        bytes_written = fwrite(buf, sizeof(unsigned char), bytes_read, f);
        if (bytes_written == 0) {
            perror("extract: write error");
            goto done;
        }
        if (bytes_read != bytes_written) {
            llog("extract: unexpected number of bytes written: read=%lu != written=%lu",
                 (unsigned long)bytes_read, (unsigned long)bytes_written);
            goto done;
        }
        remain -= bytes_written;
    }
    result = size;

done:
    free(buf);
    /* fclose flushes buffered output, so a failure here means data was lost */
    if (fclose(f) != 0) {
        perror("extract: failed to close the output file");
        if (result == size) {
            result = 0;
        }
    }
    return result;
}
/* rip_jpeg scans hFile from its beginning for embedded JPEG images and writes
   every image it finds to its own file.  An image starts at a jpeg_begin
   signature and ends with the first jpeg_end signature found after its
   header.  Output files are named "jpg<8-digit number>.jpg", numbered from 0
   in the order the images are found; each search for the next image resumes
   right after the end of the previous one.

   Returns the number of files written, or -1 on error.  If a start signature
   is found without a matching end, the scan stops and the number of files
   written so far is returned. */
int rip_jpeg(FILE *hFile) {
    long num_files = 0;
    long blob_start = 0, blob_end = 0;
    char output_fmt[MAX_FNAME];

    if (fmt_string(output_fmt, MAX_FNAME, "jpg", 8, "jpg") < 0) {
        llog("failed to create a output format string");
        return -1;
    }

    for (;;) {
        char output_name[MAX_FNAME];
        long output_file_sz = 0;
        int hdr_sz = 0;
        int name_len = 0;

        /* search for start */
        blob_start = search_file(hFile, blob_end, jpeg_begin, sizeof(jpeg_begin));
        if (blob_start == ERROR) {
            perror("rip_jpeg: search for jepg start");
            return -1;
        } else if (blob_start == EOF) {
            /* no more images in the file */
            return (int)num_files;
        }
        llog("%8ld: found start at %08lX\n", num_files + 1, blob_start);

        /* reset the file position and try to determine the header size */
        if (fseek(hFile, blob_start, SEEK_SET) == -1) {
            perror("seek failed");
            return -1;
        }
        if ((hdr_sz = jpeg_hdr_size(hFile)) == RET_ERROR) {
            llog("unable to determine header size\n");
            return -1;
        }

        /* search for ending, skipping the header */
        blob_end = search_file(hFile, blob_start + hdr_sz, jpeg_end, sizeof(jpeg_end));
        if (blob_end == ERROR) {
            perror("rip_jpeg: search for jpeg end");
            return -1;
        } else if (blob_end == EOF) {
            llog("search terminated prematurely (found start at %ld, but no end)\n", blob_start);
            return (int)num_files;
        }

        /* blob_end was pointing at the beginning of the sequence, we need end
           of it. */
        blob_end += (long)sizeof(jpeg_end);
        output_file_sz = blob_end - blob_start;
        llog("%8ld: found end at %08lX, detected file size: %ld\n", num_files + 1, blob_end,
             output_file_sz);

        /* a result of MAX_FNAME or more means the name was truncated */
        name_len = format_name(output_name, MAX_FNAME, output_fmt, num_files);
        if (name_len < 0 || name_len >= MAX_FNAME) {
            llog("failed to generate a filename");
            return -1;
        }
        ltrace("came up with this brilliant name: %s\n", output_name);

        if (extract(hFile, output_name, blob_start, output_file_sz) != output_file_sz) {
            llog("error extracting %ld bytes to %s\n", output_file_sz, output_name);
            return -1;
        }
        llog("%8ld: written to:\t%s\n", num_files + 1, output_name);
        num_files++;
    }
}