diff --git a/main.c b/main.c index 5013058..3df68c6 100644 --- a/main.c +++ b/main.c @@ -233,7 +233,7 @@ save_to_png_output save_to_png(save_to_png_input input) { page = pdf_load_page(ctx, doc, input.page); float scale_factor = 1.5; - fz_rect bounds = pdf_bound_page(ctx, page); + fz_rect bounds = pdf_bound_page(ctx, page, FZ_CROP_BOX); if (input.width != 0) { scale_factor = input.width / bounds.x1; } else if (input.scale != 0) { diff --git a/misc/mupdf/include/mupdf/fitz.h b/misc/mupdf/include/mupdf/fitz.h index 164bd95..014bf6c 100644 --- a/misc/mupdf/include/mupdf/fitz.h +++ b/misc/mupdf/include/mupdf/fitz.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUDPF_FITZ_H #define MUDPF_FITZ_H @@ -51,6 +51,8 @@ extern "C" { #include "mupdf/fitz/compressed-buffer.h" #include "mupdf/fitz/filter.h" #include "mupdf/fitz/archive.h" +#include "mupdf/fitz/heap.h" + /* Resources */ #include "mupdf/fitz/store.h" @@ -85,6 +87,9 @@ extern "C" { #include "mupdf/fitz/write-pixmap.h" #include "mupdf/fitz/output-svg.h" +#include "mupdf/fitz/story.h" +#include "mupdf/fitz/story-writer.h" + #ifdef __cplusplus } #endif diff --git a/misc/mupdf/include/mupdf/fitz/archive.h b/misc/mupdf/include/mupdf/fitz/archive.h index 18d54f0..407eef4 100644 --- a/misc/mupdf/include/mupdf/fitz/archive.h +++ b/misc/mupdf/include/mupdf/fitz/archive.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. 
// For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_ARCHIVE_H #define MUPDF_FITZ_ARCHIVE_H @@ -27,6 +27,7 @@ #include "mupdf/fitz/context.h" #include "mupdf/fitz/buffer.h" #include "mupdf/fitz/stream.h" +#include "mupdf/fitz/tree.h" /* PUBLIC API */ @@ -61,6 +62,15 @@ fz_archive *fz_open_archive(fz_context *ctx, const char *filename); */ fz_archive *fz_open_archive_with_stream(fz_context *ctx, fz_stream *file); +/** + Open zip or tar archive stream. + + Does the same as fz_open_archive_with_stream, but will not throw + an error in the event of failing to recognise the format. Will + still throw errors in other cases though! +*/ +fz_archive *fz_try_open_archive_with_stream(fz_context *ctx, fz_stream *file); + /** Open a directory as if it was an archive. @@ -78,17 +88,26 @@ fz_archive *fz_open_directory(fz_context *ctx, const char *path); /** Determine if a given path is a directory. + + In the case of the path not existing, or having no access + we will return 0. */ int fz_is_directory(fz_context *ctx, const char *path); /** - Drop the reference to an archive. + Drop a reference to an archive. - Closes and releases any memory or filehandles associated - with the archive. + When the last reference is dropped, this closes and releases + any memory or filehandles associated with the archive. */ void fz_drop_archive(fz_context *ctx, fz_archive *arch); +/** + Keep a reference to an archive. +*/ +fz_archive * +fz_keep_archive(fz_context *ctx, fz_archive *arch); + /** Return a pointer to a string describing the format of the archive. 
@@ -137,33 +156,71 @@ int fz_has_archive_entry(fz_context *ctx, fz_archive *arch, const char *name); name: Entry name to look for, this must be an exact match to the entry name in the archive. + + Throws an exception if a matching entry cannot be found. */ fz_stream *fz_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name); +/** + Opens an archive entry as a stream. + + Returns NULL if a matching entry cannot be found, otherwise + behaves exactly as fz_open_archive_entry. +*/ +fz_stream *fz_try_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name); + /** Reads all bytes in an archive entry into a buffer. name: Entry name to look for, this must be an exact match to the entry name in the archive. + + Throws an exception if a matching entry cannot be found. */ fz_buffer *fz_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name); +/** + Reads all bytes in an archive entry + into a buffer. + + name: Entry name to look for, this must be an exact match to + the entry name in the archive. + + Returns NULL if a matching entry cannot be found. Otherwise behaves + the same as fz_read_archive_entry. Exceptions may be thrown. +*/ +fz_buffer *fz_try_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name); + /** fz_archive: tar implementation */ /** - Detect if stream object is a tar achieve. + Detect if stream object is a tar archive. Assumes that the stream object is seekable. */ int fz_is_tar_archive(fz_context *ctx, fz_stream *file); +/** + Detect if stream object is an archive supported by libarchive. + + Assumes that the stream object is seekable. +*/ +int fz_is_libarchive_archive(fz_context *ctx, fz_stream *file); + +/** + Detect if stream object is a cfb archive. + + Assumes that the stream object is seekable. +*/ +int fz_is_cfb_archive(fz_context *ctx, fz_stream *file); + /** Open a tar archive file. 
- An exception is throw if the file is not a tar archive as + An exception is thrown if the file is not a tar archive as indicated by the presence of a tar signature. filename: a path to a tar archive file as it would be given to @@ -177,12 +234,52 @@ fz_archive *fz_open_tar_archive(fz_context *ctx, const char *filename); Open an archive using a seekable stream object rather than opening a file or directory on disk. - An exception is throw if the stream is not a tar archive as + An exception is thrown if the stream is not a tar archive as indicated by the presence of a tar signature. */ fz_archive *fz_open_tar_archive_with_stream(fz_context *ctx, fz_stream *file); +/** + Open an archive using libarchive. + + An exception is thrown if the file is not supported by libarchive. + + filename: a path to an archive file as it would be given to + open(2). +*/ +fz_archive *fz_open_libarchive_archive(fz_context *ctx, const char *filename); + +/** + Open an archive using libarchive. + + Open an archive using a seekable stream object rather than + opening a file or directory on disk. + + An exception is thrown if the stream is not supported by libarchive. +*/ +fz_archive *fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file); + +/** + Open a cfb file as an archive. + + An exception is thrown if the file is not recognised as a cfb. + + filename: a path to an archive file as it would be given to + open(2). +*/ +fz_archive *fz_open_cfb_archive(fz_context *ctx, const char *filename); + +/** + Open a cfb file as an archive. + + Open an archive using a seekable stream object rather than + opening a file or directory on disk. + + An exception is thrown if the stream is not recognised as a cfb. +*/ +fz_archive *fz_open_cfb_archive_with_stream(fz_context *ctx, fz_stream *file); + /** fz_archive: zip implementation */ @@ -197,7 +294,7 @@ int fz_is_zip_archive(fz_context *ctx, fz_stream *file); /** Open a zip archive file. 
- An exception is throw if the file is not a zip archive as + An exception is thrown if the file is not a zip archive as indicated by the presence of a zip signature. filename: a path to a zip archive file as it would be given to @@ -211,7 +308,7 @@ fz_archive *fz_open_zip_archive(fz_context *ctx, const char *path); Open an archive using a seekable stream object rather than opening a file or directory on disk. - An exception is throw if the stream is not a zip archive as + An exception is thrown if the stream is not a zip archive as indicated by the presence of a zip signature. */ @@ -265,13 +362,68 @@ void fz_close_zip_writer(fz_context *ctx, fz_zip_writer *zip); */ void fz_drop_zip_writer(fz_context *ctx, fz_zip_writer *zip); +/** + Create an archive that holds named buffers. + + tree can either be a preformed tree with fz_buffers as values, + or it can be NULL for an empty tree. +*/ +fz_archive *fz_new_tree_archive(fz_context *ctx, fz_tree *tree); + +/** + Add a named buffer to an existing tree archive. + + The tree will take a new reference to the buffer. Ownership + is not transferred. +*/ +void fz_tree_archive_add_buffer(fz_context *ctx, fz_archive *arch_, const char *name, fz_buffer *buf); + +/** + Add a named block of data to an existing tree archive. + + The data will be copied into a buffer, and so the caller + may free it as soon as this returns. +*/ +void fz_tree_archive_add_data(fz_context *ctx, fz_archive *arch_, const char *name, const void *data, size_t size); + +/** + Create a new multi archive (initially empty). +*/ +fz_archive *fz_new_multi_archive(fz_context *ctx); + +/** + Add an archive to the set of archives handled by a multi + archive. + + If path is NULL, then the archive contents will appear at the + top level, otherwise, the archives contents will appear prefixed + by path. 
+*/ +void fz_mount_multi_archive(fz_context *ctx, fz_archive *arch_, fz_archive *sub, const char *path); + +typedef int (fz_recognize_archive_fn)(fz_context *, fz_stream *); +typedef fz_archive *(fz_open_archive_fn)(fz_context *, fz_stream *); + +typedef struct +{ + fz_recognize_archive_fn *recognize; + fz_open_archive_fn *open; +} +fz_archive_handler; + +FZ_DATA extern const fz_archive_handler fz_libarchive_archive_handler; + +void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler); + /** Implementation details: Subject to change. */ struct fz_archive { + int refs; fz_stream *file; + const char *format; void (*drop_archive)(fz_context *ctx, fz_archive *arch); diff --git a/misc/mupdf/include/mupdf/fitz/band-writer.h b/misc/mupdf/include/mupdf/fitz/band-writer.h index be1cc56..853307b 100644 --- a/misc/mupdf/include/mupdf/fitz/band-writer.h +++ b/misc/mupdf/include/mupdf/fitz/band-writer.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_BAND_WRITER_H #define MUPDF_FITZ_BAND_WRITER_H @@ -26,6 +26,8 @@ #include "mupdf/fitz/system.h" #include "mupdf/fitz/context.h" #include "mupdf/fitz/output.h" +#include "mupdf/fitz/color.h" +#include "mupdf/fitz/separation.h" /** fz_band_writer @@ -66,6 +68,12 @@ void fz_write_header(fz_context *ctx, fz_band_writer *writer, int w, int h, int */ void fz_write_band(fz_context *ctx, fz_band_writer *writer, int stride, int band_height, const unsigned char *samples); +/** + Finishes up the output and closes the band writer. After this + call no more headers or bands may be written. 
+*/ +void fz_close_band_writer(fz_context *ctx, fz_band_writer *writer); + /** Drop the reference to the band writer, causing it to be destroyed. @@ -79,11 +87,13 @@ void fz_drop_band_writer(fz_context *ctx, fz_band_writer *writer); typedef void (fz_write_header_fn)(fz_context *ctx, fz_band_writer *writer, fz_colorspace *cs); typedef void (fz_write_band_fn)(fz_context *ctx, fz_band_writer *writer, int stride, int band_start, int band_height, const unsigned char *samples); typedef void (fz_write_trailer_fn)(fz_context *ctx, fz_band_writer *writer); +typedef void (fz_close_band_writer_fn)(fz_context *ctx, fz_band_writer *writer); typedef void (fz_drop_band_writer_fn)(fz_context *ctx, fz_band_writer *writer); struct fz_band_writer { fz_drop_band_writer_fn *drop; + fz_close_band_writer_fn *close; fz_write_header_fn *header; fz_write_band_fn *band; fz_write_trailer_fn *trailer; diff --git a/misc/mupdf/include/mupdf/fitz/bidi.h b/misc/mupdf/include/mupdf/fitz/bidi.h index 574682b..3aa2dbb 100644 --- a/misc/mupdf/include/mupdf/fitz/bidi.h +++ b/misc/mupdf/include/mupdf/fitz/bidi.h @@ -21,6 +21,7 @@ #define FITZ_BIDI_H #include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" /* Implementation details: subject to change. */ diff --git a/misc/mupdf/include/mupdf/fitz/bitmap.h b/misc/mupdf/include/mupdf/fitz/bitmap.h index 502b897..bf41035 100644 --- a/misc/mupdf/include/mupdf/fitz/bitmap.h +++ b/misc/mupdf/include/mupdf/fitz/bitmap.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_BITMAP_H #define MUPDF_FITZ_BITMAP_H @@ -60,6 +60,13 @@ fz_bitmap *fz_keep_bitmap(fz_context *ctx, fz_bitmap *bit); */ void fz_drop_bitmap(fz_context *ctx, fz_bitmap *bit); +/** + Invert bitmap. + + Never throws exceptions. +*/ +void fz_invert_bitmap(fz_context *ctx, fz_bitmap *bmp); + /** A halftone is a set of threshold tiles, one per component. Each threshold tile is a pixmap, possibly of varying sizes and diff --git a/misc/mupdf/include/mupdf/fitz/buffer.h b/misc/mupdf/include/mupdf/fitz/buffer.h index 8c9f097..5ac949c 100644 --- a/misc/mupdf/include/mupdf/fitz/buffer.h +++ b/misc/mupdf/include/mupdf/fitz/buffer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_BUFFER_H #define MUPDF_FITZ_BUFFER_H @@ -104,6 +104,12 @@ fz_buffer *fz_new_buffer_from_shared_data(fz_context *ctx, const unsigned char * */ fz_buffer *fz_new_buffer_from_copied_data(fz_context *ctx, const unsigned char *data, size_t size); +/** + Make a new buffer, containing a copy of the data used in + the original. +*/ +fz_buffer *fz_clone_buffer(fz_context *ctx, fz_buffer *buf); + /** Create a new buffer with data decoded from a base64 input string. */ @@ -138,6 +144,16 @@ void fz_trim_buffer(fz_context *ctx, fz_buffer *buf); */ void fz_clear_buffer(fz_context *ctx, fz_buffer *buf); +/** + Create a new buffer with a (subset of) the data from the buffer. + + start: if >= 0, offset from start of buffer, if < 0 offset from end of buffer. 
+ + end: if >= 0, offset from start of buffer, if < 0 offset from end of buffer. + +*/ +fz_buffer *fz_slice_buffer(fz_context *ctx, fz_buffer *buf, int64_t start, int64_t end); + /** Append the contents of the source buffer onto the end of the destination buffer, extending automatically as required. @@ -146,6 +162,16 @@ void fz_clear_buffer(fz_context *ctx, fz_buffer *buf); */ void fz_append_buffer(fz_context *ctx, fz_buffer *destination, fz_buffer *source); +/** + Write a base64 encoded data block, optionally with periodic newlines. +*/ +void fz_append_base64(fz_context *ctx, fz_buffer *out, const unsigned char *data, size_t size, int newline); + +/** + Append a base64 encoded fz_buffer, optionally with periodic newlines. +*/ +void fz_append_base64_buffer(fz_context *ctx, fz_buffer *out, fz_buffer *data, int newline); + /** fz_append_*: Append data to a buffer. diff --git a/misc/mupdf/include/mupdf/fitz/color.h b/misc/mupdf/include/mupdf/fitz/color.h index 1b028c0..0a7e985 100644 --- a/misc/mupdf/include/mupdf/fitz/color.h +++ b/misc/mupdf/include/mupdf/fitz/color.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_COLOR_H #define MUPDF_FITZ_COLOR_H @@ -254,6 +254,18 @@ int fz_colorspace_is_lab_icc(fz_context *ctx, fz_colorspace *cs); */ int fz_is_valid_blend_colorspace(fz_context *ctx, fz_colorspace *cs); +/** + Get the 'base' colorspace for a colorspace. + + For indexed colorspaces, this is the colorspace the index + decodes into. For all other colorspaces, it is the colorspace + itself. + + The returned colorspace is 'borrowed' (i.e. no additional + references are taken or dropped). 
+*/ +fz_colorspace *fz_base_colorspace(fz_context *ctx, fz_colorspace *cs); + /** Retrieve global default colorspaces. diff --git a/misc/mupdf/include/mupdf/fitz/compress.h b/misc/mupdf/include/mupdf/fitz/compress.h index fde9f66..9acf1cf 100644 --- a/misc/mupdf/include/mupdf/fitz/compress.h +++ b/misc/mupdf/include/mupdf/fitz/compress.h @@ -17,13 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_COMPRESS_H #define MUPDF_FITZ_COMPRESS_H #include "mupdf/fitz/system.h" +#include "mupdf/fitz/buffer.h" +#include "mupdf/fitz/pixmap.h" typedef enum { @@ -74,13 +76,13 @@ unsigned char *fz_new_deflated_data_from_buffer(fz_context *ctx, size_t *compres Creates a stream assuming the default PDF parameters, except the number of columns. */ -fz_buffer *fz_compress_ccitt_fax_g3(fz_context *ctx, const unsigned char *data, int columns, int rows); +fz_buffer *fz_compress_ccitt_fax_g3(fz_context *ctx, const unsigned char *data, int columns, int rows, ptrdiff_t stride); /** Compress bitmap data as CCITT Group 4 2D fax image. Creates a stream assuming the default PDF parameters, except K=-1 and the number of columns. 
*/ -fz_buffer *fz_compress_ccitt_fax_g4(fz_context *ctx, const unsigned char *data, int columns, int rows); +fz_buffer *fz_compress_ccitt_fax_g4(fz_context *ctx, const unsigned char *data, int columns, int rows, ptrdiff_t stride); #endif diff --git a/misc/mupdf/include/mupdf/fitz/compressed-buffer.h b/misc/mupdf/include/mupdf/fitz/compressed-buffer.h index 852648c..5f88649 100644 --- a/misc/mupdf/include/mupdf/fitz/compressed-buffer.h +++ b/misc/mupdf/include/mupdf/fitz/compressed-buffer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_COMPRESSED_BUFFER_H #define MUPDF_FITZ_COMPRESSED_BUFFER_H @@ -39,6 +39,7 @@ typedef struct union { struct { int color_transform; /* Use -1 for unset */ + int invert_cmyk; /* Use 1 for standalone JPEG files */ } jpeg; struct { int smask_in_data; @@ -82,10 +83,16 @@ typedef struct */ typedef struct { + int refs; fz_compression_params params; fz_buffer *buffer; } fz_compressed_buffer; +/** + Take a reference to an fz_compressed_buffer. +*/ +fz_compressed_buffer *fz_keep_compressed_buffer(fz_context *ctx, fz_compressed_buffer *cbuf); + /** Return the storage size used for a buffer and its data. Used in implementing store handling. @@ -123,6 +130,19 @@ fz_stream *fz_open_image_decomp_stream(fz_context *ctx, fz_stream *, fz_compress */ int fz_recognize_image_format(fz_context *ctx, unsigned char p[8]); +/** + Map from FZ_IMAGE_* value to string. + + The returned string is static and therefore must not be freed. 
+*/ +const char *fz_image_type_name(int type); + +/** + Map from (case sensitive) image type string to FZ_IMAGE_* + type value. +*/ +int fz_lookup_image_type(const char *type); + enum { FZ_IMAGE_UNKNOWN = 0, @@ -146,6 +166,7 @@ enum FZ_IMAGE_PNG, FZ_IMAGE_PNM, FZ_IMAGE_TIFF, + FZ_IMAGE_PSD, }; /** @@ -156,4 +177,9 @@ enum */ void fz_drop_compressed_buffer(fz_context *ctx, fz_compressed_buffer *buf); +/** + Create a new, UNKNOWN format, compressed_buffer. +*/ +fz_compressed_buffer *fz_new_compressed_buffer(fz_context *ctx); + #endif diff --git a/misc/mupdf/include/mupdf/fitz/config.h b/misc/mupdf/include/mupdf/fitz/config.h index b300617..553e69f 100644 --- a/misc/mupdf/include/mupdf/fitz/config.h +++ b/misc/mupdf/include/mupdf/fitz/config.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef FZ_CONFIG_H diff --git a/misc/mupdf/include/mupdf/fitz/context.h b/misc/mupdf/include/mupdf/fitz/context.h index 0ecdb6e..8277c51 100644 --- a/misc/mupdf/include/mupdf/fitz/context.h +++ b/misc/mupdf/include/mupdf/fitz/context.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_CONTEXT_H #define MUPDF_FITZ_CONTEXT_H @@ -27,6 +27,11 @@ #include "mupdf/fitz/system.h" #include "mupdf/fitz/geometry.h" + +#ifndef FZ_VERBOSE_EXCEPTIONS +#define FZ_VERBOSE_EXCEPTIONS 0 +#endif + typedef struct fz_font_context fz_font_context; typedef struct fz_colorspace_context fz_colorspace_context; typedef struct fz_style_context fz_style_context; @@ -34,6 +39,7 @@ typedef struct fz_tuning_context fz_tuning_context; typedef struct fz_store fz_store; typedef struct fz_glyph_cache fz_glyph_cache; typedef struct fz_document_handler_context fz_document_handler_context; +typedef struct fz_archive_handler_context fz_archive_handler_context; typedef struct fz_output fz_output; typedef struct fz_context fz_context; @@ -52,35 +58,182 @@ typedef struct Exception macro definitions. Just treat these as a black box - pay no attention to the man behind the curtain. */ - #define fz_var(var) fz_var_imp((void *)&(var)) #define fz_try(ctx) if (!fz_setjmp(*fz_push_try(ctx))) if (fz_do_try(ctx)) do #define fz_always(ctx) while (0); if (fz_do_always(ctx)) do #define fz_catch(ctx) while (0); if (fz_do_catch(ctx)) +/** + These macros provide a simple exception handling system. Use them as + follows: + + fz_try(ctx) + ... + fz_catch(ctx) + ... + + or as: + + fz_try(ctx) + ... + fz_always(ctx) + ... + fz_catch(ctx) + ... + + Code within the fz_try() section can then throw exceptions using fz_throw() + (or fz_vthrow()). + + They are implemented with setjmp/longjmp, which can have unfortunate + consequences for 'losing' local variable values on a throw. To avoid this + we recommend calling 'fz_var(variable)' before the fz_try() for any + local variable whose value may change within the fz_try() block and whose + value will be required afterwards. + + Do not call anything in the fz_always() section that can throw. + + Any exception can be rethrown from the fz_catch() section using fz_rethrow() + as long as there has been no intervening use of fz_try/fz_catch. 
+*/ + +/** + Throw an exception. + + This assumes an enclosing fz_try() block within the callstack. +*/ FZ_NORETURN void fz_vthrow(fz_context *ctx, int errcode, const char *, va_list ap); FZ_NORETURN void fz_throw(fz_context *ctx, int errcode, const char *, ...) FZ_PRINTFLIKE(3,4); FZ_NORETURN void fz_rethrow(fz_context *ctx); + +/** + Called within a catch block this modifies the current + exception's code. If it's of type 'fromcode' it is + modified to 'tocode'. Typically used for 'downgrading' + exception severity. +*/ +void fz_morph_error(fz_context *ctx, int fromcode, int tocode); + +/** + Log a warning. + + This goes to the registered warning stream (stderr by + default). +*/ void fz_vwarn(fz_context *ctx, const char *fmt, va_list ap); void fz_warn(fz_context *ctx, const char *fmt, ...) FZ_PRINTFLIKE(2,3); + +/** + Within an fz_catch() block, retrieve the formatted message + string for the current exception. + + This assumes no intervening use of fz_try/fz_catch. +*/ const char *fz_caught_message(fz_context *ctx); + +/** + Within an fz_catch() block, retrieve the error code for + the current exception. + + This assumes no intervening use of fz_try/fz_catch. +*/ int fz_caught(fz_context *ctx); + +/* + Within an fz_catch() block, retrieve the errno code for + the current SYSTEM exception. + + Is undefined for non-SYSTEM errors. +*/ +int fz_caught_errno(fz_context *ctx); + +/** + Within an fz_catch() block, rethrow the current exception + if the errcode of the current exception matches. + + This assumes no intervening use of fz_try/fz_catch. +*/ void fz_rethrow_if(fz_context *ctx, int errcode); +void fz_rethrow_unless(fz_context *ctx, int errcode); + +/** + Format an error message, and log it to the registered + error stream (stderr by default). +*/ +void fz_log_error_printf(fz_context *ctx, const char *fmt, ...) 
FZ_PRINTFLIKE(2,3); +void fz_vlog_error_printf(fz_context *ctx, const char *fmt, va_list ap); + +/** + Log a (preformatted) string to the registered + error stream (stderr by default). +*/ +void fz_log_error(fz_context *ctx, const char *str); void fz_start_throw_on_repair(fz_context *ctx); void fz_end_throw_on_repair(fz_context *ctx); -enum +/** + Now, a debugging feature. If FZ_VERBOSE_EXCEPTIONS is 1 then + some of the above functions are replaced by versions that print + FILE and LINE information. Each macro forwards to the *FL function of the same name. +*/ +#if FZ_VERBOSE_EXCEPTIONS +#define fz_vthrow(CTX, ERRCODE, FMT, VA) fz_vthrowFL(CTX, __FILE__, __LINE__, ERRCODE, FMT, VA) +#define fz_throw(CTX, ERRCODE, ...) fz_throwFL(CTX, __FILE__, __LINE__, ERRCODE, __VA_ARGS__) +#define fz_rethrow(CTX) fz_rethrowFL(CTX, __FILE__, __LINE__) +#define fz_morph_error(CTX, FROM, TO) fz_morph_errorFL(CTX, __FILE__, __LINE__, FROM, TO) +#define fz_vwarn(CTX, FMT, VA) fz_vwarnFL(CTX, __FILE__, __LINE__, FMT, VA) +#define fz_warn(CTX, ...) fz_warnFL(CTX, __FILE__, __LINE__, __VA_ARGS__) +#define fz_rethrow_if(CTX, ERRCODE) fz_rethrow_ifFL(CTX, __FILE__, __LINE__, ERRCODE) +#define fz_rethrow_unless(CTX, ERRCODE) fz_rethrow_unlessFL(CTX, __FILE__, __LINE__, ERRCODE) +#define fz_log_error_printf(CTX, ...) fz_log_error_printfFL(CTX, __FILE__, __LINE__, __VA_ARGS__) +#define fz_vlog_error_printf(CTX, FMT, VA) fz_vlog_error_printfFL(CTX, __FILE__, __LINE__, FMT, VA) +#define fz_log_error(CTX, STR) fz_log_errorFL(CTX, __FILE__, __LINE__, STR) +#define fz_do_catch(CTX) fz_do_catchFL(CTX, __FILE__, __LINE__) +FZ_NORETURN void fz_vthrowFL(fz_context *ctx, const char *file, int line, int errcode, const char *fmt, va_list ap); +FZ_NORETURN void fz_throwFL(fz_context *ctx, const char *file, int line, int errcode, const char *fmt, ...) 
FZ_PRINTFLIKE(5,6); +FZ_NORETURN void fz_rethrowFL(fz_context *ctx, const char *file, int line); +void fz_morph_errorFL(fz_context *ctx, const char *file, int line, int fromcode, int tocode); +void fz_vwarnFL(fz_context *ctx, const char *file, int line, const char *fmt, va_list ap); +void fz_warnFL(fz_context *ctx, const char *file, int line, const char *fmt, ...) FZ_PRINTFLIKE(4,5); +void fz_rethrow_ifFL(fz_context *ctx, const char *file, int line, int errcode); +void fz_rethrow_unlessFL(fz_context *ctx, const char *file, int line, int errcode); +void fz_log_error_printfFL(fz_context *ctx, const char *file, int line, const char *fmt, ...) FZ_PRINTFLIKE(4,5); +void fz_vlog_error_printfFL(fz_context *ctx, const char *file, int line, const char *fmt, va_list ap); +void fz_log_errorFL(fz_context *ctx, const char *file, int line, const char *str); +int fz_do_catchFL(fz_context *ctx, const char *file, int line); +#endif + +/* Report an error to the registered error callback. */ +void fz_report_error(fz_context *ctx); + +/* + * Swallow an error and ignore it completely. + * This should only be called to signal that you've handled a TRYLATER or ABORT error, + */ +void fz_ignore_error(fz_context *ctx); + +/* Convert an error into another runtime exception. + * For use when converting an exception from Fitz to a language binding exception. 
+ */ +const char *fz_convert_error(fz_context *ctx, int *code); + +enum fz_error_type { - FZ_ERROR_NONE = 0, - FZ_ERROR_MEMORY = 1, - FZ_ERROR_GENERIC = 2, - FZ_ERROR_SYNTAX = 3, - FZ_ERROR_MINOR = 4, - FZ_ERROR_TRYLATER = 5, - FZ_ERROR_ABORT = 6, - FZ_ERROR_REPAIRED = 7, - FZ_ERROR_COUNT + FZ_ERROR_NONE, + FZ_ERROR_GENERIC, + + FZ_ERROR_SYSTEM, // fatal out of memory or syscall error + FZ_ERROR_LIBRARY, // unclassified error from third-party library + FZ_ERROR_ARGUMENT, // invalid or out-of-range arguments to functions + FZ_ERROR_LIMIT, // failed because of resource or other hard limits + FZ_ERROR_UNSUPPORTED, // tried to use an unsupported feature + FZ_ERROR_FORMAT, // syntax or format errors that are unrecoverable + FZ_ERROR_SYNTAX, // syntax errors that should be diagnosed and ignored + + // for internal use only + FZ_ERROR_TRYLATER, // try-later progressive loading signal + FZ_ERROR_ABORT, // user requested abort signal + FZ_ERROR_REPAIRED, // internal flag used when repairing a PDF to avoid cycles }; /** @@ -258,13 +411,35 @@ void fz_default_error_callback(void *user, const char *message); */ void fz_default_warning_callback(void *user, const char *message); +/** + A callback called whenever an error message is generated. + The user pointer passed to fz_set_error_callback() is passed + along with the error message. +*/ +typedef void (fz_error_cb)(void *user, const char *message); + +/** + A callback called whenever a warning message is generated. + The user pointer passed to fz_set_warning_callback() is + passed along with the warning message. +*/ +typedef void (fz_warning_cb)(void *user, const char *message); + /** Set the error callback. This will be called as part of the exception handling. The callback must not throw exceptions! 
*/ -void fz_set_error_callback(fz_context *ctx, void (*print)(void *user, const char *message), void *user); +void fz_set_error_callback(fz_context *ctx, fz_error_cb *error_cb, void *user); + +/** + Retrieve the currently set error callback, or NULL if none + has been set. Optionally, if user is non-NULL, the user pointer + given when the error callback was set is also passed back to + the caller. +*/ +fz_error_cb *fz_error_callback(fz_context *ctx, void **user); /** Set the warning callback. This will be called as part of the @@ -272,7 +447,15 @@ void fz_set_error_callback(fz_context *ctx, void (*print)(void *user, const char The callback must not throw exceptions! */ -void fz_set_warning_callback(fz_context *ctx, void (*print)(void *user, const char *message), void *user); +void fz_set_warning_callback(fz_context *ctx, fz_warning_cb *warning_cb, void *user); + +/** + Retrieve the currently set warning callback, or NULL if none + has been set. Optionally, if user is non-NULL, the user pointer + given when the warning callback was set is also passed back to + the caller. +*/ +fz_warning_cb *fz_warning_callback(fz_context *ctx, void **user); /** In order to tune MuPDF's behaviour, certain functions can @@ -541,6 +724,34 @@ char *fz_strdup(fz_context *ctx, const char *s); */ void fz_memrnd(fz_context *ctx, uint8_t *block, int len); +/* + Reference counted malloced C strings. +*/ +typedef struct +{ + int refs; + char str[1]; +} fz_string; + +/* + Allocate a new string to hold a copy of str. + + Returns with a refcount of 1. +*/ +fz_string *fz_new_string(fz_context *ctx, const char *str); + +/* + Take another reference to a string. +*/ +fz_string *fz_keep_string(fz_context *ctx, fz_string *str); + +/* + Drop a reference to a string, freeing if the refcount + reaches 0. +*/ +void fz_drop_string(fz_context *ctx, fz_string *str); + +#define fz_cstring_from_string(A) ((A) == NULL ? NULL : (A)->str) /* Implementation details: subject to change. 
*/ @@ -550,19 +761,27 @@ void fz_var_imp(void *); fz_jmp_buf *fz_push_try(fz_context *ctx); int fz_do_try(fz_context *ctx); int fz_do_always(fz_context *ctx); -int fz_do_catch(fz_context *ctx); +int (fz_do_catch)(fz_context *ctx); + +#ifndef FZ_JMPBUF_ALIGN +#define FZ_JMPBUF_ALIGN 32 +#endif typedef struct { - int state, code; fz_jmp_buf buffer; + int state, code; + char padding[FZ_JMPBUF_ALIGN-sizeof(int)*2]; } fz_error_stack_slot; typedef struct { fz_error_stack_slot *top; fz_error_stack_slot stack[256]; + fz_error_stack_slot padding; + fz_error_stack_slot *stack_base; int errcode; + int errnum; /* errno for SYSTEM class errors */ void *print_user; void (*print)(void *user, const char *message); char message[256]; @@ -604,6 +823,7 @@ struct fz_context /* TODO: should these be unshared? */ fz_document_handler_context *handler; + fz_archive_handler_context *archive; fz_style_context *style; fz_tuning_context *tuning; @@ -671,6 +891,21 @@ fz_keep_imp_locked(fz_context *ctx FZ_UNUSED, void *p, int *refs) return p; } +static inline void * +fz_keep_imp8_locked(fz_context *ctx FZ_UNUSED, void *p, int8_t *refs) +{ + if (p) + { + (void)Memento_checkIntPointerOrNull(refs); + if (*refs > 0) + { + (void)Memento_takeRef(p); + ++*refs; + } + } + return p; +} + static inline void * fz_keep_imp8(fz_context *ctx, void *p, int8_t *refs) { diff --git a/misc/mupdf/include/mupdf/fitz/crypt.h b/misc/mupdf/include/mupdf/fitz/crypt.h index 7b852d7..e556f3b 100644 --- a/misc/mupdf/include/mupdf/fitz/crypt.h +++ b/misc/mupdf/include/mupdf/fitz/crypt.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_CRYPT_H #define MUPDF_FITZ_CRYPT_H @@ -33,8 +33,8 @@ */ typedef struct { - unsigned int state[4]; - unsigned int count[2]; + uint32_t lo, hi; + uint32_t a, b, c, d; unsigned char buffer[64]; } fz_md5; diff --git a/misc/mupdf/include/mupdf/fitz/device.h b/misc/mupdf/include/mupdf/fitz/device.h index b062524..9f51c21 100644 --- a/misc/mupdf/include/mupdf/fitz/device.h +++ b/misc/mupdf/include/mupdf/fitz/device.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_DEVICE_H #define MUPDF_FITZ_DEVICE_H @@ -106,6 +106,63 @@ int fz_lookup_blendmode(const char *name); */ const char *fz_blendmode_name(int blendmode); +/* + Generic function type. + + Different function implementations will derive from this. +*/ +typedef struct fz_function fz_function; + +typedef void (fz_function_eval_fn)(fz_context *, fz_function *, const float *, float *); + +enum +{ + FZ_FUNCTION_MAX_N = FZ_MAX_COLORS, + FZ_FUNCTION_MAX_M = FZ_MAX_COLORS +}; + +struct fz_function +{ + fz_storable storable; + size_t size; + int m; /* number of input values */ + int n; /* number of output values */ + + fz_function_eval_fn *eval; +}; + +fz_function *fz_new_function_of_size(fz_context *ctx, int size, size_t size2, int m, int n, fz_function_eval_fn *eval, fz_store_drop_fn *drop); + +#define fz_new_derived_function(CTX,TYPE,SIZE,M,N,EVAL,DROP) \ + ((TYPE*)Memento_label(fz_new_function_of_size(CTX,sizeof(TYPE),SIZE,M,N,EVAL,DROP), #TYPE)) + +/* + Evaluate a function. 
+ + Input vector = (in[0], ..., in[inlen-1]) + Output vector = (out[0], ..., out[outlen-1]) + + If inlen or outlen do not match that expected by the function, this + routine will truncate or extend the input/output (with 0's) as + required. +*/ +void fz_eval_function(fz_context *ctx, fz_function *func, const float *in, int inlen, float *out, int outlen); + +/* + Keep a function reference. +*/ +fz_function *fz_keep_function(fz_context *ctx, fz_function *func); + +/* + Drop a function reference. +*/ +void fz_drop_function(fz_context *ctx, fz_function *func); + +/* + Function size +*/ +size_t fz_function_size(fz_context *ctx, fz_function *func); + /** The device structure is public to allow devices to be implemented outside of fitz. @@ -134,6 +191,103 @@ enum fz_device_container_stack_is_tile, }; +/* Structure types */ +typedef enum +{ + FZ_STRUCTURE_INVALID = -1, + + /* Grouping elements (PDF 1.7 - Table 10.20) */ + FZ_STRUCTURE_DOCUMENT, + FZ_STRUCTURE_PART, + FZ_STRUCTURE_ART, + FZ_STRUCTURE_SECT, + FZ_STRUCTURE_DIV, + FZ_STRUCTURE_BLOCKQUOTE, + FZ_STRUCTURE_CAPTION, + FZ_STRUCTURE_TOC, + FZ_STRUCTURE_TOCI, + FZ_STRUCTURE_INDEX, + FZ_STRUCTURE_NONSTRUCT, + FZ_STRUCTURE_PRIVATE, + /* Grouping elements (PDF 2.0 - Table 364) */ + FZ_STRUCTURE_DOCUMENTFRAGMENT, + /* Grouping elements (PDF 2.0 - Table 365) */ + FZ_STRUCTURE_ASIDE, + /* Grouping elements (PDF 2.0 - Table 366) */ + FZ_STRUCTURE_TITLE, + FZ_STRUCTURE_FENOTE, + /* Grouping elements (PDF 2.0 - Table 367) */ + FZ_STRUCTURE_SUB, + + /* Paragraphlike elements (PDF 1.7 - Table 10.21) */ + FZ_STRUCTURE_P, + FZ_STRUCTURE_H, + FZ_STRUCTURE_H1, + FZ_STRUCTURE_H2, + FZ_STRUCTURE_H3, + FZ_STRUCTURE_H4, + FZ_STRUCTURE_H5, + FZ_STRUCTURE_H6, + + /* List elements (PDF 1.7 - Table 10.23) */ + FZ_STRUCTURE_LIST, + FZ_STRUCTURE_LISTITEM, + FZ_STRUCTURE_LABEL, + FZ_STRUCTURE_LISTBODY, + + /* Table elements (PDF 1.7 - Table 10.24) */ + FZ_STRUCTURE_TABLE, + FZ_STRUCTURE_TR, + FZ_STRUCTURE_TH, + FZ_STRUCTURE_TD, + 
FZ_STRUCTURE_THEAD, + FZ_STRUCTURE_TBODY, + FZ_STRUCTURE_TFOOT, + + /* Inline elements (PDF 1.7 - Table 10.25) */ + FZ_STRUCTURE_SPAN, + FZ_STRUCTURE_QUOTE, + FZ_STRUCTURE_NOTE, + FZ_STRUCTURE_REFERENCE, + FZ_STRUCTURE_BIBENTRY, + FZ_STRUCTURE_CODE, + FZ_STRUCTURE_LINK, + FZ_STRUCTURE_ANNOT, + /* Inline elements (PDF 2.0 - Table 368) */ + FZ_STRUCTURE_EM, + FZ_STRUCTURE_STRONG, + + /* Ruby inline element (PDF 1.7 - Table 10.26) */ + FZ_STRUCTURE_RUBY, + FZ_STRUCTURE_RB, + FZ_STRUCTURE_RT, + FZ_STRUCTURE_RP, + + /* Warichu inline element (PDF 1.7 - Table 10.26) */ + FZ_STRUCTURE_WARICHU, + FZ_STRUCTURE_WT, + FZ_STRUCTURE_WP, + + /* Illustration elements (PDF 1.7 - Table 10.27) */ + FZ_STRUCTURE_FIGURE, + FZ_STRUCTURE_FORMULA, + FZ_STRUCTURE_FORM, + + /* Artifact structure type (PDF 2.0 - Table 375) */ + FZ_STRUCTURE_ARTIFACT +} fz_structure; + +const char *fz_structure_to_string(fz_structure type); +fz_structure fz_structure_from_string(const char *str); + +typedef enum +{ + FZ_METATEXT_ACTUALTEXT, + FZ_METATEXT_ALT, + FZ_METATEXT_ABBREVIATION, + FZ_METATEXT_TITLE +} fz_metatext; + struct fz_device { int refs; @@ -162,7 +316,7 @@ struct fz_device void (*pop_clip)(fz_context *, fz_device *); void (*begin_mask)(fz_context *, fz_device *, fz_rect area, int luminosity, fz_colorspace *, const float *bc, fz_color_params ); - void (*end_mask)(fz_context *, fz_device *); + void (*end_mask)(fz_context *, fz_device *, fz_function *fn); void (*begin_group)(fz_context *, fz_device *, fz_rect area, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha); void (*end_group)(fz_context *, fz_device *); @@ -175,6 +329,12 @@ struct fz_device void (*begin_layer)(fz_context *, fz_device *, const char *layer_name); void (*end_layer)(fz_context *, fz_device *); + void (*begin_structure)(fz_context *, fz_device *, fz_structure standard, const char *raw, int idx); + void (*end_structure)(fz_context *, fz_device *); + + void (*begin_metatext)(fz_context *, fz_device *, 
fz_metatext meta, const char *text); + void (*end_metatext)(fz_context *, fz_device *); + fz_rect d1_rect; int container_len; @@ -201,6 +361,7 @@ void fz_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *image, fz_mat void fz_clip_image_mask(fz_context *ctx, fz_device *dev, fz_image *image, fz_matrix ctm, fz_rect scissor); void fz_begin_mask(fz_context *ctx, fz_device *dev, fz_rect area, int luminosity, fz_colorspace *colorspace, const float *bc, fz_color_params color_params); void fz_end_mask(fz_context *ctx, fz_device *dev); +void fz_end_mask_tr(fz_context *ctx, fz_device *dev, fz_function *fn); void fz_begin_group(fz_context *ctx, fz_device *dev, fz_rect area, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha); void fz_end_group(fz_context *ctx, fz_device *dev); void fz_begin_tile(fz_context *ctx, fz_device *dev, fz_rect area, fz_rect view, float xstep, float ystep, fz_matrix ctm); @@ -210,6 +371,10 @@ void fz_render_flags(fz_context *ctx, fz_device *dev, int set, int clear); void fz_set_default_colorspaces(fz_context *ctx, fz_device *dev, fz_default_colorspaces *default_cs); void fz_begin_layer(fz_context *ctx, fz_device *dev, const char *layer_name); void fz_end_layer(fz_context *ctx, fz_device *dev); +void fz_begin_structure(fz_context *ctx, fz_device *dev, fz_structure standard, const char *raw, int idx); +void fz_end_structure(fz_context *ctx, fz_device *dev); +void fz_begin_metatext(fz_context *ctx, fz_device *dev, fz_metatext meta, const char *text); +void fz_end_metatext(fz_context *ctx, fz_device *dev); /** Devices are created by calls to device implementations, for @@ -267,6 +432,7 @@ enum /* Hints */ FZ_DONT_INTERPOLATE_IMAGES = 1, FZ_NO_CACHE = 2, + FZ_DONT_DECODE_IMAGES = 4 }; /** diff --git a/misc/mupdf/include/mupdf/fitz/display-list.h b/misc/mupdf/include/mupdf/fitz/display-list.h index cba3ac5..957efe4 100644 --- a/misc/mupdf/include/mupdf/fitz/display-list.h +++ b/misc/mupdf/include/mupdf/fitz/display-list.h @@ 
-17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_DISPLAY_LIST_H #define MUPDF_FITZ_DISPLAY_LIST_H diff --git a/misc/mupdf/include/mupdf/fitz/document.h b/misc/mupdf/include/mupdf/fitz/document.h index 1eaec41..a233407 100644 --- a/misc/mupdf/include/mupdf/fitz/document.h +++ b/misc/mupdf/include/mupdf/fitz/document.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,13 +17,14 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_DOCUMENT_H #define MUPDF_FITZ_DOCUMENT_H #include "mupdf/fitz/system.h" +#include "mupdf/fitz/types.h" #include "mupdf/fitz/context.h" #include "mupdf/fitz/geometry.h" #include "mupdf/fitz/device.h" @@ -32,23 +33,22 @@ #include "mupdf/fitz/outline.h" #include "mupdf/fitz/separation.h" -typedef struct fz_document fz_document; typedef struct fz_document_handler fz_document_handler; typedef struct fz_page fz_page; typedef intptr_t fz_bookmark; -/** - Locations within the document are referred to in terms of - chapter and page, rather than just a page number. For some - documents (such as epub documents with large numbers of pages - broken into many chapters) this can make navigation much faster - as only the required chapter needs to be decoded at a time. 
-*/ -typedef struct +typedef enum { - int chapter; - int page; -} fz_location; + FZ_MEDIA_BOX, + FZ_CROP_BOX, + FZ_BLEED_BOX, + FZ_TRIM_BOX, + FZ_ART_BOX, + FZ_UNKNOWN_BOX +} fz_box_type; + +fz_box_type fz_box_type_from_string(const char *name); +const char *fz_string_from_box_type(fz_box_type box); /** Simple constructor for fz_locations. @@ -108,6 +108,10 @@ typedef enum FZ_PERMISSION_COPY = 'c', FZ_PERMISSION_EDIT = 'e', FZ_PERMISSION_ANNOTATE = 'n', + FZ_PERMISSION_FORM = 'f', + FZ_PERMISSION_ACCESSIBILITY = 'y', + FZ_PERMISSION_ASSEMBLE = 'a', + FZ_PERMISSION_PRINT_HQ = 'h', } fz_permission; @@ -148,6 +152,12 @@ typedef int (fz_document_has_permission_fn)(fz_context *ctx, fz_document *doc, f */ typedef fz_outline *(fz_document_load_outline_fn)(fz_context *ctx, fz_document *doc); +/** + Type for a function to be called to obtain an outline iterator + for a document. See fz_document_outline_iterator for more information. +*/ +typedef fz_outline_iterator *(fz_document_outline_iterator_fn)(fz_context *ctx, fz_document *doc); + /** Type for a function to be called to lay out a document. See fz_layout_document for more information. @@ -157,9 +167,16 @@ typedef void (fz_document_layout_fn)(fz_context *ctx, fz_document *doc, float w, /** Type for a function to be called to resolve an internal link to a location (chapter/page number - tuple). See fz_resolve_link for more information. + tuple). See fz_resolve_link_dest for more information. */ -typedef fz_location (fz_document_resolve_link_fn)(fz_context *ctx, fz_document *doc, const char *uri, float *xp, float *yp); +typedef fz_link_dest (fz_document_resolve_link_dest_fn)(fz_context *ctx, fz_document *doc, const char *uri); + +/** + Type for a function to be called to + create an internal link to a destination (chapter/page/x/y/w/h/zoom/type + tuple). See fz_resolve_link_dest for more information. 
+*/ +typedef char * (fz_document_format_link_uri_fn)(fz_context *ctx, fz_document *doc, fz_link_dest dest); /** Type for a function to be called to @@ -181,12 +198,25 @@ typedef int (fz_document_count_pages_fn)(fz_context *ctx, fz_document *doc, int */ typedef fz_page *(fz_document_load_page_fn)(fz_context *ctx, fz_document *doc, int chapter, int page); +/** + Type for a function to get the page label of a page in the document. + See fz_page_label for more information. +*/ +typedef void (fz_document_page_label_fn)(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size); + /** Type for a function to query - a documents metadata. See fz_lookup_metadata for more + a document's metadata. See fz_lookup_metadata for more + information. +*/ +typedef int (fz_document_lookup_metadata_fn)(fz_context *ctx, fz_document *doc, const char *key, char *buf, size_t size); + +/** + Type for a function to set + a document's metadata. See fz_set_metadata for more information. */ -typedef int (fz_document_lookup_metadata_fn)(fz_context *ctx, fz_document *doc, const char *key, char *buf, int size); +typedef int (fz_document_set_metadata_fn)(fz_context *ctx, fz_document *doc, const char *key, const char *value); /** Return output intent color space if it exists @@ -198,6 +228,11 @@ typedef fz_colorspace *(fz_document_output_intent_fn)(fz_context *ctx, fz_docume */ typedef void (fz_document_output_accelerator_fn)(fz_context *ctx, fz_document *doc, fz_output *out); +/** + Send document structure to device +*/ +typedef void (fz_document_run_structure_fn)(fz_context *ctx, fz_document *doc, fz_device *dev, fz_cookie *cookie); + /** Type for a function to make a bookmark. See fz_make_bookmark for more information. @@ -222,7 +257,7 @@ typedef void (fz_page_drop_page_fn)(fz_context *ctx, fz_page *page); bounding box of a page. See fz_bound_page for more information. 
*/ -typedef fz_rect (fz_page_bound_page_fn)(fz_context *ctx, fz_page *page); +typedef fz_rect (fz_page_bound_page_fn)(fz_context *ctx, fz_page *page, fz_box_type box); /** Type for a function to run the @@ -279,13 +314,9 @@ typedef int (fz_page_uses_overprint_fn)(fz_context *ctx, fz_page *page); typedef fz_link *(fz_page_create_link_fn)(fz_context *ctx, fz_page *page, fz_rect bbox, const char *uri); /** - Function type to open a document from a file. - - filename: file to open - - Pointer to opened document. Throws exception in case of error. + Type for a function to delete a link on a page. */ -typedef fz_document *(fz_document_open_fn)(fz_context *ctx, const char *filename); +typedef void (fz_page_delete_link_fn)(fz_context *ctx, fz_page *page, fz_link *link); /** Function type to open a @@ -294,35 +325,16 @@ typedef fz_document *(fz_document_open_fn)(fz_context *ctx, const char *filename stream: fz_stream to read document data from. Must be seekable for formats that require it. - Pointer to opened document. Throws exception in case of error. -*/ -typedef fz_document *(fz_document_open_with_stream_fn)(fz_context *ctx, fz_stream *stream); - -/** - Function type to open a document from a - file, with accelerator data. - - filename: file to open - - accel: accelerator file - - Pointer to opened document. Throws exception in case of error. -*/ -typedef fz_document *(fz_document_open_accel_fn)(fz_context *ctx, const char *filename, const char *accel); - -/** - Function type to open a document from a file, - with accelerator data. + accel: fz_stream to read accelerator data from. May be + NULL. May be ignored. - stream: fz_stream to read document data from. Must be - seekable for formats that require it. - - accel: fz_stream to read accelerator data from. Must be - seekable for formats that require it. + dir: 'Directory context' in which the document is loaded; + associated content from (like images for an html stream + will be loaded from this). Maybe NULL. 
May be ignored. Pointer to opened document. Throws exception in case of error. */ -typedef fz_document *(fz_document_open_accel_with_stream_fn)(fz_context *ctx, fz_stream *stream, fz_stream *accel); +typedef fz_document *(fz_document_open_fn)(fz_context *ctx, fz_stream *stream, fz_stream *accel, fz_archive *dir); /** Recognize a document type from @@ -337,6 +349,20 @@ typedef fz_document *(fz_document_open_accel_with_stream_fn)(fz_context *ctx, fz */ typedef int (fz_document_recognize_fn)(fz_context *ctx, const char *magic); +/** + Recognize a document type from stream contents. + + stream: stream contents to recognise (may be NULL if document is + a directory). + + dir: directory context from which stream is loaded. + + Returns a number between 0 (not recognized) and 100 + (fully recognized) based on how certain the recognizer + is that this is of the required type. +*/ +typedef int (fz_document_recognize_content_fn)(fz_context *ctx, fz_stream *stream, fz_archive *dir); + /** Type for a function to be called when processing an already opened page. See fz_process_opened_pages. @@ -366,6 +392,42 @@ void fz_register_document_handlers(fz_context *ctx); */ const fz_document_handler *fz_recognize_document(fz_context *ctx, const char *magic); +/** + Given a filename find a document handler that can handle a + document of this type. + + filename: The filename of the document. This will be opened and sampled + to check data. +*/ +const fz_document_handler *fz_recognize_document_content(fz_context *ctx, const char *filename); + +/** + Given a magic find a document handler that can handle a + document of this type. + + stream: the file stream to sample. May be NULL if the document is + a directory. + + magic: Can be a filename extension (including initial period) or + a mimetype. 
+*/ +const fz_document_handler *fz_recognize_document_stream_content(fz_context *ctx, fz_stream *stream, const char *magic); + +/** + Given a magic find a document handler that can handle a + document of this type. + + stream: the file stream to sample. May be NULL if the document is + a directory. + + dir: an fz_archive representing the directory from which the + stream was opened (or NULL). + + magic: Can be a filename extension (including initial period) or + a mimetype. +*/ +const fz_document_handler *fz_recognize_document_stream_and_dir_content(fz_context *ctx, fz_stream *stream, fz_archive *dir, const char *magic); + /** Open a document file and read its basic structure so pages and objects can be located. MuPDF will try to repair broken @@ -396,6 +458,11 @@ fz_document *fz_open_accelerated_document(fz_context *ctx, const char *filename, magic: a string used to detect document type; either a file name or mime-type. + + stream: a stream representing the contents of the document file. + + NOTE: The caller retains ownership of 'stream' - the document will take its + own reference if required. */ fz_document *fz_open_document_with_stream(fz_context *ctx, const char *magic, fz_stream *stream); @@ -405,9 +472,55 @@ fz_document *fz_open_document_with_stream(fz_context *ctx, const char *magic, fz magic: a string used to detect document type; either a file name or mime-type. + + stream: a stream representing the contents of the document file. + + dir: a 'directory context' for those filetypes that need it. + + NOTE: The caller retains ownership of 'stream' and 'dir' - the document will + take its own references if required. +*/ +fz_document *fz_open_document_with_stream_and_dir(fz_context *ctx, const char *magic, fz_stream *stream, fz_archive *dir); + +/** + Open a document using a buffer rather than opening a file on disk. 
+*/ +fz_document *fz_open_document_with_buffer(fz_context *ctx, const char *magic, fz_buffer *buffer); + +/** + Open a document using the specified stream object rather than + opening a file on disk. + + magic: a string used to detect document type; either a file name + or mime-type. + + stream: a stream of the document contents. + + accel: NULL, or a stream of the 'accelerator' contents for this document. + + NOTE: The caller retains ownership of 'stream' and 'accel' - the document will + take its own references if required. */ fz_document *fz_open_accelerated_document_with_stream(fz_context *ctx, const char *magic, fz_stream *stream, fz_stream *accel); +/** + Open a document using the specified stream object rather than + opening a file on disk. + + magic: a string used to detect document type; either a file name + or mime-type. + + stream: a stream of the document contents. + + accel: NULL, or a stream of the 'accelerator' contents for this document. + + dir: NULL, or the 'directory context' for the stream contents. + + NOTE: The caller retains ownership of 'stream', 'accel' and 'dir' - the document will + take its own references if required. +*/ +fz_document *fz_open_accelerated_document_with_stream_and_dir(fz_context *ctx, const char *magic, fz_stream *stream, fz_stream *accel, fz_archive *dir); + /** Query if the document supports the saving of accelerator data. */ @@ -483,6 +596,13 @@ int fz_authenticate_password(fz_context *ctx, fz_document *doc, const char *pass */ fz_outline *fz_load_outline(fz_context *ctx, fz_document *doc); +/** + Get an iterator for the document outline. + + Should be freed by fz_drop_outline_iterator. +*/ +fz_outline_iterator *fz_new_outline_iterator(fz_context *ctx, fz_document *doc); + /** Is the document reflowable. 
@@ -517,6 +637,21 @@ fz_location fz_lookup_bookmark(fz_context *ctx, fz_document *doc, fz_bookmark ma */ int fz_count_pages(fz_context *ctx, fz_document *doc); +/** + Resolve an internal link to a page number, location, and possible viewing parameters. + + Returns location (-1,-1) if the URI cannot be resolved. +*/ +fz_link_dest fz_resolve_link_dest(fz_context *ctx, fz_document *doc, const char *uri); + +/** + Format an internal link to a page number, location, and possible viewing parameters, + suitable for use with fz_create_link. + + Returns a newly allocated string that the caller must free. +*/ +char *fz_format_link_uri(fz_context *ctx, fz_document *doc, fz_link_dest dest); + /** Resolve an internal link to a page number. @@ -526,6 +661,23 @@ int fz_count_pages(fz_context *ctx, fz_document *doc); */ fz_location fz_resolve_link(fz_context *ctx, fz_document *doc, const char *uri, float *xp, float *yp); +/** + Run the document structure through a device. + + doc: Document in question. + + dev: Device obtained from fz_new_*_device. + + cookie: Communication mechanism between caller and library. + Intended for multi-threaded applications, while + single-threaded applications set cookie to NULL. The + caller may abort an ongoing rendering of a page. Cookie also + communicates progress information back to the caller. The + fields inside cookie are continually updated while the page is + rendering. +*/ +void fz_run_document_structure(fz_context *ctx, fz_document *doc, fz_device *dev, fz_cookie *cookie); + /** Function to get the location for the last page in the document. Using this can be far more efficient in some cases than calling @@ -623,6 +775,7 @@ fz_page *fz_new_page_of_size(fz_context *ctx, int size, fz_document *doc); Determine the size of a page at 72 dpi. */ fz_rect fz_bound_page(fz_context *ctx, fz_page *page); +fz_rect fz_bound_page_box(fz_context *ctx, fz_page *page, fz_box_type box); /** Run a page through a device. 
@@ -715,6 +868,11 @@ void fz_drop_page(fz_context *ctx, fz_page *page); */ fz_transition *fz_page_presentation(fz_context *ctx, fz_page *page, fz_transition *transition, float *duration); +/** + Get page label for a given page. +*/ +const char *fz_page_label(fz_context *ctx, fz_page *page, char *buf, int size); + /** Check permission flags on document. */ @@ -755,10 +913,17 @@ int fz_lookup_metadata(fz_context *ctx, fz_document *doc, const char *key, char #define FZ_META_FORMAT "format" #define FZ_META_ENCRYPTION "encryption" -#define FZ_META_INFO_AUTHOR "info:Author" +#define FZ_META_INFO "info:" #define FZ_META_INFO_TITLE "info:Title" +#define FZ_META_INFO_AUTHOR "info:Author" +#define FZ_META_INFO_SUBJECT "info:Subject" +#define FZ_META_INFO_KEYWORDS "info:Keywords" #define FZ_META_INFO_CREATOR "info:Creator" #define FZ_META_INFO_PRODUCER "info:Producer" +#define FZ_META_INFO_CREATIONDATE "info:CreationDate" +#define FZ_META_INFO_MODIFICATIONDATE "info:ModDate" + +void fz_set_metadata(fz_context *ctx, fz_document *doc, const char *key, const char *value); /** Find the output intent colorspace if the document has defined @@ -789,6 +954,11 @@ int fz_page_uses_overprint(fz_context *ctx, fz_page *page); */ fz_link *fz_create_link(fz_context *ctx, fz_page *page, fz_rect bbox, const char *uri); +/** + Delete an existing link on a page. +*/ +void fz_delete_link(fz_context *ctx, fz_page *page, fz_link *link); + /** Iterates over all opened pages of the document, calling the provided callback for each page for processing. If the callback @@ -812,7 +982,7 @@ void *fz_process_opened_pages(fz_context *ctx, fz_document *doc, fz_process_open struct fz_page { int refs; - fz_document *doc; /* reference to parent document */ + fz_document *doc; /* kept reference to parent document. Guaranteed non-NULL. */ int chapter; /* chapter number */ int number; /* page number in chapter */ int incomplete; /* incomplete from progressive loading; don't cache! 
*/ @@ -828,7 +998,14 @@ struct fz_page fz_page_separations_fn *separations; fz_page_uses_overprint_fn *overprint; fz_page_create_link_fn *create_link; - fz_page **prev, *next; /* linked list of currently open pages */ + fz_page_delete_link_fn *delete_link; + + /* linked list of currently open pages. This list is maintained + * by fz_load_chapter_page and fz_drop_page. All pages hold a + * kept reference to the document, so the document cannot disappear + * while pages exist. 'Incomplete' pages are NOT kept in this + * list. */ + fz_page **prev, *next; }; /** @@ -845,30 +1022,41 @@ struct fz_document fz_document_authenticate_password_fn *authenticate_password; fz_document_has_permission_fn *has_permission; fz_document_load_outline_fn *load_outline; + fz_document_outline_iterator_fn *outline_iterator; fz_document_layout_fn *layout; fz_document_make_bookmark_fn *make_bookmark; fz_document_lookup_bookmark_fn *lookup_bookmark; - fz_document_resolve_link_fn *resolve_link; + fz_document_resolve_link_dest_fn *resolve_link_dest; + fz_document_format_link_uri_fn *format_link_uri; fz_document_count_chapters_fn *count_chapters; fz_document_count_pages_fn *count_pages; fz_document_load_page_fn *load_page; + fz_document_page_label_fn *page_label; fz_document_lookup_metadata_fn *lookup_metadata; + fz_document_set_metadata_fn *set_metadata; fz_document_output_intent_fn *get_output_intent; fz_document_output_accelerator_fn *output_accelerator; + fz_document_run_structure_fn *run_structure; int did_layout; int is_reflowable; - fz_page *open; /* linked list of currently open pages */ + + /* Linked list of currently open pages. These are not + * references, but just a linked list of open pages, + * maintained by fz_load_chapter_page, and fz_drop_page. + * Every page holds a kept reference to the document, so + * the document cannot be destroyed while a page exists. + * Incomplete pages are NOT inserted into this list, but + * do still hold a real document reference. 
*/ + fz_page *open; }; struct fz_document_handler { fz_document_recognize_fn *recognize; fz_document_open_fn *open; - fz_document_open_with_stream_fn *open_with_stream; const char **extensions; const char **mimetypes; - fz_document_open_accel_fn *open_accel; - fz_document_open_accel_with_stream_fn *open_accel_with_stream; + fz_document_recognize_content_fn *recognize_content; }; #endif diff --git a/misc/mupdf/include/mupdf/fitz/export.h b/misc/mupdf/include/mupdf/fitz/export.h index 4f53201..853e2d5 100644 --- a/misc/mupdf/include/mupdf/fitz/export.h +++ b/misc/mupdf/include/mupdf/fitz/export.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_EXPORT_H #define MUPDF_FITZ_EXPORT_H @@ -31,7 +31,7 @@ * When compiling MuPDF DLL itself, FZ_DLL should be defined. */ -#if defined(WIN32) || defined(WIN64) +#if defined(_WIN32) || defined(_WIN64) #if defined(FZ_DLL) /* Building DLL. */ #define FZ_FUNCTION __declspec(dllexport) diff --git a/misc/mupdf/include/mupdf/fitz/filter.h b/misc/mupdf/include/mupdf/fitz/filter.h index 1dea44e..bfa8f9d 100644 --- a/misc/mupdf/include/mupdf/fitz/filter.h +++ b/misc/mupdf/include/mupdf/fitz/filter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. 
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_FILTER_H #define MUPDF_FITZ_FILTER_H @@ -34,14 +34,14 @@ typedef struct fz_jbig2_globals fz_jbig2_globals; typedef struct { int64_t offset; - size_t length; + uint64_t length; } fz_range; /** The null filter reads a specified amount of data from the substream. */ -fz_stream *fz_open_null_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset); +fz_stream *fz_open_null_filter(fz_context *ctx, fz_stream *chain, uint64_t len, int64_t offset); /** The range filter copies data from specified ranges of the @@ -53,7 +53,7 @@ fz_stream *fz_open_range_filter(fz_context *ctx, fz_stream *chain, fz_range *ran The endstream filter reads a PDF substream, and starts to look for an 'endstream' token after the specified length. */ -fz_stream *fz_open_endstream_filter(fz_context *ctx, fz_stream *chain, int len, int64_t offset); +fz_stream *fz_open_endstream_filter(fz_context *ctx, fz_stream *chain, uint64_t len, int64_t offset); /** Concat filter concatenates several streams into one. @@ -101,8 +101,14 @@ fz_stream *fz_open_rld(fz_context *ctx, fz_stream *chain); dctd filter performs DCT (JPEG) decoding of data read from the chained filter. - color_transform implements the PDF color_transform option; - use -1 (unset) as a default. + color_transform implements the PDF color_transform option + use -1 for default behavior + use 0 to disable YUV-RGB / YCCK-CMYK transforms + use 1 to enable YUV-RGB / YCCK-CMYK transforms + + invert_cmyk implements the necessary inversion for Photoshop CMYK images + use 0 if embedded in PDF + use 1 if not embedded in PDF For subsampling on decode, set l2factor to the log2 of the reduction required (therefore 0 = full size decode). @@ -110,7 +116,7 @@ fz_stream *fz_open_rld(fz_context *ctx, fz_stream *chain); jpegtables is an optional stream from which the JPEG tables can be read. Use NULL if not required. 
*/ -fz_stream *fz_open_dctd(fz_context *ctx, fz_stream *chain, int color_transform, int l2factor, fz_stream *jpegtables); +fz_stream *fz_open_dctd(fz_context *ctx, fz_stream *chain, int color_transform, int invert_cmyk, int l2factor, fz_stream *jpegtables); /** faxd filter performs FAX decoding of data read from @@ -148,6 +154,15 @@ fz_stream *fz_open_faxd(fz_context *ctx, fz_stream *chain, */ fz_stream *fz_open_flated(fz_context *ctx, fz_stream *chain, int window_bits); +/** + libarchived filter performs generic compressed decoding of data + in any format understood by libarchive from the chained filter. + + This will throw an exception if libarchive is not built in, or + if the compression format is not recognised. +*/ +fz_stream *fz_open_libarchived(fz_context *ctx, fz_stream *chain); + /** lzwd filter performs LZW decoding of data read from the chained filter. diff --git a/misc/mupdf/include/mupdf/fitz/font.h b/misc/mupdf/include/mupdf/fitz/font.h index 2402fac..bcfe2d5 100644 --- a/misc/mupdf/include/mupdf/fitz/font.h +++ b/misc/mupdf/include/mupdf/fitz/font.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_FONT_H #define MUPDF_FITZ_FONT_H @@ -27,6 +27,7 @@ #include "mupdf/fitz/context.h" #include "mupdf/fitz/geometry.h" #include "mupdf/fitz/buffer.h" +#include "mupdf/fitz/color.h" /* forward declaration for circular dependency */ struct fz_device; @@ -61,6 +62,44 @@ int fz_unicode_from_glyph_name_strict(const char *name); const char **fz_duplicate_glyph_names_from_unicode(int unicode); const char *fz_glyph_name_from_unicode_sc(int unicode); +/** + * A text decoder (to read arbitrary encodings and convert to unicode). 
+ */ +typedef struct fz_text_decoder fz_text_decoder; + +struct fz_text_decoder { + // get maximum size estimate of converted text (fast) + int (*decode_bound)(fz_text_decoder *dec, unsigned char *input, int n); + + // get exact size of converted text (slow) + int (*decode_size)(fz_text_decoder *dec, unsigned char *input, int n); + + // convert text into output buffer + void (*decode)(fz_text_decoder *dec, char *output, unsigned char *input, int n); + + // for internal use only; do not touch! + void *table1; + void *table2; +}; + +/* Initialize a text decoder using an IANA encoding name. + * See source/fitz/text-decoder.c for the exact list of supported encodings. + * Will throw an exception if the requested encoding is not available. + * + * The following is a subset of the supported encodings (see source/fitz/text-decoder.c for the full list): + * iso-8859-1 + * iso-8859-7 + * koi8-r + * euc-jp + * shift_jis + * euc-kr + * euc-cn + * gb18030 + * euc-tw + * big5 + */ +void fz_init_text_decoder(fz_context *ctx, fz_text_decoder *dec, const char *encoding); + /** An abstract font handle. */ @@ -118,6 +157,9 @@ typedef struct unsigned int cjk : 1; unsigned int cjk_lang : 2; /* CNS, GB, JAPAN, or KOREA */ + + unsigned int embed : 1; + unsigned int never_embed : 1; } fz_font_flags_t; /** @@ -380,6 +422,7 @@ const unsigned char *fz_lookup_noto_music_font(fz_context *ctx, int *len); const unsigned char *fz_lookup_noto_symbol1_font(fz_context *ctx, int *len); const unsigned char *fz_lookup_noto_symbol2_font(fz_context *ctx, int *len); const unsigned char *fz_lookup_noto_emoji_font(fz_context *ctx, int *len); +const unsigned char *fz_lookup_noto_boxes_font(fz_context *ctx, int *len); /** Try to load a fallback font for the @@ -414,8 +457,9 @@ fz_font *fz_load_fallback_font(fz_context *ctx, int script, int language, int se fz_font *fz_new_type3_font(fz_context *ctx, const char *name, fz_matrix matrix); /** - Create a new font from a font - file in memory. 
+ Create a new font from a font file in memory. + + Fonts created in this way, will be eligible for embedding by default. name: Name of font (leave NULL to use name from font). @@ -434,6 +478,8 @@ fz_font *fz_new_font_from_memory(fz_context *ctx, const char *name, const unsign /** Create a new font from a font file in a fz_buffer. + Fonts created in this way, will be eligible for embedding by default. + name: Name of font (leave NULL to use name from font). buffer: Buffer to load from. @@ -449,6 +495,8 @@ fz_font *fz_new_font_from_buffer(fz_context *ctx, const char *name, fz_buffer *b /** Create a new font from a font file. + Fonts created in this way, will be eligible for embedding by default. + name: Name of font (leave NULL to use name from font). path: File path to load from. @@ -468,6 +516,11 @@ fz_font *fz_new_base14_font(fz_context *ctx, const char *name); fz_font *fz_new_cjk_font(fz_context *ctx, int ordering); fz_font *fz_new_builtin_font(fz_context *ctx, const char *name, int is_bold, int is_italic); +/** + Control whether a given font should be embedded or not when writing. +*/ +void fz_set_font_embedding(fz_context *ctx, fz_font *font, int embed); + /** Add a reference to an existing fz_font. @@ -705,8 +758,9 @@ struct fz_font int glyph_count; - /* per glyph bounding box cache */ - fz_rect *bbox_table; + /* per glyph bounding box cache. */ + fz_rect **bbox_table; + int use_glyph_bbox; /* substitute metrics */ int width_count; @@ -714,7 +768,7 @@ struct fz_font short *width_table; /* in 1000 units */ /* cached glyph metrics */ - float *advance_cache; + float **advance_cache; /* cached encoding lookup */ uint16_t *encoding_cache[256]; @@ -722,6 +776,40 @@ struct fz_font /* cached md5sum for caching */ int has_digest; unsigned char digest[16]; + + /* Which font to use in a collection. */ + int subfont; }; +void fz_ft_lock(fz_context *ctx); + +void fz_ft_unlock(fz_context *ctx); + +/* Internal function. Must be called with FT_ALLOC_LOCK + * held. 
Returns 1 if this thread (context!) already holds + * the freetype lock. */ +int fz_ft_lock_held(fz_context *ctx); + +/* Internal function: Extract a ttf from the ttc that underlies + * a given fz_font. Caller takes ownership of the returned + * buffer. + */ +fz_buffer *fz_extract_ttf_from_ttc(fz_context *ctx, fz_font *font); + +/* Internal function: Given a ttf in a buffer, create a subset + * ttf in a new buffer that only provides the required gids. + * Caller takes ownership of the returned buffer. + * + * EXPERIMENTAL AND VERY SUBJECT TO CHANGE. + */ +fz_buffer *fz_subset_ttf_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont); + +/* Internal function: Given a cff in a buffer, create a subset + * cff in a new buffer that only provides the required gids. + * Caller takes ownership of the returned buffer. + * + * EXPERIMENTAL AND VERY SUBJECT TO CHANGE. + */ +fz_buffer *fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont); + #endif diff --git a/misc/mupdf/include/mupdf/fitz/geometry.h b/misc/mupdf/include/mupdf/fitz/geometry.h index 1bf7f74..57ca0e8 100644 --- a/misc/mupdf/include/mupdf/fitz/geometry.h +++ b/misc/mupdf/include/mupdf/fitz/geometry.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_MATH_H #define MUPDF_FITZ_MATH_H @@ -115,6 +115,11 @@ static inline size_t fz_minz(size_t a, size_t b) return (a < b ? 
a : b); } +static inline int64_t fz_mini64(int64_t a, int64_t b) +{ + return (a < b ? a : b); +} + static inline float fz_max(float a, float b) { return (a > b ? a : b); @@ -135,24 +140,29 @@ static inline int64_t fz_maxi64(int64_t a, int64_t b) return (a > b ? a : b); } -static inline float fz_clamp(float f, float min, float max) +static inline float fz_clamp(float x, float min, float max) +{ + return x < min ? min : x > max ? max : x; +} + +static inline int fz_clampi(int x, int min, int max) { - return (f > min ? (f < max ? f : max) : min); + return x < min ? min : x > max ? max : x; } -static inline int fz_clampi(int i, int min, int max) +static inline int64_t fz_clamp64(int64_t x, int64_t min, int64_t max) { - return (i > min ? (i < max ? i : max) : min); + return x < min ? min : x > max ? max : x; } -static inline double fz_clampd(double d, double min, double max) +static inline double fz_clampd(double x, double min, double max) { - return (d > min ? (d < max ? d : max) : min); + return x < min ? min : x > max ? max : x; } -static inline void *fz_clampp(void *p, void *min, void *max) +static inline void *fz_clampp(void *x, void *min, void *max) { - return (p > min ? (p < max ? p : max) : min); + return x < min ? min : x > max ? max : x; } #define DIV_BY_ZERO(a, b, min, max) (((a) < 0) ^ ((b) < 0) ? (min) : (max)) diff --git a/misc/mupdf/include/mupdf/fitz/getopt.h b/misc/mupdf/include/mupdf/fitz/getopt.h index 8d72b5d..50409a0 100644 --- a/misc/mupdf/include/mupdf/fitz/getopt.h +++ b/misc/mupdf/include/mupdf/fitz/getopt.h @@ -17,17 +17,112 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_GETOPT_H #define MUPDF_FITZ_GETOPT_H +#include "export.h" + +typedef struct +{ + char *option; + int *flag; + void *opaque; +} fz_getopt_long_options; + /** Simple functions/variables for use in tools. + + ostr = option string. Comprises single letter options, followed by : if there + is an argument to the option. + + longopts: NULL (indicating no long options), or a pointer to an array of + longoptions, terminated by an entry with option == NULL. + + In the event of matching a single char option, this function will normally + return the char. The exception to this is when the option requires an + argument and none is supplied; in this case we return ':'. + + In the event of matching a long option, this function returns 0, with fz_optlong + set to point to the matching option. + + A long option entry may be followed with : to indicate there is an argument to the + option. If the need for an argument is specified in this way, and no argument is + given, an error will be displayed and argument processing will stop. If an argument + is given, and the long option record contains a non-null flag pointer, then the code + will decode the argument and fill in that flag pointer. Specifically, + case-insensitive matches to 'yes', 'no', 'true' and 'false' will cause a value of 0 + or 1 as appropriate to be written; failing this the arg will be interpreted as a + decimal integer using atoi. + + A long option entry may be followed by a list of options (e.g. myoption=foo|bar|baz) + and the option will be passed to fz_opt_from_list. The return value of that will be + placed in fz_optitem. If the return value of that function is -1, then an error will + be displayed and argument processing will stop. + + In the event of reaching the end of the arg list or '--', this function returns EOF. + + In the event of failing to match anything, an error is printed, and we return '?'. 
+ + If an argument is expected for the option, then fz_optarg will be returned pointing + at the start of the argument. Examples of supported argument formats: '-r500', '-r 500', + '--resolution 500', '--resolution=500'. +*/ +extern int fz_getopt_long(int nargc, char * const *nargv, const char *ostr, const fz_getopt_long_options *longopts); + +/** + Identical to fz_getopt_long, but with a NULL longopts field, signifying no long + options. */ extern int fz_getopt(int nargc, char * const *nargv, const char *ostr); + +/** + fz_optind is updated to point to the current index being read from the + arguments. +*/ FZ_DATA extern int fz_optind; + +/** + fz_optarg is a pointer to the argument data for the most recently + read option. +*/ FZ_DATA extern char *fz_optarg; +/** + fz_optlong is a pointer to the record for the most recently + read long option. (i.e. if a long option is detected, this + will be set to point to the record for that option, otherwise + it will be NULL). +*/ +FZ_DATA extern const fz_getopt_long_options *fz_optlong; + +/** + The item number for the most recently matched item list. + + First item in the list is numbered 0. No match is -1. +*/ +FZ_DATA extern int fz_optitem; + +/** + Return the index of a (case-insensitive) option within an optlist. + + For instance for optlist = "Foo|Bar|Baz", and opt = "bar", + this would return 1. + + If the optlist ends with "|*" then that is a catch all case and + matches all options allowing the caller to process it itself. + fz_optarg will be set to point to the option, and the return + value will be the index of the '*' option within that list. + + If an optlist entry ends with ':' (e.g. "Foo:") then that option + may have suboptions appended to it (for example "JPG:80") and + fz_optarg will be set to point at "80". Otherwise fz_optarg will + be set to NULL. + + In the event of no-match found, prints an error and returns -1. 
+*/ +int fz_opt_from_list(char *opt, const char *optlist); + #endif diff --git a/misc/mupdf/include/mupdf/fitz/glyph-cache.h b/misc/mupdf/include/mupdf/fitz/glyph-cache.h index 4d8bebf..c4b5fcf 100644 --- a/misc/mupdf/include/mupdf/fitz/glyph-cache.h +++ b/misc/mupdf/include/mupdf/fitz/glyph-cache.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_GLYPH_CACHE_H #define MUPDF_FITZ_GLYPH_CACHE_H @@ -27,6 +27,7 @@ #include "mupdf/fitz/geometry.h" #include "mupdf/fitz/font.h" #include "mupdf/fitz/pixmap.h" +#include "mupdf/fitz/device.h" /** Purge all the glyphs from the cache. diff --git a/misc/mupdf/include/mupdf/fitz/glyph.h b/misc/mupdf/include/mupdf/fitz/glyph.h index 20e0455..960a4ff 100644 --- a/misc/mupdf/include/mupdf/fitz/glyph.h +++ b/misc/mupdf/include/mupdf/fitz/glyph.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_GLYPH_H #define MUPDF_FITZ_GLYPH_H @@ -27,6 +27,8 @@ #include "mupdf/fitz/context.h" #include "mupdf/fitz/geometry.h" #include "mupdf/fitz/store.h" +#include "mupdf/fitz/font.h" +#include "mupdf/fitz/path.h" /** Glyphs represent a run length encoded set of pixels for a 2 diff --git a/misc/mupdf/include/mupdf/fitz/hash.h b/misc/mupdf/include/mupdf/fitz/hash.h index 9cd2de6..873cf32 100644 --- a/misc/mupdf/include/mupdf/fitz/hash.h +++ b/misc/mupdf/include/mupdf/fitz/hash.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_HASH_H #define MUPDF_FITZ_HASH_H diff --git a/misc/mupdf/include/mupdf/fitz/heap-imp.h b/misc/mupdf/include/mupdf/fitz/heap-imp.h new file mode 100644 index 0000000..5e042d9 --- /dev/null +++ b/misc/mupdf/include/mupdf/fitz/heap-imp.h @@ -0,0 +1,163 @@ +// Copyright (C) 2004-2022 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. 
+// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +/* This file has preprocessor magic in it to instantiate both + * prototypes and implementations for heap sorting structures + * of various different types. Effectively, it's templating for + * C. + * + * If you are including this file directly without intending to + * be instantiating a new set of heap sort functions, you are + * doing the wrong thing. + */ + +#ifndef MUPDF_FITZ_HEAP_I_KNOW_WHAT_IM_DOING +#error Do not include heap-imp.h unless you know what youre doing +#endif + +#define HEAP_XCAT(A,B) A##B +#define HEAP_CAT(A,B) HEAP_XCAT(A,B) + +#ifndef MUPDF_FITZ_HEAP_IMPLEMENT +typedef struct +{ + int max; + int len; + HEAP_CONTAINER_TYPE *heap; +} HEAP_TYPE_NAME; +#endif + +void HEAP_CAT(HEAP_TYPE_NAME,_insert)(fz_context *ctx, HEAP_TYPE_NAME *heap, HEAP_CONTAINER_TYPE v +#ifndef HEAP_CMP + , int (*HEAP_CMP)(HEAP_CONTAINER_TYPE *a, HEAP_CONTAINER_TYPE *b) +#endif + ) +#ifndef MUPDF_FITZ_HEAP_IMPLEMENT +; +#else +{ + int i; + HEAP_CONTAINER_TYPE *h; + + if (heap->max == heap->len) + { + int m = heap->max * 2; + + if (m == 0) + m = 32; + + heap->heap = (HEAP_CONTAINER_TYPE *)fz_realloc(ctx, heap->heap, sizeof(*heap->heap) * m); + heap->max = m; + } + h = heap->heap; + + /* Insert it into the heap. Consider inserting at position i, and + * then 'heapify' back. We can delay the actual insertion to the + * end of the process. 
*/ + i = heap->len++; + while (i != 0) + { + int parent_idx = (i-1)/2; + HEAP_CONTAINER_TYPE *parent_val = &h[parent_idx]; + if (HEAP_CMP(parent_val, &v) > 0) + break; + h[i] = h[parent_idx]; + i = parent_idx; + } + h[i] = v; +} +#endif + +void HEAP_CAT(HEAP_TYPE_NAME,_sort)(fz_context *ctx, HEAP_TYPE_NAME *heap +#ifndef HEAP_CMP + , int (*HEAP_CMP)(HEAP_CONTAINER_TYPE *a, HEAP_CONTAINER_TYPE *b) +#endif + ) +#ifndef MUPDF_FITZ_HEAP_IMPLEMENT +; +#else +{ + int j; + HEAP_CONTAINER_TYPE *h = heap->heap; + + /* elements j to len are always sorted. 0 to j are always a valid heap. Gradually move j to 0. */ + for (j = heap->len-1; j > 0; j--) + { + int k; + HEAP_CONTAINER_TYPE val; + + /* Swap max element with j. Invariant valid for next value to j. */ + val = h[j]; + h[j] = h[0]; + /* Now reform the heap. 0 to k is a valid heap. */ + k = 0; + while (1) + { + int kid = k*2+1; + if (kid >= j) + break; + if (kid+1 < j && (HEAP_CMP(&h[kid+1], &h[kid])) > 0) + kid++; + if ((HEAP_CMP(&val, &h[kid])) > 0) + break; + h[k] = h[kid]; + k = kid; + } + h[k] = val; + } +} +#endif + +void HEAP_CAT(HEAP_TYPE_NAME,_uniq)(fz_context *ctx, HEAP_TYPE_NAME *heap +#ifndef HEAP_CMP + , int (*HEAP_CMP)(HEAP_CONTAINER_TYPE *a, HEAP_CONTAINER_TYPE *b) +#endif + ) +#ifndef MUPDF_FITZ_HEAP_IMPLEMENT +; +#else +{ + int n = heap->len; + int i, j = 0; + HEAP_CONTAINER_TYPE *h = heap->heap; + + if (n == 0) + return; + + j = 0; + for (i = 1; i < n; i++) + { + if (HEAP_CMP(&h[j], &h[i]) == 0) + continue; + j++; + if (i != j) + h[j] = h[i]; + } + heap->len = j+1; +} +#endif + +#undef HEAP_CONTAINER_TYPE +#undef HEAP_TYPE_NAME +#undef HEAP_CMP +#undef HEAP_XCAT +#undef HEAP_CAT diff --git a/misc/mupdf/include/mupdf/fitz/heap.h b/misc/mupdf/include/mupdf/fitz/heap.h new file mode 100644 index 0000000..97d98a8 --- /dev/null +++ b/misc/mupdf/include/mupdf/fitz/heap.h @@ -0,0 +1,140 @@ +// Copyright (C) 2004-2022 Artifex Software, Inc. +// +// This file is part of MuPDF. 
+// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +/* This file has preprocessor magic in it to instantiate both + * prototypes and implementations for heap sorting structures + * of various different types. Effectively, it's templating for + * C. + * + * If you are including this file directly without intending to + * be instantiating a new set of heap sort functions, you are + * doing the wrong thing. + */ + +/* This header file declares some useful heap functions. (Heap + * as in heap sort, not as in memory heap). It uses some + * clever (read "hacky") multiple inclusion techniques to allow + * us to generate multiple different versions of this code. + * This is kinda like 'templating' in C++, but without language + * support. + */ + +/* For every instance of this code, we end up with a heap structure: + * + * typedef struct + * { + * int max; + * int len; + * TYPE *heap; + * } fz_TYPE_heap; + * + * This can be created and initialised on the stack in user code using: + * + * fz_TYPE_heap heap = { 0 }; + * + * and some functions. 
+ * + * When TYPE is a simple int (or float or similar), the ordering required is + * obvious, and so the functions are simple (Form 1): + * + * First some to insert elements into the heap: + * + * void fz_TYPE_heap_insert(fz_context *ctx, fz_TYPE_heap *heap, TYPE v); + * + * Once all the elements have been inserted, the heap can be sorted: + * + * void fz_TYPE_heap_sort(fz_context *ctx, fz_TYPE_heap *heap); + * + * Once sorted, repeated elements can be removed: + * + * void fz_TYPE_heap_uniq(fz_context *ctx, fz_TYPE_heap *heap); + * + * + * For more complex TYPEs (such as pointers) the ordering may not be implicit within the TYPE, + * but rather depends upon the data found by dereferencing those pointers. For such types, + * the functions are modified with a CMPFN function, of the form used by qsort etc: + * + * int CMPFN(x, y) that returns 0 for x == y, +ve for x > y, and -ve for x < y. + * + * The functions are modified thus (Form 2): + * + * void fz_TYPE_heap_insert(fz_context *ctx, fz_TYPE_heap *heap, TYPE v, CMPFN t); + * void fz_TYPE_heap_sort(fz_context *ctx, fz_TYPE_heap *heap, CMPFN t); + * void fz_TYPE_heap_uniq(fz_context *ctx, fz_TYPE_heap *heap, CMPFN t); + * + * Currently, we define: + * + * fz_int_heap Operates on 'int' values. Form 1. + * fz_ptr_heap Operates on 'void *' values. Form 2. + * fz_int2_heap Operates on 'typedef struct { int a; int b} fz_int2' values, + * with the sort/uniq being done based on 'a' alone. Form 1. + * fz_intptr_heap Operates on 'typedef struct { int a; void *b} fz_intptr' values, + * with the sort/uniq being done based on 'a' alone. Form 1. + */ + +/* Everything after this point is preprocessor magic. Ignore it, and just read the above + * unless you are wanting to instantiate a new set of functions. 
*/ + +#ifndef MUPDF_FITZ_HEAP_H + +#define MUPDF_FITZ_HEAP_H + +#define MUPDF_FITZ_HEAP_I_KNOW_WHAT_IM_DOING + +/* Instantiate fz_int_heap */ +#define HEAP_TYPE_NAME fz_int_heap +#define HEAP_CONTAINER_TYPE int +#define HEAP_CMP(a,b) ((*a) - (*b)) +#include "mupdf/fitz/heap-imp.h" + +/* Instantiate fz_ptr_heap */ +#define HEAP_TYPE_NAME fz_ptr_heap +#define HEAP_CONTAINER_TYPE void * +#include "mupdf/fitz/heap-imp.h" + +/* Instantiate fz_int2_heap */ +#ifndef MUPDF_FITZ_HEAP_IMPLEMENT +typedef struct +{ + int a; + int b; +} fz_int2; +#endif +#define HEAP_TYPE_NAME fz_int2_heap +#define HEAP_CMP(A,B) (((A)->a) - ((B)->a)) +#define HEAP_CONTAINER_TYPE fz_int2 +#include "mupdf/fitz/heap-imp.h" + +/* Instantiate fz_intptr_heap */ +#ifndef MUPDF_FITZ_HEAP_IMPLEMENT +typedef struct +{ + int a; + int b; +} fz_intptr; +#endif +#define HEAP_TYPE_NAME fz_intptr_heap +#define HEAP_CONTAINER_TYPE fz_intptr +#define HEAP_CMP(A,B) (((A)->a) - ((B)->a)) +#include "mupdf/fitz/heap-imp.h" + +#endif /* MUPDF_FITZ_HEAP_H */ diff --git a/misc/mupdf/include/mupdf/fitz/image.h b/misc/mupdf/include/mupdf/fitz/image.h index f4ce009..3f9dee1 100644 --- a/misc/mupdf/include/mupdf/fitz/image.h +++ b/misc/mupdf/include/mupdf/fitz/image.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_IMAGE_H #define MUPDF_FITZ_IMAGE_H @@ -62,7 +62,7 @@ typedef struct fz_pixmap_image fz_pixmap_image; h: If non-NULL, a pointer to an int to be updated on exit to the height (in pixels) that the scaled output will cover. 
- Returns a non NULL pixmap pointer. May throw exceptions. + Returns a non NULL kept pixmap pointer. May throw exceptions. */ fz_pixmap *fz_get_pixmap_from_image(fz_context *ctx, fz_image *image, const fz_irect *subarea, fz_matrix *ctm, int *w, int *h); @@ -199,8 +199,8 @@ fz_image *fz_new_image_of_size(fz_context *ctx, int yres, int interpolate, int imagemask, - float *decode, - int *colorkey, + const float *decode, + const int *colorkey, fz_image *mask, size_t size, fz_image_get_pixmap_fn *get_pixmap, @@ -243,7 +243,7 @@ fz_image *fz_new_image_of_size(fz_context *ctx, A new reference is taken to this image. Supplying a masked image as a mask to another image is illegal! */ -fz_image *fz_new_image_from_compressed_buffer(fz_context *ctx, int w, int h, int bpc, fz_colorspace *colorspace, int xres, int yres, int interpolate, int imagemask, float *decode, int *colorkey, fz_compressed_buffer *buffer, fz_image *mask); +fz_image *fz_new_image_from_compressed_buffer(fz_context *ctx, int w, int h, int bpc, fz_colorspace *colorspace, int xres, int yres, int interpolate, int imagemask, const float *decode, const int *colorkey, fz_compressed_buffer *buffer, fz_image *mask); /** Create an image from the given @@ -291,6 +291,8 @@ void fz_drop_image_base(fz_context *ctx, fz_image *image); subsampling that should be performed by this routine. This will be updated on exit to the amount of subsampling that is still required to be done. + + Returns a kept reference. */ fz_pixmap *fz_decomp_image_from_stream(fz_context *ctx, fz_stream *stm, fz_compressed_image *image, fz_irect *subarea, int indexed, int l2factor, int *l2extra); @@ -313,6 +315,14 @@ fz_pixmap *fz_convert_separation_pixmap_to_base(fz_context *ctx, const fz_pixmap */ size_t fz_image_size(fz_context *ctx, fz_image *im); +/** + Return the type of a compressed image. + + Any non-compressed image will have the type returned as UNKNOWN. 
+*/ +int fz_compressed_image_type(fz_context *ctx, fz_image *image); + + /** Structure is public to allow other structures to be derived from it. Do not access members directly. @@ -327,7 +337,6 @@ struct fz_image unsigned int interpolate:1; unsigned int use_colorkey:1; unsigned int use_decode:1; - unsigned int invert_cmyk_jpeg:1; unsigned int decoded:1; unsigned int scalable:1; uint8_t orientation; @@ -412,6 +421,13 @@ void fz_set_pixmap_image_tile(fz_context *ctx, fz_pixmap_image *cimg, fz_pixmap */ fz_pixmap *fz_load_jpx(fz_context *ctx, const unsigned char *data, size_t size, fz_colorspace *cs); +/** + Exposed because compression and decompression need to share this. +*/ +void opj_lock(fz_context *ctx); +void opj_unlock(fz_context *ctx); + + /** Exposed for CBZ. */ diff --git a/misc/mupdf/include/mupdf/fitz/link.h b/misc/mupdf/include/mupdf/fitz/link.h index dffad00..1a20a22 100644 --- a/misc/mupdf/include/mupdf/fitz/link.h +++ b/misc/mupdf/include/mupdf/fitz/link.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_LINK_H #define MUPDF_FITZ_LINK_H @@ -26,6 +26,12 @@ #include "mupdf/fitz/system.h" #include "mupdf/fitz/context.h" #include "mupdf/fitz/geometry.h" +#include "mupdf/fitz/types.h" + +typedef struct fz_link fz_link; +typedef void (fz_link_set_rect_fn)(fz_context *ctx, fz_link *link, fz_rect rect); +typedef void (fz_link_set_uri_fn)(fz_context *ctx, fz_link *link, const char *uri); +typedef void (fz_link_drop_link_fn)(fz_context *ctx, fz_link *link); /** fz_link is a list of interactive links on a page. @@ -52,17 +58,46 @@ typedef struct fz_link struct fz_link *next; fz_rect rect; char *uri; + fz_link_set_rect_fn *set_rect_fn; + fz_link_set_uri_fn *set_uri_fn; + fz_link_drop_link_fn *drop; } fz_link; +typedef enum +{ + FZ_LINK_DEST_FIT, + FZ_LINK_DEST_FIT_B, + FZ_LINK_DEST_FIT_H, + FZ_LINK_DEST_FIT_BH, + FZ_LINK_DEST_FIT_V, + FZ_LINK_DEST_FIT_BV, + FZ_LINK_DEST_FIT_R, + FZ_LINK_DEST_XYZ +} fz_link_dest_type; + +typedef struct +{ + fz_location loc; + fz_link_dest_type type; + float x, y, w, h, zoom; +} fz_link_dest; + +fz_link_dest fz_make_link_dest_none(void); +fz_link_dest fz_make_link_dest_xyz(int chapter, int page, float x, float y, float z); + /** Create a new link record. next is set to NULL with the expectation that the caller will - handle the linked list setup. + handle the linked list setup. Internal function. - Internal function. + Different document types will be implemented by deriving from + fz_link. This macro allocates such derived structures, and + initialises the base sections. */ -fz_link *fz_new_link(fz_context *ctx, fz_rect bbox, const char *uri); +fz_link *fz_new_link_of_size(fz_context *ctx, int size, fz_rect rect, const char *uri); +#define fz_new_derived_link(CTX,TYPE,RECT,URI) \ + ((TYPE *)Memento_label(fz_new_link_of_size(CTX,sizeof(TYPE),RECT,URI),#TYPE)) /** Increment the reference count for a link. 
The same pointer is @@ -89,4 +124,7 @@ void fz_drop_link(fz_context *ctx, fz_link *link); */ int fz_is_external_link(fz_context *ctx, const char *uri); +void fz_set_link_rect(fz_context *ctx, fz_link *link, fz_rect rect); +void fz_set_link_uri(fz_context *ctx, fz_link *link, const char *uri); + #endif diff --git a/misc/mupdf/include/mupdf/fitz/log.h b/misc/mupdf/include/mupdf/fitz/log.h index 82c4ae3..50892a0 100644 --- a/misc/mupdf/include/mupdf/fitz/log.h +++ b/misc/mupdf/include/mupdf/fitz/log.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_LOG_H #define MUPDF_FITZ_LOG_H diff --git a/misc/mupdf/include/mupdf/fitz/outline.h b/misc/mupdf/include/mupdf/fitz/outline.h index 9e0e5d8..6f5810c 100644 --- a/misc/mupdf/include/mupdf/fitz/outline.h +++ b/misc/mupdf/include/mupdf/fitz/outline.h @@ -17,19 +17,77 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_OUTLINE_H #define MUPDF_FITZ_OUTLINE_H #include "mupdf/fitz/system.h" +#include "mupdf/fitz/types.h" #include "mupdf/fitz/context.h" #include "mupdf/fitz/link.h" #include "mupdf/fitz/output.h" /* Outline */ +typedef struct { + char *title; + char *uri; + int is_open; +} fz_outline_item; + +typedef struct fz_outline_iterator fz_outline_iterator; + +/** + Call to get the current outline item. + + Can return NULL. 
The item is only valid until the next call. +*/ +fz_outline_item *fz_outline_iterator_item(fz_context *ctx, fz_outline_iterator *iter); + +/** + Calls to move the iterator position. + + A negative return value means we could not move as requested. Otherwise: + 0 = the final position has a valid item. + 1 = not a valid item, but we can insert an item here. +*/ +int fz_outline_iterator_next(fz_context *ctx, fz_outline_iterator *iter); +int fz_outline_iterator_prev(fz_context *ctx, fz_outline_iterator *iter); +int fz_outline_iterator_up(fz_context *ctx, fz_outline_iterator *iter); +int fz_outline_iterator_down(fz_context *ctx, fz_outline_iterator *iter); + +/** + Call to insert a new item BEFORE the current point. + + Ownership of pointers are retained by the caller. The item data will be copied. + + After an insert, we do not change where we are pointing. + The return code is the same as for next, it indicates the current iterator position. +*/ +int fz_outline_iterator_insert(fz_context *ctx, fz_outline_iterator *iter, fz_outline_item *item); + +/** + Delete the current item. + + This implicitly moves us to the 'next' item, and the return code is as for fz_outline_iterator_next. +*/ +int fz_outline_iterator_delete(fz_context *ctx, fz_outline_iterator *iter); + +/** + Update the current item properties according to the given item. +*/ +void fz_outline_iterator_update(fz_context *ctx, fz_outline_iterator *iter, fz_outline_item *item); + +/** + Drop the current iterator. +*/ +void fz_drop_outline_iterator(fz_context *ctx, fz_outline_iterator *iter); + + +/** Structure based API */ + /** fz_outline is a tree of the outline of a document (also known as table of contents). 
@@ -55,7 +113,7 @@ typedef struct fz_outline int refs; char *title; char *uri; - int page; + fz_location page; float x, y; struct fz_outline *next; struct fz_outline *down; @@ -87,4 +145,84 @@ fz_outline *fz_keep_outline(fz_context *ctx, fz_outline *outline); */ void fz_drop_outline(fz_context *ctx, fz_outline *outline); +/** + Routine to implement the old Structure based API from an iterator. +*/ +fz_outline * +fz_load_outline_from_iterator(fz_context *ctx, fz_outline_iterator *iter); + + +/** + Implementation details. + Of use to people coding new document handlers. +*/ + +/** + Function type for getting the current item. + + Can return NULL. The item is only valid until the next call. +*/ +typedef fz_outline_item *(fz_outline_iterator_item_fn)(fz_context *ctx, fz_outline_iterator *iter); + +/** + Function types for moving the iterator position. + + A negative return value means we could not move as requested. Otherwise: + 0 = the final position has a valid item. + 1 = not a valid item, but we can insert an item here. +*/ +typedef int (fz_outline_iterator_next_fn)(fz_context *ctx, fz_outline_iterator *iter); +typedef int (fz_outline_iterator_prev_fn)(fz_context *ctx, fz_outline_iterator *iter); +typedef int (fz_outline_iterator_up_fn)(fz_context *ctx, fz_outline_iterator *iter); +typedef int (fz_outline_iterator_down_fn)(fz_context *ctx, fz_outline_iterator *iter); + +/** + Function type for inserting a new item BEFORE the current point. + + Ownership of pointers are retained by the caller. The item data will be copied. + + After an insert, we implicitly do a next, so that a successive insert operation + would insert after the item inserted here. The return code is therefore as for next. +*/ +typedef int (fz_outline_iterator_insert_fn)(fz_context *ctx, fz_outline_iterator *iter, fz_outline_item *item); + +/** + Function type for deleting the current item. + + This implicitly moves us to the 'next' item, and the return code is as for fz_outline_iterator_next. 
+*/ +typedef int (fz_outline_iterator_delete_fn)(fz_context *ctx, fz_outline_iterator *iter); + +/** + Function type for updating the current item properties according to the given item. +*/ +typedef void (fz_outline_iterator_update_fn)(fz_context *ctx, fz_outline_iterator *iter, fz_outline_item *item); + +/** + Function type for dropping the current iterator. +*/ +typedef void (fz_outline_iterator_drop_fn)(fz_context *ctx, fz_outline_iterator *iter); + +#define fz_new_derived_outline_iter(CTX, TYPE, DOC)\ + ((TYPE *)Memento_label(fz_new_outline_iterator_of_size(ctx,sizeof(TYPE),DOC),#TYPE)) + +fz_outline_iterator *fz_new_outline_iterator_of_size(fz_context *ctx, size_t size, fz_document *doc); + +fz_outline_iterator *fz_outline_iterator_from_outline(fz_context *ctx, fz_outline *outline); + +struct fz_outline_iterator { + /* Functions */ + fz_outline_iterator_drop_fn *drop; + fz_outline_iterator_item_fn *item; + fz_outline_iterator_next_fn *next; + fz_outline_iterator_prev_fn *prev; + fz_outline_iterator_up_fn *up; + fz_outline_iterator_down_fn *down; + fz_outline_iterator_insert_fn *insert; + fz_outline_iterator_update_fn *update; + fz_outline_iterator_delete_fn *del; + /* Common state */ + fz_document *doc; +}; + #endif diff --git a/misc/mupdf/include/mupdf/fitz/output-svg.h b/misc/mupdf/include/mupdf/fitz/output-svg.h index 5885235..b1b07ab 100644 --- a/misc/mupdf/include/mupdf/fitz/output-svg.h +++ b/misc/mupdf/include/mupdf/fitz/output-svg.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_OUTPUT_SVG_H #define MUPDF_FITZ_OUTPUT_SVG_H diff --git a/misc/mupdf/include/mupdf/fitz/output.h b/misc/mupdf/include/mupdf/fitz/output.h index 69178e1..ce7039d 100644 --- a/misc/mupdf/include/mupdf/fitz/output.h +++ b/misc/mupdf/include/mupdf/fitz/output.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_OUTPUT_H #define MUPDF_FITZ_OUTPUT_H @@ -54,7 +54,7 @@ typedef void (fz_output_write_fn)(fz_context *ctx, void *state, const void *data state: The output stream state to seek within. - offset, whence: as defined for fs_seek_output. + offset, whence: as defined for fz_seek(). */ typedef void (fz_output_seek_fn)(fz_context *ctx, void *state, int64_t offset, int whence); @@ -76,6 +76,14 @@ typedef int64_t (fz_output_tell_fn)(fz_context *ctx, void *state); */ typedef void (fz_output_close_fn)(fz_context *ctx, void *state); +/** + A function type for use when implementing + fz_outputs. The supplied function of this type is called + when the output stream is reset, and resets the state + to that when it was first initialised. +*/ +typedef void (fz_output_reset_fn)(fz_context *ctx, void *state); + /** A function type for use when implementing fz_outputs. 
The supplied function of this type is called @@ -107,9 +115,15 @@ struct fz_output fz_output_tell_fn *tell; fz_output_close_fn *close; fz_output_drop_fn *drop; + fz_output_reset_fn *reset; fz_stream_from_output_fn *as_stream; fz_truncate_fn *truncate; + int closed; char *bp, *wp, *ep; + /* If buffered is non-zero, then we have that many + * bits (1-7) waiting to be written in bits. */ + int buffered; + int bits; }; /** @@ -218,6 +232,14 @@ void fz_flush_output(fz_context *ctx, fz_output *out); */ void fz_close_output(fz_context *, fz_output *); +/** + Reset a closed output stream. Returns state to + (broadly) that which it was in when opened. Not + all outputs can be reset, so this may throw an + exception. +*/ +void fz_reset_output(fz_context *, fz_output *); + /** Free an output stream. Don't forget to close it first! */ @@ -253,6 +275,7 @@ void fz_truncate_output(fz_context *, fz_output *); size: Size of data to write in bytes. */ void fz_write_data(fz_context *ctx, fz_output *out, const void *data, size_t size); +void fz_write_buffer(fz_context *ctx, fz_output *out, fz_buffer *data); /** Write a string. Does not write zero terminator. @@ -292,9 +315,20 @@ void fz_write_base64(fz_context *ctx, fz_output *out, const unsigned char *data, */ void fz_write_base64_buffer(fz_context *ctx, fz_output *out, fz_buffer *data, int newline); +/** + Write num_bits of data to the end of the output stream, assumed to be packed + most significant bits first. +*/ +void fz_write_bits(fz_context *ctx, fz_output *out, unsigned int data, int num_bits); + +/** + Sync to byte boundary after writing bits. +*/ +void fz_write_bits_sync(fz_context *ctx, fz_output *out); + /** Our customised 'printf'-like string formatter. - Takes %c, %d, %s, %u, %x, as usual. + Takes %c, %d, %s, %u, %x, %X as usual. Modifiers are not supported except for zero-padding ints (e.g. %02d, %03u, %04x, etc). 
%g output in "as short as possible hopefully lossless @@ -306,8 +340,8 @@ void fz_write_base64_buffer(fz_context *ctx, fz_output *out, fz_buffer *data, in %P outputs a fz_point*. %n outputs a PDF name (with appropriate escaping). %q and %( output escaped strings in C/PDF syntax. - %l{d,u,x} indicates that the values are int64_t. - %z{d,u,x} indicates that the value is a size_t. + %l{d,u,x,X} indicates that the values are int64_t. + %z{d,u,x,X} indicates that the value is a size_t. user: An opaque pointer that is passed to the emit function. diff --git a/misc/mupdf/include/mupdf/fitz/path.h b/misc/mupdf/include/mupdf/fitz/path.h index 1e906cd..78c9b08 100644 --- a/misc/mupdf/include/mupdf/fitz/path.h +++ b/misc/mupdf/include/mupdf/fitz/path.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_PATH_H #define MUPDF_FITZ_PATH_H @@ -152,28 +152,20 @@ int fz_packed_path_size(const fz_path *path); be aligned by the caller to the same alignment as required for a fz_path pointer. - max: The number of bytes available in the block. - If max < sizeof(fz_path) then an exception will - be thrown. If max >= the value returned by - fz_packed_path_size, then this call will never - fail, except in low memory situations with large - paths. - path: The path to pack. Returns the number of bytes within the block used. Callers can access the packed path data by casting the value of pack on entry to be a fz_path *. - Throws exceptions on failure to allocate, or if - max < sizeof(fz_path). + Throws exceptions on failure to allocate. Implementation details: Paths can be 'unpacked', 'flat', or - 'open'. Standard paths, as created are 'unpacked'. 
Paths that - will pack into less than max bytes will be packed as 'flat', - unless they are too large (where large indicates that they - exceed some private implementation defined limits, currently - including having more than 256 coordinates or commands). + 'open'. Standard paths, as created are 'unpacked'. Paths + will be packed as 'flat', unless they are too large + (where large indicates that they exceed some private + implementation defined limits, currently including having + more than 256 coordinates or commands). Large paths are 'open' packed as a header into the given block, plus pointers to other data blocks. @@ -182,7 +174,7 @@ int fz_packed_path_size(const fz_path *path); or 'flat' packed. Simply pack a path (if required), and then forget about the details. */ -size_t fz_pack_path(fz_context *ctx, uint8_t *pack, size_t max, const fz_path *path); +size_t fz_pack_path(fz_context *ctx, uint8_t *pack, const fz_path *path); /** Clone the data for a path. diff --git a/misc/mupdf/include/mupdf/fitz/pixmap.h b/misc/mupdf/include/mupdf/fitz/pixmap.h index 8565639..e1c1fab 100644 --- a/misc/mupdf/include/mupdf/fitz/pixmap.h +++ b/misc/mupdf/include/mupdf/fitz/pixmap.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_PIXMAP_H #define MUPDF_FITZ_PIXMAP_H @@ -65,6 +65,11 @@ int fz_pixmap_x(fz_context *ctx, const fz_pixmap *pix); */ int fz_pixmap_y(fz_context *ctx, const fz_pixmap *pix); +/** + Return sizeof fz_pixmap plus size of data, in bytes. 
+*/ +size_t fz_pixmap_size(fz_context *ctx, fz_pixmap *pix); + /** Create a new pixmap, with its origin at (0,0) @@ -319,12 +324,18 @@ void fz_invert_pixmap_luminance(fz_context *ctx, fz_pixmap *pix); void fz_tint_pixmap(fz_context *ctx, fz_pixmap *pix, int black, int white); /** - Invert all the pixels in a given rectangle of a + Invert all the pixels in a given rectangle of a (premultiplied) pixmap. All components of all pixels in the rectangle are inverted (except alpha, which is unchanged). */ void fz_invert_pixmap_rect(fz_context *ctx, fz_pixmap *image, fz_irect rect); +/** + Invert all the pixels in a non-premultiplied pixmap in a + very naive manner. +*/ +void fz_invert_pixmap_raw(fz_context *ctx, fz_pixmap *pix); + /** Apply gamma correction to a pixmap. All components of all pixels are modified (except alpha, which is unchanged). @@ -465,4 +476,26 @@ fz_pixmap *fz_new_pixmap_from_alpha_channel(fz_context *ctx, fz_pixmap *src); */ fz_pixmap *fz_new_pixmap_from_color_and_mask(fz_context *ctx, fz_pixmap *color, fz_pixmap *mask); +/* + * Scale the pixmap up or down in size to fit the rectangle. Will return `NULL` + * if the scaling factors are out of range. This applies fancy filtering and + * will anti-alias the edges for subpixel positioning if using non-integer + * coordinates. If the clip rectangle is set, the returned pixmap may be subset + * to fit the clip rectangle. Pass `NULL` to the clip if you want the whole + * pixmap scaled. + */ +fz_pixmap *fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip); + +/* + * Reduces size to: + * tile->w => (tile->w + 2^factor-1) / 2^factor + * tile->h => (tile->h + 2^factor-1) / 2^factor + */ +void fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor); + +/* + * Copies r (clipped to both src and dest) in src to dest. 
+ */ +void fz_copy_pixmap_rect(fz_context *ctx, fz_pixmap *dest, fz_pixmap *src, fz_irect r, const fz_default_colorspaces *default_cs); + #endif diff --git a/misc/mupdf/include/mupdf/fitz/pool.h b/misc/mupdf/include/mupdf/fitz/pool.h index 059a680..43bc6b2 100644 --- a/misc/mupdf/include/mupdf/fitz/pool.h +++ b/misc/mupdf/include/mupdf/fitz/pool.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_POOL_H #define MUPDF_FITZ_POOL_H diff --git a/misc/mupdf/include/mupdf/fitz/separation.h b/misc/mupdf/include/mupdf/fitz/separation.h index 20e5b39..0a6e2fd 100644 --- a/misc/mupdf/include/mupdf/fitz/separation.h +++ b/misc/mupdf/include/mupdf/fitz/separation.h @@ -17,14 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_SEPARATION_H #define MUPDF_FITZ_SEPARATION_H #include "mupdf/fitz/system.h" #include "mupdf/fitz/context.h" +#include "mupdf/fitz/color.h" /** A fz_separation structure holds details of a set of separations @@ -106,6 +107,14 @@ int fz_count_separations(fz_context *ctx, const fz_separations *sep); */ int fz_count_active_separations(fz_context *ctx, const fz_separations *seps); +/** + Compare 2 separations structures (or NULLs). + + Return 0 if identical, non-zero if not identical. 
+*/ +int fz_compare_separations(fz_context *ctx, const fz_separations *sep1, const fz_separations *sep2); + + /** Return a separations object with all the spots in the input separations object that are set to composite, reset to be diff --git a/misc/mupdf/include/mupdf/fitz/shade.h b/misc/mupdf/include/mupdf/fitz/shade.h index 4e7b935..8acde99 100644 --- a/misc/mupdf/include/mupdf/fitz/shade.h +++ b/misc/mupdf/include/mupdf/fitz/shade.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_SHADE_H #define MUPDF_FITZ_SHADE_H diff --git a/misc/mupdf/include/mupdf/fitz/store.h b/misc/mupdf/include/mupdf/fitz/store.h index c8acf1f..452bb59 100644 --- a/misc/mupdf/include/mupdf/fitz/store.h +++ b/misc/mupdf/include/mupdf/fitz/store.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_STORE_H #define MUPDF_FITZ_STORE_H @@ -59,6 +59,14 @@ typedef struct fz_storable fz_storable; */ typedef void (fz_store_drop_fn)(fz_context *, fz_storable *); +/** + Function type for a function to check whether a storable + object can be dropped at the moment. + + Return 0 for 'cannot be dropped', 1 otherwise. +*/ +typedef int (fz_store_droppable_fn)(fz_context *, fz_storable *); + /** Any storable object should include an fz_storable structure at the start (by convention at least) of their structure. 
@@ -67,6 +75,7 @@ typedef void (fz_store_drop_fn)(fz_context *, fz_storable *); struct fz_storable { int refs; fz_store_drop_fn *drop; + fz_store_droppable_fn *droppable; }; /** @@ -81,11 +90,16 @@ typedef struct } fz_key_storable; /** - Macro to initialise a storable object. + Macros to initialise a storable object. */ #define FZ_INIT_STORABLE(S_,RC,DROP) \ do { fz_storable *S = &(S_)->storable; S->refs = (RC); \ - S->drop = (DROP); \ + S->drop = (DROP); S->droppable = NULL; \ + } while (0) + +#define FZ_INIT_AWKWARD_STORABLE(S_,RC,DROP,DROPPABLE) \ + do { fz_storable *S = &(S_)->storable; S->refs = (RC); \ + S->drop = (DROP); S->droppable = (DROPPABLE); \ } while (0) /** diff --git a/misc/mupdf/include/mupdf/fitz/story-writer.h b/misc/mupdf/include/mupdf/fitz/story-writer.h new file mode 100644 index 0000000..70cbcd5 --- /dev/null +++ b/misc/mupdf/include/mupdf/fitz/story-writer.h @@ -0,0 +1,209 @@ +// Copyright (C) 2022 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
+ +#ifndef MUPDF_FITZ_STORY_WRITER_H +#define MUPDF_FITZ_STORY_WRITER_H + +#include "mupdf/fitz/story.h" +#include "mupdf/fitz/writer.h" + +/* + * A fz_story_element_position plus page number information; used with + * fz_write_story() and fz_write_stabilized_story(). + */ +typedef struct +{ + fz_story_element_position element; + int page_num; +} fz_write_story_position; + +/* + * A set of fz_write_story_position items; used with + * fz_write_stabilized_story(). + */ +typedef struct +{ + fz_write_story_position *positions; + int num; +} fz_write_story_positions; + + +/* + * Callback type used by fz_write_story() and fz_write_stabilized_story(). + * + * Should set *rect to rect number <num>. If this is on a new page should also + * set *mediabox and return 1, otherwise return 0. + * + * ref: + * As passed to fz_write_story() or fz_write_stabilized_story(). + * num: + * The rect number. Will typically increment by one each time, being reset + * to zero when fz_write_stabilized_story() starts a new iteration. + * filled: + * From earlier internal call to fz_place_story(). + * rect: + * Out param. + * ctm: + * Out param, defaults to fz_identity. + * mediabox: + * Out param, only used if we return 1. + */ +typedef int (fz_write_story_rectfn)(fz_context *ctx, void *ref, int num, fz_rect filled, fz_rect *rect, fz_matrix *ctm, fz_rect *mediabox); + +/* + * Callback used by fz_write_story() to report information about element + * positions. Slightly different from fz_story_position_callback() because + * also includes the page number. + * + * ref: + * As passed to fz_write_story() or fz_write_stabilized_story(). + * position: + * Called via internal call to fz_story_position_callback(). + */ +typedef void (fz_write_story_positionfn)(fz_context *ctx, void *ref, const fz_write_story_position *position); + +/* + * Callback for fz_write_story(), called twice for each page, before (after=0) + * and after (after=1) the story is written.
+ * + * ref: + * As passed to fz_write_story() or fz_write_stabilized_story(). + * page_num: + * Page number, starting from 1. + * mediabox: + * As returned from fz_write_story_rectfn(). + * dev: + * Created from the fz_writer passed to fz_write_story() or + * fz_write_stabilized_story(). + * after: + * 0 - before writing the story. + * 1 - after writing the story. + */ +typedef void (fz_write_story_pagefn)(fz_context *ctx, void *ref, int page_num, fz_rect mediabox, fz_device *dev, int after); + +/* + * Callback type for fz_write_stabilized_story(). + * + * Should populate the supplied buffer with html content for use with internal + * calls to fz_new_story(). This may include extra content derived from + * information in <positions>, for example a table of contents. + * + * ref: + * As passed to fz_write_stabilized_story(). + * positions: + * Information from previous iteration. + * buffer: + * Where to write the new content. Will be initially empty. + */ +typedef void (fz_write_story_contentfn)(fz_context *ctx, void *ref, const fz_write_story_positions *positions, fz_buffer *buffer); + + +/* + * Places and writes a story to a fz_document_writer. Avoids the need + * for calling code to implement a loop that calls fz_place_story() + * and fz_draw_story() etc, at the expense of having to provide a + * fz_write_story_rectfn() callback. + * + * story: + * The story to place and write. + * writer: + * Where to write the story; can be NULL. + * rectfn: + * Should return information about the rect to be used in the next + * internal call to fz_place_story(). + * rectfn_ref: + * Passed to rectfn(). + * positionfn: + * If not NULL, is called via internal calls to fz_story_positions(). + * positionfn_ref: + * Passed to positionfn(). + * pagefn: + * If not NULL, called at start and end of each page (before and after all + * story content has been written to the device). + * pagefn_ref: + * Passed to pagefn().
+ */ +void fz_write_story( + fz_context *ctx, + fz_document_writer *writer, + fz_story *story, + fz_write_story_rectfn rectfn, + void *rectfn_ref, + fz_write_story_positionfn positionfn, + void *positionfn_ref, + fz_write_story_pagefn pagefn, + void *pagefn_ref + ); + + +/* + * Does iterative layout of html content to a fz_document_writer. For example + * this allows one to add a table of contents section while ensuring that page + * numbers are patched up until stable. + * + * Repeatedly creates new story from (contentfn(), contentfn_ref, user_css, em) + * and lays it out with internal call to fz_write_story(); uses a NULL writer + * and populates a fz_write_story_positions which is passed to the next call of + * contentfn(). + * + * When the html from contentfn() becomes unchanged, we do a final iteration + * using <writer>. + * + * writer: + * Where to write in the final iteration. + * user_css: + * Used in internal calls to fz_new_story(). + * em: + * Used in internal calls to fz_new_story(). + * contentfn: + * Should return html content for use with fz_new_story(), possibly + * including extra content such as a table-of-contents. + * contentfn_ref: + * Passed to contentfn(). + * rectfn: + * Should return information about the rect to be used in the next + * internal call to fz_place_story(). + * rectfn_ref: + * Passed to rectfn(). + * pagefn: + * If not NULL, called at start and end of each page (before and after all + * story content has been written to the device). + * pagefn_ref: + * Passed to pagefn(). + * dir: + * NULL, or a directory context to load images etc from.
+ */ +void fz_write_stabilized_story( + fz_context *ctx, + fz_document_writer *writer, + const char *user_css, + float em, + fz_write_story_contentfn contentfn, + void *contentfn_ref, + fz_write_story_rectfn rectfn, + void *rectfn_ref, + fz_write_story_pagefn pagefn, + void *pagefn_ref, + fz_archive *dir + ); + +#endif diff --git a/misc/mupdf/include/mupdf/fitz/story.h b/misc/mupdf/include/mupdf/fitz/story.h new file mode 100644 index 0000000..248dd3b --- /dev/null +++ b/misc/mupdf/include/mupdf/fitz/story.h @@ -0,0 +1,232 @@ +// Copyright (C) 2004-2021 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#ifndef MUPDF_FITZ_STORY_H +#define MUPDF_FITZ_STORY_H + +#include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" +#include "mupdf/fitz/buffer.h" +#include "mupdf/fitz/device.h" +#include "mupdf/fitz/xml.h" +#include "mupdf/fitz/archive.h" + +/* + This header file provides an API for laying out and placing styled + text on a page, or pages. + + First a text story is created from some styled HTML. 
+ + Next, this story can be laid out into a given rectangle (possibly + retrying several times with updated rectangles as required). + + Next, the laid out story can be drawn to a given device. + + In the case where the text story cannot be fitted into the given + areas all at once, these two steps can be repeated multiple + times until the text story is completely consumed. + + Finally, the text story can be dropped in the usual fashion. +*/ + + +typedef struct fz_story fz_story; + +/* + Create a text story using styled html. + + Passing a NULL buffer will be treated as an empty document. + Passing a NULL user_css will be treated as an empty CSS string. + A non-NULL dir will allow images etc to be loaded. The + story keeps its own reference, so the caller can drop its + reference after this call. +*/ +fz_story *fz_new_story(fz_context *ctx, fz_buffer *buf, const char *user_css, float em, fz_archive *dir); + +/* + Retrieve the warnings given from parsing this story. + + If there are warnings, this will be returned as a NULL terminated + C string. If there are no warnings, this will return NULL. + + These warnings will not be complete until AFTER any DOM manipulations + have been completed. + + This function does not need to be called, but once it has been + the DOM is no longer accessible, and any fz_xml pointer + retrieved from fz_story_document is no longer valid. +*/ +const char *fz_story_warnings(fz_context *ctx, fz_story *story); + +/* + Equivalent to fz_place_story_flags with flags being 0. +*/ +int fz_place_story(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled); + +/* + Place (or continue placing) a story into the supplied rectangle + 'where', updating 'filled' with the actual area that was used. + Returns zero (FZ_PLACE_STORY_RETURN_ALL_FITTED) if all the + content fitted, non-zero if there is more to fit.
+ + If the FZ_PLACE_STORY_FLAG_NO_OVERFLOW flag is set, then a + return code of FZ_PLACE_STORY_RETURN_OVERFLOW_WIDTH will be + returned when the next item (word) to be placed would not fit + in a rectangle of that given width. + + Note, that filled may not be returned as a strict subset of + where, due to padding/margins at the bottom of pages, and + non-wrapping content extending to the right. + + Subsequent calls will attempt to place the same section of story + again and again, until the placed story is drawn using fz_draw_story, + whereupon subsequent calls to fz_place_story will attempt to place + the unused remainder of the story. + + After this function is called, the DOM is no longer accessible, + and any fz_xml pointer retrieved from fz_story_document is no + longer valid. + + flags: Additional flags controlling layout. Pass 0 if none + required. +*/ +int fz_place_story_flags(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled, int flags); + +enum +{ + /* Avoid the usual HTML behaviour of overflowing the box horizontally + * in some circumstances. We now abort the place in such cases and + * return with FZ_PLACE_STORY_RETURN_OVERFLOW_WIDTH. */ + FZ_PLACE_STORY_FLAG_NO_OVERFLOW = 1, + + /* Specific return codes from fz_place_story_flags. Also + * "non-zero" for 'more to fit'. */ + FZ_PLACE_STORY_RETURN_ALL_FITTED = 0, + FZ_PLACE_STORY_RETURN_OVERFLOW_WIDTH = 2 +}; + +/* + Draw the placed story to the given device. + + This moves the point at which subsequent calls to fz_place_story + will restart placing to the end of what has just been output. +*/ +void fz_draw_story(fz_context *ctx, fz_story *story, fz_device *dev, fz_matrix ctm); + +/* + Reset the position within the story at which the next layout call + will continue to the start of the story. +*/ +void fz_reset_story(fz_context *ctx, fz_story *story); + +/* + Drop the html story. +*/ +void fz_drop_story(fz_context *ctx, fz_story *story); + +/* + Get a borrowed reference to the DOM document pointer for this + story.
Do not destroy this reference, it will be destroyed + when the story is laid out. + + This only makes sense before the first placement of the story + or retrieval of the warnings. Once either of those things happen + the DOM representation is destroyed. +*/ +fz_xml *fz_story_document(fz_context *ctx, fz_story *story); + + +typedef struct +{ + /* The overall depth of this element in the box structure. + * This can be used to compare the relative depths of different + * elements, but shouldn't be relied upon not to change between + * different versions of MuPDF. */ + int depth; + + /* The heading level of this element. 0 if not a header, or 1-6 for h1-h6. */ + int heading; + + /* The id for this element. */ + const char *id; + + /* The href for this element. */ + const char *href; + + /* The rectangle for this element. */ + fz_rect rect; + + /* The immediate text for this element. */ + const char *text; + + /* This indicates whether this opens and/or closes this element. + * + * As we traverse the tree we do a depth first search. In order for + * the caller of fz_story_positions to know whether a given element + * is inside another element, we therefore announce 'start' and 'stop' + * for each element. For instance, with: + * + *
<div id="part1"> + * <h1>Chapter 1</h1>... + * <h1>Chapter 2</h1>... + * ... + * </div> + * <div id="part2"> + * <h1>Chapter 10</h1>... + * <h1>Chapter 11</h1>... + * ... + * </div>
+ * + * We would announce: + * + id='part1' (open) + * + header=1 "Chapter 1" (open/close) + * + header=1 "Chapter 2" (open/close) + * ... + * + id='part1' (close) + * + id='part2' (open) + * + header=1 "Chapter 10" (open/close) + * + header=1 "Chapter 11" (open/close) + * ... + * + id='part2' (close) + * + * If bit 0 is set, then this 'opens' the element. + * If bit 1 is set, then this 'closes' the element. + */ + int open_close; + + /* A count of the number of rectangles that the layout code has split the + * story into so far. After the first layout, this will be 1. If a + * layout is repeated, this number is not incremented. */ + int rectangle_num; +} fz_story_element_position; + +typedef void (fz_story_position_callback)(fz_context *ctx, void *arg, const fz_story_element_position *); + +/* + Enumerate the positions for key blocks in the story. + + This will cause the supplied function to be called with details of each + element in the story that is either a header, or has an id. +*/ +void fz_story_positions(fz_context *ctx, fz_story *story, fz_story_position_callback *cb, void *arg); + +#endif diff --git a/misc/mupdf/include/mupdf/fitz/stream.h b/misc/mupdf/include/mupdf/fitz/stream.h index 669154d..f5e0f67 100644 --- a/misc/mupdf/include/mupdf/fitz/stream.h +++ b/misc/mupdf/include/mupdf/fitz/stream.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_STREAM_H #define MUPDF_FITZ_STREAM_H @@ -55,6 +55,15 @@ typedef struct fz_stream fz_stream; */ fz_stream *fz_open_file(fz_context *ctx, const char *filename); +/** + Open the named file and wrap it in a stream. 
+ + Does the same as fz_open_file, but in the event the file + does not open, it will return NULL rather than throw an + exception. +*/ +fz_stream *fz_try_open_file(fz_context *ctx, const char *name); + #ifdef _WIN32 /** Open the named file and wrap it in a stream. @@ -136,6 +145,10 @@ int64_t fz_tell(fz_context *ctx, fz_stream *stm); offset: The offset to seek to. whence: From where the offset is measured (see fseek). + SEEK_SET - start of stream. + SEEK_CUR - current position. + SEEK_END - end of stream. + */ void fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence); @@ -180,6 +193,14 @@ fz_buffer *fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial); */ fz_buffer *fz_read_file(fz_context *ctx, const char *filename); +/** + Read all the contents of a file into a buffer. + + Returns NULL if the file does not exist, otherwise + behaves exactly as fz_read_file. +*/ +fz_buffer *fz_try_read_file(fz_context *ctx, const char *filename); + /** fz_read_[u]int(16|24|32|64)(_le)? @@ -217,6 +238,27 @@ float fz_read_float(fz_context *ctx, fz_stream *stm); */ void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len); +/** + Read a utf-8 rune from a stream. + + In the event of encountering badly formatted utf-8 codes + (such as a leading code with an unexpected number of following + codes) no error/exception is given, but undefined values may be + returned. +*/ +int fz_read_rune(fz_context *ctx, fz_stream *in); + +/** + Read a utf-16 rune from a stream. (little endian and + big endian respectively). + + In the event of encountering badly formatted utf-16 codes + (mismatched surrogates) no error/exception is given, but + undefined values may be returned. +*/ +int fz_read_utf16_le(fz_context *ctx, fz_stream *stm); +int fz_read_utf16_be(fz_context *ctx, fz_stream *stm); + /** A function type for use when implementing fz_streams. 
The supplied function of this type is called @@ -297,9 +339,12 @@ fz_stream *fz_new_stream(fz_context *ctx, void *state, fz_stream_next_fn *next, truncated: Flag to store success/failure indication in. + worst_case: 0 for unknown, otherwise an upper bound for the + size of the stream. + Returns a buffer created from reading from the stream. */ -fz_buffer *fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated); +fz_buffer *fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case); /** Read a line from stream into the buffer until either a @@ -353,6 +398,7 @@ static inline size_t fz_available(fz_context *ctx, fz_stream *stm, size_t max) fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); + fz_report_error(ctx); fz_warn(ctx, "read error; treating as end of file"); stm->error = 1; c = EOF; @@ -387,6 +433,7 @@ static inline int fz_read_byte(fz_context *ctx, fz_stream *stm) fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); + fz_report_error(ctx); fz_warn(ctx, "read error; treating as end of file"); stm->error = 1; c = EOF; @@ -421,6 +468,7 @@ static inline int fz_peek_byte(fz_context *ctx, fz_stream *stm) fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); + fz_report_error(ctx); fz_warn(ctx, "read error; treating as end of file"); stm->error = 1; c = EOF; diff --git a/misc/mupdf/include/mupdf/fitz/string-util.h b/misc/mupdf/include/mupdf/fitz/string-util.h index a5ed185..76062e3 100644 --- a/misc/mupdf/include/mupdf/fitz/string-util.h +++ b/misc/mupdf/include/mupdf/fitz/string-util.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,13 +17,14 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. 
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_STRING_H #define MUPDF_FITZ_STRING_H #include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" /* The Unicode character used to incoming character whose value is * unknown or unrepresentable. */ @@ -98,10 +99,45 @@ void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, si void fz_dirname(char *dir, const char *path, size_t dirsize); /** - decode url escapes. + Find the filename component in a path. +*/ +const char *fz_basename(const char *path); + +/** + Like fz_decode_uri_component but in-place. */ char *fz_urldecode(char *url); +/** + * Return a new string representing the unencoded version of the given URI. + * This decodes all escape sequences except those that would result in a reserved + * character that are part of the URI syntax (; / ? : @ & = + $ , #). + */ +char *fz_decode_uri(fz_context *ctx, const char *s); + +/** + * Return a new string representing the unencoded version of the given URI component. + * This decodes all escape sequences! + */ +char *fz_decode_uri_component(fz_context *ctx, const char *s); + +/** + * Return a new string representing the provided string encoded as a URI. + */ +char *fz_encode_uri(fz_context *ctx, const char *s); + +/** + * Return a new string representing the provided string encoded as an URI component. + * This also encodes the special reserved characters (; / ? : @ & = + $ , #). + */ +char *fz_encode_uri_component(fz_context *ctx, const char *s); + +/** + * Return a new string representing the provided string encoded as an URI path name. + * This also encodes the special reserved characters except /. + */ +char *fz_encode_uri_pathname(fz_context *ctx, const char *s); + /** create output file name using a template. 
@@ -121,6 +157,14 @@ void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char */ char *fz_cleanname(char *name); +/** + rewrite path to the shortest string that names the same path. + + Eliminates multiple and trailing slashes, interprets "." and + "..". Allocates a new string that the caller must free. +*/ +char *fz_cleanname_strdup(fz_context *ctx, const char *name); + /** Resolve a path to an absolute file name. The resolved path buffer must be of at least PATH_MAX size. @@ -205,6 +249,12 @@ const char *fz_runeptr(const char *str, int idx); */ int fz_utflen(const char *s); +/* + Convert a wchar string into a new heap allocated utf8 one. +*/ +char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s); + + /** Locale-independent decimal to binary conversion. On overflow return (-)INFINITY and set errno to ERANGE. On underflow return @@ -217,9 +267,15 @@ int fz_grisu(float f, char *s, int *exp); /** Check and parse string into page ranges: - ( ','? ([0-9]+|'N') ( '-' ([0-9]+|N) )? )+ + /,?(-?\d+|N)(-(-?\d+|N))?/ */ int fz_is_page_range(fz_context *ctx, const char *s); const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n); +/** + Unicode aware tolower and toupper functions. +*/ +int fz_tolower(int c); +int fz_toupper(int c); + #endif diff --git a/misc/mupdf/include/mupdf/fitz/structured-text.h b/misc/mupdf/include/mupdf/fitz/structured-text.h index a4bf14b..ae108b4 100644 --- a/misc/mupdf/include/mupdf/fitz/structured-text.h +++ b/misc/mupdf/include/mupdf/fitz/structured-text.h @@ -17,20 +17,21 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_STRUCTURED_TEXT_H #define MUPDF_FITZ_STRUCTURED_TEXT_H #include "mupdf/fitz/system.h" +#include "mupdf/fitz/types.h" #include "mupdf/fitz/context.h" #include "mupdf/fitz/geometry.h" #include "mupdf/fitz/font.h" #include "mupdf/fitz/image.h" #include "mupdf/fitz/output.h" #include "mupdf/fitz/device.h" -#include "mupdf/fitz/document.h" +#include "mupdf/fitz/pool.h" /** Simple text layout (for use with annotation editing primarily). @@ -132,6 +133,7 @@ enum FZ_STEXT_DEHYPHENATE = 16, FZ_STEXT_PRESERVE_SPANS = 32, FZ_STEXT_MEDIABOX_CLIP = 64, + FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE = 128, }; /** @@ -184,7 +186,8 @@ struct fz_stext_line */ struct fz_stext_char { - int c; + int c; /* unicode character value */ + int bidi; /* even for LTR, odd for RTL */ int color; /* sRGB hex color */ fz_point origin; fz_quad quad; @@ -244,7 +247,7 @@ void fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page NOTE: This is an experimental interface and subject to change without notice. */ -int fz_search_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, fz_quad *quads, int max_quads); +int fz_search_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max); /** Return a list of quads to highlight lines inside the selection @@ -280,11 +283,12 @@ char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_poi char *fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf); /** - Options for creating a pixmap and draw device. + Options for creating structured text. */ typedef struct { int flags; + float scale; } fz_stext_options; /** @@ -341,6 +345,9 @@ fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_st the languages/scripts that should be used for OCR (e.g. "eng,ara"). + datadir: NULL (for ""), or a pointer to a path string otherwise + provided to Tesseract in the TESSDATA_PREFIX environment variable. 
+ progress: NULL, or function to be called periodically to indicate progress. Return 0 to continue, or 1 to cancel. progress_arg is returned as the void *. The int is a value between 0 and 100 to @@ -350,7 +357,7 @@ fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_st function. */ fz_device *fz_new_ocr_device(fz_context *ctx, fz_device *target, fz_matrix ctm, fz_rect mediabox, int with_list, const char *language, - int (*progress)(fz_context *, void *, int), void *progress_arg); + const char *datadir, int (*progress)(fz_context *, void *, int), void *progress_arg); fz_document *fz_open_reflowed_document(fz_context *ctx, fz_document *underdoc, const fz_stext_options *opts); diff --git a/misc/mupdf/include/mupdf/fitz/system.h b/misc/mupdf/include/mupdf/fitz/system.h index 378afa0..6ca13ac 100644 --- a/misc/mupdf/include/mupdf/fitz/system.h +++ b/misc/mupdf/include/mupdf/fitz/system.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_SYSTEM_H #define MUPDF_FITZ_SYSTEM_H @@ -165,9 +165,6 @@ static __inline int signbit(double x) #ifdef _WIN32 -char *fz_utf8_from_wchar(const wchar_t *s); -wchar_t *fz_wchar_from_utf8(const char *s); - /* really a FILE* but we don't want to include stdio.h here */ void *fz_fopen_utf8(const char *name, const char *mode); int fz_remove_utf8(const char *name); @@ -182,13 +179,21 @@ void fz_free_argv(int argc, char **argv); #define S_ISDIR(mode) ((mode) & S_IFDIR) #endif +int64_t fz_stat_ctime(const char *path); +int64_t fz_stat_mtime(const char *path); +int fz_mkdir(char *path); + + /* inline is standard in C++. For some compilers we can enable it within - * C too. */ + * C too. Some compilers think they know better than we do about when + * to actually honour inline (particularly for large functions); use + * fz_forceinline to kick them into really inlining. */ #ifndef __cplusplus #if defined (__STDC_VERSION_) && (__STDC_VERSION__ >= 199901L) /* C99 */ #elif defined(_MSC_VER) && (_MSC_VER >= 1500) /* MSVC 9 or newer */ #define inline __inline +#define fz_forceinline __forceinline #elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC 3 or newer */ #define inline __inline #else /* Unknown or ancient */ @@ -196,6 +201,10 @@ void fz_free_argv(int argc, char **argv); #endif #endif +#ifndef fz_forceinline +#define fz_forceinline inline +#endif + /* restrict is standard in C99, but not in all C++ compilers. */ #if defined (__STDC_VERSION_) && (__STDC_VERSION__ >= 199901L) /* C99 */ #define FZ_RESTRICT restrict @@ -399,6 +408,10 @@ static inline float my_sinf(float x) x -= xn; xn *= x2 / 72.0f; x += xn; + if (x > 1) + x = 1; + else if (x < -1) + x = -1; return x; } diff --git a/misc/mupdf/include/mupdf/fitz/text.h b/misc/mupdf/include/mupdf/fitz/text.h index cb6ffe3..d7562f9 100644 --- a/misc/mupdf/include/mupdf/fitz/text.h +++ b/misc/mupdf/include/mupdf/fitz/text.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. 
// For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_TEXT_H #define MUPDF_FITZ_TEXT_H @@ -46,6 +46,7 @@ typedef struct float x, y; int gid; /* -1 for one gid to many ucs mappings */ int ucs; /* -1 for one ucs to many gid mappings */ + int cid; /* CID for CJK fonts, raw character code for other fonts; or unicode for non-PDF formats. */ } fz_text_item; #define FZ_LANG_TAG2(c1,c2) ((c1-'a'+1) + ((c2-'a'+1)*27)) @@ -118,6 +119,8 @@ void fz_drop_text(fz_context *ctx, const fz_text *text); unicode: The unicode character for the glyph. + cid: The CJK CID value or raw character code. + wmode: 1 for vertical mode, 0 for horizontal. bidi_level: The bidirectional level for this glyph. @@ -131,6 +134,7 @@ void fz_drop_text(fz_context *ctx, const fz_text *text); Throws exception on failure to allocate. */ void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language); +void fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang); /** Add a UTF8 string to a text object. diff --git a/misc/mupdf/include/mupdf/fitz/track-usage.h b/misc/mupdf/include/mupdf/fitz/track-usage.h index 53bc72a..69e8425 100644 --- a/misc/mupdf/include/mupdf/fitz/track-usage.h +++ b/misc/mupdf/include/mupdf/fitz/track-usage.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. 
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef TRACK_USAGE_H #define TRACK_USAGE_H diff --git a/misc/mupdf/include/mupdf/fitz/transition.h b/misc/mupdf/include/mupdf/fitz/transition.h index 3529b89..89a8087 100644 --- a/misc/mupdf/include/mupdf/fitz/transition.h +++ b/misc/mupdf/include/mupdf/fitz/transition.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_TRANSITION_H #define MUPDF_FITZ_TRANSITION_H diff --git a/misc/mupdf/include/mupdf/fitz/tree.h b/misc/mupdf/include/mupdf/fitz/tree.h index 1ad64cf..b4d7ac6 100644 --- a/misc/mupdf/include/mupdf/fitz/tree.h +++ b/misc/mupdf/include/mupdf/fitz/tree.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_TREE_H #define MUPDF_FITZ_TREE_H diff --git a/misc/mupdf/include/mupdf/fitz/types.h b/misc/mupdf/include/mupdf/fitz/types.h new file mode 100644 index 0000000..1299d2a --- /dev/null +++ b/misc/mupdf/include/mupdf/fitz/types.h @@ -0,0 +1,41 @@ +// Copyright (C) 2021 Artifex Software, Inc. +// +// This file is part of MuPDF. 
+// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#ifndef MUPDF_FITZ_TYPES_H +#define MUPDF_FITZ_TYPES_H + +typedef struct fz_document fz_document; + +/** + Locations within the document are referred to in terms of + chapter and page, rather than just a page number. For some + documents (such as epub documents with large numbers of pages + broken into many chapters) this can make navigation much faster + as only the required chapter needs to be decoded at a time. +*/ +typedef struct +{ + int chapter; + int page; +} fz_location; + +#endif diff --git a/misc/mupdf/include/mupdf/fitz/util.h b/misc/mupdf/include/mupdf/fitz/util.h index e7a1b9d..0048508 100644 --- a/misc/mupdf/include/mupdf/fitz/util.h +++ b/misc/mupdf/include/mupdf/fitz/util.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. 
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_UTIL_H #define MUPDF_FITZ_UTIL_H @@ -30,6 +30,9 @@ #include "mupdf/fitz/pixmap.h" #include "mupdf/fitz/structured-text.h" #include "mupdf/fitz/buffer.h" +#include "mupdf/fitz/xml.h" +#include "mupdf/fitz/archive.h" +#include "mupdf/fitz/display-list.h" /** Create a display list. @@ -72,6 +75,8 @@ fz_pixmap *fz_new_pixmap_from_page_with_separations(fz_context *ctx, fz_page *pa fz_pixmap *fz_new_pixmap_from_page_number_with_separations(fz_context *ctx, fz_document *doc, int number, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha); fz_pixmap *fz_new_pixmap_from_page_contents_with_separations(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha); +fz_pixmap *fz_fill_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, fz_matrix ctm, fz_pixmap *pix); + /** Extract text from page. @@ -95,36 +100,38 @@ fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *lis Record the hits in the hit_bbox array and return the number of hits. Will stop looking once it has filled hit_max rectangles. 
*/ -int fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_quad *hit_bbox, int hit_max); -int fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_quad *hit_bbox, int hit_max); -int fz_search_chapter_page_number(fz_context *ctx, fz_document *doc, int chapter, int page, const char *needle, fz_quad *hit_bbox, int hit_max); -int fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_quad *hit_bbox, int hit_max); +int fz_search_page(fz_context *ctx, fz_page *page, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max); +int fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max); +int fz_search_chapter_page_number(fz_context *ctx, fz_document *doc, int chapter, int page, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max); +int fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max); /** Parse an SVG document into a display-list. */ -fz_display_list *fz_new_display_list_from_svg(fz_context *ctx, fz_buffer *buf, const char *base_uri, fz_archive *zip, float *w, float *h); +fz_display_list *fz_new_display_list_from_svg(fz_context *ctx, fz_buffer *buf, const char *base_uri, fz_archive *dir, float *w, float *h); /** Create a scalable image from an SVG document. */ -fz_image *fz_new_image_from_svg(fz_context *ctx, fz_buffer *buf, const char *base_uri, fz_archive *zip); +fz_image *fz_new_image_from_svg(fz_context *ctx, fz_buffer *buf, const char *base_uri, fz_archive *dir); /** Parse an SVG document into a display-list. 
*/ -fz_display_list *fz_new_display_list_from_svg_xml(fz_context *ctx, fz_xml_doc *xmldoc, fz_xml *xml, const char *base_uri, fz_archive *zip, float *w, float *h); +fz_display_list *fz_new_display_list_from_svg_xml(fz_context *ctx, fz_xml_doc *xmldoc, fz_xml *xml, const char *base_uri, fz_archive *dir, float *w, float *h); /** Create a scalable image from an SVG document. */ -fz_image *fz_new_image_from_svg_xml(fz_context *ctx, fz_xml_doc *xmldoc, fz_xml *xml, const char *base_uri, fz_archive *zip); +fz_image *fz_new_image_from_svg_xml(fz_context *ctx, fz_xml_doc *xmldoc, fz_xml *xml, const char *base_uri, fz_archive *dir); /** Write image as a data URI (for HTML and SVG output). */ void fz_write_image_as_data_uri(fz_context *ctx, fz_output *out, fz_image *image); void fz_write_pixmap_as_data_uri(fz_context *ctx, fz_output *out, fz_pixmap *pixmap); +void fz_append_image_as_data_uri(fz_context *ctx, fz_buffer *out, fz_image *image); +void fz_append_pixmap_as_data_uri(fz_context *ctx, fz_buffer *out, fz_pixmap *pixmap); /** Use text extraction to convert the input document into XHTML, diff --git a/misc/mupdf/include/mupdf/fitz/version.h b/misc/mupdf/include/mupdf/fitz/version.h index 2d3f6b8..492a6e4 100644 --- a/misc/mupdf/include/mupdf/fitz/version.h +++ b/misc/mupdf/include/mupdf/fitz/version.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,15 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_VERSION_H #define MUPDF_FITZ_VERSION_H #ifndef FZ_VERSION -#define FZ_VERSION "1.19.0" +#define FZ_VERSION "1.24.2" #define FZ_VERSION_MAJOR 1 -#define FZ_VERSION_MINOR 19 -#define FZ_VERSION_PATCH 0 +#define FZ_VERSION_MINOR 24 +#define FZ_VERSION_PATCH 2 #endif #endif diff --git a/misc/mupdf/include/mupdf/fitz/write-pixmap.h b/misc/mupdf/include/mupdf/fitz/write-pixmap.h index cae6586..8ddb1ef 100644 --- a/misc/mupdf/include/mupdf/fitz/write-pixmap.h +++ b/misc/mupdf/include/mupdf/fitz/write-pixmap.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_WRITE_PIXMAP_H #define MUPDF_FITZ_WRITE_PIXMAP_H @@ -31,6 +31,7 @@ #include "mupdf/fitz/bitmap.h" #include "mupdf/fitz/buffer.h" #include "mupdf/fitz/image.h" +#include "mupdf/fitz/writer.h" /** PCL output @@ -175,6 +176,7 @@ typedef struct int compress; int strip_height; char language[256]; + char datadir[1024]; /* Updated as we move through the job */ int page_count; @@ -189,6 +191,7 @@ typedef struct compression=flate: Flate compression strip-height=n: Strip height (default 16) ocr-language=: OCR Language (default eng) + ocr-datadir=: OCR data path (default rely on TESSDATA_PREFIX) */ fz_pdfocr_options *fz_parse_pdfocr_options(fz_context *ctx, fz_pdfocr_options *opts, const char *args); @@ -205,7 +208,7 @@ fz_band_writer *fz_new_pdfocr_band_writer(fz_context *ctx, fz_output *out, const /** Set the progress callback for a pdfocr bandwriter. 
*/ -void fz_pdfocr_band_writer_set_progress(fz_context *ctx, fz_band_writer *writer, int (*progress)(fz_context *, void *, int), void *progress_arg); +void fz_pdfocr_band_writer_set_progress(fz_context *ctx, fz_band_writer *writer, fz_pdfocr_progress_fn *progress_fn, void *progress_arg); /** Write a (Greyscale or RGB) pixmap as pdfocr. @@ -222,11 +225,37 @@ void fz_save_pixmap_as_pdfocr(fz_context *ctx, fz_pixmap *pixmap, char *filename */ void fz_save_pixmap_as_png(fz_context *ctx, fz_pixmap *pixmap, const char *filename); +/** + Write a pixmap as a JPEG. +*/ +void fz_write_pixmap_as_jpeg(fz_context *ctx, fz_output *out, fz_pixmap *pix, int quality, int invert_cmyk); + +/** + Save a pixmap as a JPEG. +*/ +void fz_save_pixmap_as_jpeg(fz_context *ctx, fz_pixmap *pixmap, const char *filename, int quality); + /** Write a (Greyscale or RGB) pixmap as a png. */ void fz_write_pixmap_as_png(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap); +/** + Pixmap data as JP2K with no subsampling. + + quality = 100 = lossless + otherwise for a factor of x compression use 100-x. (so 80 is 1:20 compression) +*/ +void fz_write_pixmap_as_jpx(fz_context *ctx, fz_output *out, fz_pixmap *pix, int quality); + +/** + Save pixmap data as JP2K with no subsampling. + + quality = 100 = lossless + otherwise for a factor of x compression use 100-x. (so 80 is 1:20 compression) +*/ +void fz_save_pixmap_as_jpx(fz_context *ctx, fz_pixmap *pixmap, const char *filename, int q); + /** Create a new png band writer (greyscale or RGB, with or without alpha). @@ -239,6 +268,11 @@ fz_band_writer *fz_new_png_band_writer(fz_context *ctx, fz_output *out); Ownership of the buffer is returned. 
*/ fz_buffer *fz_new_buffer_from_image_as_png(fz_context *ctx, fz_image *image, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_image_as_pnm(fz_context *ctx, fz_image *image, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_image_as_pam(fz_context *ctx, fz_image *image, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_image_as_psd(fz_context *ctx, fz_image *image, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_image_as_jpeg(fz_context *ctx, fz_image *image, fz_color_params color_params, int quality, int invert_cmyk); +fz_buffer *fz_new_buffer_from_image_as_jpx(fz_context *ctx, fz_image *image, fz_color_params color_params, int quality); /** Reencode a given pixmap as a PNG into a buffer. @@ -246,6 +280,11 @@ fz_buffer *fz_new_buffer_from_image_as_png(fz_context *ctx, fz_image *image, fz_ Ownership of the buffer is returned. */ fz_buffer *fz_new_buffer_from_pixmap_as_png(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_pixmap_as_pnm(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_pixmap_as_pam(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_pixmap_as_psd(fz_context *ctx, fz_pixmap *pix, fz_color_params color_params); +fz_buffer *fz_new_buffer_from_pixmap_as_jpeg(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params, int quality, int invert_cmyk); +fz_buffer *fz_new_buffer_from_pixmap_as_jpx(fz_context *ctx, fz_pixmap *pix, fz_color_params color_params, int quality); /** Save a pixmap as a pnm (greyscale or rgb, no alpha). diff --git a/misc/mupdf/include/mupdf/fitz/writer.h b/misc/mupdf/include/mupdf/fitz/writer.h index 1610ffa..ed7f529 100644 --- a/misc/mupdf/include/mupdf/fitz/writer.h +++ b/misc/mupdf/include/mupdf/fitz/writer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. 
// // This file is part of MuPDF. // @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_FITZ_WRITER_H #define MUPDF_FITZ_WRITER_H @@ -118,6 +118,9 @@ fz_document_writer *fz_new_document_writer(fz_context *ctx, const char *path, co fz_document_writer * fz_new_document_writer_with_output(fz_context *ctx, fz_output *out, const char *format, const char *options); +fz_document_writer * +fz_new_document_writer_with_buffer(fz_context *ctx, fz_buffer *buf, const char *format, const char *options); + /** Document writers for various possible output formats. @@ -150,10 +153,25 @@ fz_document_writer *fz_new_pwg_writer_with_output(fz_context *ctx, fz_output *ou fz_document_writer *fz_new_cbz_writer(fz_context *ctx, const char *path, const char *options); fz_document_writer *fz_new_cbz_writer_with_output(fz_context *ctx, fz_output *out, const char *options); +/** + Used to report progress of the OCR operation. + + page: Current page being processed. + + percent: Progress of the OCR operation for the + current page in percent. Whether it reaches 100 + once a page is finished, depends on the OCR engine. + + Return 0 to continue progress, return 1 to cancel the + operation. 
+*/ +typedef int (fz_pdfocr_progress_fn)(fz_context *ctx, void *progress_arg, int page, int percent); + fz_document_writer *fz_new_pdfocr_writer(fz_context *ctx, const char *path, const char *options); fz_document_writer *fz_new_pdfocr_writer_with_output(fz_context *ctx, fz_output *out, const char *options); -void fz_pdfocr_writer_set_progress(fz_context *ctx, fz_document_writer *writer, int (*progress)(fz_context *, void *, int), void *); +void fz_pdfocr_writer_set_progress(fz_context *ctx, fz_document_writer *writer, fz_pdfocr_progress_fn *progress, void *); +fz_document_writer *fz_new_jpeg_pixmap_writer(fz_context *ctx, const char *path, const char *options); fz_document_writer *fz_new_png_pixmap_writer(fz_context *ctx, const char *path, const char *options); fz_document_writer *fz_new_pam_pixmap_writer(fz_context *ctx, const char *path, const char *options); fz_document_writer *fz_new_pnm_pixmap_writer(fz_context *ctx, const char *path, const char *options); diff --git a/misc/mupdf/include/mupdf/fitz/xml.h b/misc/mupdf/include/mupdf/fitz/xml.h index ac0d74a..7792f4a 100644 --- a/misc/mupdf/include/mupdf/fitz/xml.h +++ b/misc/mupdf/include/mupdf/fitz/xml.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,45 +17,81 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_FITZ_XML_H #define MUPDF_FITZ_XML_H #include "mupdf/fitz/system.h" #include "mupdf/fitz/context.h" +#include "mupdf/fitz/buffer.h" +#include "mupdf/fitz/pool.h" +#include "mupdf/fitz/archive.h" /** XML document model */ -typedef struct fz_xml_doc fz_xml_doc; typedef struct fz_xml fz_xml; +/* For backwards compatibility */ +typedef fz_xml fz_xml_doc; + +/** + Parse the contents of buffer into a tree of xml nodes. + + preserve_white: whether to keep or delete all-whitespace nodes. +*/ +fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white); + /** Parse the contents of buffer into a tree of xml nodes. preserve_white: whether to keep or delete all-whitespace nodes. */ -fz_xml_doc *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white); +fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white); + +/** + Parse the contents of an archive entry into a tree of xml nodes. + + preserve_white: whether to keep or delete all-whitespace nodes. +*/ +fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white); + +/** + Try and parse the contents of an archive entry into a tree of xml nodes. + + preserve_white: whether to keep or delete all-whitespace nodes. + + Will return NULL if the archive entry can't be found. Otherwise behaves + the same as fz_parse_xml_archive_entry. May throw exceptions. +*/ +fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white); /** Parse the contents of a buffer into a tree of XML nodes, using the HTML5 parsing algorithm. */ -fz_xml_doc *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf); +fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf); + +/** + Add a reference to the XML. +*/ +fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml); /** - Free the XML node and all its children and siblings. + Drop a reference to the XML. 
When the last reference is + dropped, the node and all its children and siblings will + be freed. */ -void fz_drop_xml(fz_context *ctx, fz_xml_doc *xml); +void fz_drop_xml(fz_context *ctx, fz_xml *xml); /** Detach a node from the tree, unlinking it from its parent, and setting the document root to the node. */ -void fz_detach_xml(fz_context *ctx, fz_xml_doc *xml, fz_xml *node); +void fz_detach_xml(fz_context *ctx, fz_xml *node); /** Return the topmost XML node of a document. @@ -113,6 +149,11 @@ char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two); */ int fz_xml_att_eq(fz_xml *item, const char *name, const char *match); +/** + Add an attribute to an XML node. +*/ +void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val); + /** Return the text content of an XML node. Return NULL if the node is a tag. @@ -120,7 +161,13 @@ int fz_xml_att_eq(fz_xml *item, const char *name, const char *match); char *fz_xml_text(fz_xml *item); /** - Pretty-print an XML tree to stdout. + Pretty-print an XML tree to given output. +*/ +void fz_output_xml(fz_context *ctx, fz_output *out, fz_xml *item, int level); + +/** + Pretty-print an XML tree to stdout. (Deprecated, use + fz_output_xml in preference). */ void fz_debug_xml(fz_xml *item, int level); @@ -150,7 +197,8 @@ fz_xml *fz_xml_find_down(fz_xml *item, const char *tag); /** Search the siblings of XML nodes starting with item looking for - the first with the given tag, and with a matching attribute. + the first with the given tag (or any tag if tag is NULL), and + with a matching attribute. Return NULL if none found. */ @@ -158,8 +206,8 @@ fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const /** Search the siblings of XML nodes starting with the first sibling - of item looking for the first with the given tag, and with a - matching attribute. 
+ of item looking for the first with the given tag (or any tag if tag + is NULL), and with a matching attribute. Return NULL if none found. */ @@ -167,11 +215,183 @@ fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, c /** Search the siblings of XML nodes starting with the first child - of item looking for the first with the given tag, and with a - matching attribute. + of item looking for the first with the given tag (or any tag if + tag is NULL), and with a matching attribute. Return NULL if none found. */ fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match); +/** + Perform a depth first search from item, returning the first + child that matches the given tag (or any tag if tag is NULL), + with the given attribute (if att is non NULL), that matches + match (if match is non NULL). +*/ +fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match); + +/** + Perform a depth first search from item, returning the first + child that matches the given tag (or any tag if tag is NULL), + with the given attribute (if att is non NULL), that matches + match (if match is non NULL). The search stops if it ever + reaches the top of the tree, or the declared 'top' item. +*/ +fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top); + +/** + Perform a depth first search onwards from item, returning the first + child that matches the given tag (or any tag if tag is NULL), + with the given attribute (if att is non NULL), that matches + match (if match is non NULL). +*/ +fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match); + +/** + Perform a depth first search onwards from item, returning the first + child that matches the given tag (or any tag if tag is NULL), + with the given attribute (if att is non NULL), that matches + match (if match is non NULL). 
The search stops if it ever reaches + the top of the tree, or the declared 'top' item. +*/ +fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top); + +/** + DOM-like functions for html in xml. +*/ + +/** + Return a borrowed reference for the 'body' element of + the given DOM. +*/ +fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom); + +/** + Return a borrowed reference for the document (the top + level element) of the DOM. +*/ +fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom); + +/** + Create an element of a given tag type for the given DOM. + + The element is not linked into the DOM yet. +*/ +fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag); + +/** + Create a text node for the given DOM. + + The element is not linked into the DOM yet. +*/ +fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text); + +/** + Find the first element matching the requirements in a depth first traversal from elt. + + The tagname must match tag, unless tag is NULL, when all tag names are considered to match. + + If att is NULL, then all tags match. + Otherwise: + If match is NULL, then only nodes that have an att attribute match. + If match is non-NULL, then only nodes that have an att attribute that matches match match. + + Returns NULL (if no match found), or a borrowed reference to the first matching element. +*/ +fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match); + +/** + Find the next element matching the requirements. +*/ +fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match); + +/** + Insert an element as the last child of a parent, unlinking the + child from its current position if required. 
+*/ +void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child); + +/** + Insert an element (new_elt), before another element (node), + unlinking the new_elt from its current position if required. +*/ +void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt); + +/** + Insert an element (new_elt), after another element (node), + unlinking the new_elt from its current position if required. +*/ +void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt); + +/** + Remove an element from the DOM. The element can be added back elsewhere + if required. + + No reference counting changes for the element. +*/ +void fz_dom_remove(fz_context *ctx, fz_xml *elt); + +/** + Clone an element (and its children). + + A borrowed reference to the clone is returned. The clone is not + yet linked into the DOM. +*/ +fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt); + +/** + Return a borrowed reference to the first child of a node, + or NULL if there isn't one. +*/ +fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt); + +/** + Return a borrowed reference to the parent of a node, + or NULL if there isn't one. +*/ +fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt); + +/** + Return a borrowed reference to the next sibling of a node, + or NULL if there isn't one. +*/ +fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt); + +/** + Return a borrowed reference to the previous sibling of a node, + or NULL if there isn't one. +*/ +fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt); + +/** + Add an attribute to an element. + + Ownership of att and value remain with the caller. +*/ +void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value); + +/** + Remove an attribute from an element. +*/ +void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att); + +/** + Retrieve the value of a given attribute from a given element. + + Returns a borrowed pointer to the value or NULL if not found. 
+*/ +const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att); + +/** + Enumerate through the attributes of an element. + + Call with i=0,1,2,3... to enumerate attributes. + + On return *att and the return value will be NULL if there are not + that many attributes to read. Otherwise, *att will be filled in + with a borrowed pointer to the attribute name, and the return + value will be a borrowed pointer to the value. +*/ +const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att); + #endif diff --git a/misc/mupdf/include/mupdf/helpers/mu-office-lib.h b/misc/mupdf/include/mupdf/helpers/mu-office-lib.h index d9b951d..4738f3d 100644 --- a/misc/mupdf/include/mupdf/helpers/mu-office-lib.h +++ b/misc/mupdf/include/mupdf/helpers/mu-office-lib.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. /** * Mu Office Library diff --git a/misc/mupdf/include/mupdf/helpers/mu-threads.h b/misc/mupdf/include/mupdf/helpers/mu-threads.h index faa56da..556b399 100644 --- a/misc/mupdf/include/mupdf/helpers/mu-threads.h +++ b/misc/mupdf/include/mupdf/helpers/mu-threads.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_HELPERS_MU_THREADS_H #define MUPDF_HELPERS_MU_THREADS_H diff --git a/misc/mupdf/include/mupdf/helpers/pkcs7-openssl.h b/misc/mupdf/include/mupdf/helpers/pkcs7-openssl.h index 519ac76..50c4689 100644 --- a/misc/mupdf/include/mupdf/helpers/pkcs7-openssl.h +++ b/misc/mupdf/include/mupdf/helpers/pkcs7-openssl.h @@ -17,12 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PKCS7_OPENSSL_H #define MUPDF_PKCS7_OPENSSL_H +#include "mupdf/pdf/document.h" +#include "mupdf/pdf/form.h" + /* This an example pkcs7 implementation using openssl. These are the types of functions that you * will likely need to sign documents and check signatures within documents. In particular, to * sign a document, you need a function that derives a pdf_pkcs7_signer object from a certificate diff --git a/misc/mupdf/include/mupdf/html.h b/misc/mupdf/include/mupdf/html.h new file mode 100644 index 0000000..ce439b3 --- /dev/null +++ b/misc/mupdf/include/mupdf/html.h @@ -0,0 +1,53 @@ +// Copyright (C) 2023 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +// This header allows people to easily build HTML-based document handlers. + +#ifndef MUPDF_HTML_HTML_H +#define MUPDF_HTML_HTML_H + +#include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" +#include "mupdf/fitz/document.h" + +/* + HTML types required +*/ +typedef struct fz_html_s fz_html; +typedef struct fz_html_font_set_s fz_html_font_set; + +typedef struct +{ + const char *format_name; + fz_buffer *(*convert_to_html)(fz_context *ctx, fz_html_font_set *set, fz_buffer *buf, fz_archive *dir, const char *user_css); + int try_xml; + int try_html5; + int patch_mobi; +} fz_htdoc_format_t; + +fz_document *fz_htdoc_open_document_with_buffer(fz_context *ctx, fz_archive *dir, fz_buffer *buf, const fz_htdoc_format_t *format); + +fz_document *fz_htdoc_open_document_with_stream_and_dir(fz_context *ctx, fz_stream *stm, fz_archive *dir, const fz_htdoc_format_t *format); + + + +#endif /* MUPDF_HTML_HTML_H */ diff --git a/misc/mupdf/include/mupdf/memento.h b/misc/mupdf/include/mupdf/memento.h index 9b513e8..b2d01b9 100644 --- a/misc/mupdf/include/mupdf/memento.h +++ b/misc/mupdf/include/mupdf/memento.h @@ -1,14 +1,16 @@ -/* Copyright (C) 2009-2018 Artifex Software, Inc. +/* Copyright (C) 2009-2022 Artifex Software, Inc. All Rights Reserved. This software is provided AS-IS with no warranty, either express or implied. - This software is distributed under license and may not be copied, modified - or distributed except as expressly authorized under the terms of that - license. 
Refer to licensing information at http://www.artifex.com - or contact Artifex Software, Inc., 1305 Grant Avenue - Suite 200, - Novato, CA 94945, U.S.A., +1(415)492-9861, for further information. + This software is distributed under license and may not be copied, + modified or distributed except as expressly authorized under the terms + of the license contained in the file COPYING in this distribution. + + Refer to licensing information at http://www.artifex.com or contact + Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, + CA 94129, USA, for further information. */ /* Memento: A library to aid debugging of memory leaks/heap corruption. @@ -139,19 +141,52 @@ * Memento has some experimental code in it to trap new/delete (and * new[]/delete[] if required) calls. * - * In order for this to work, either: + * In all cases, Memento will provide a C API that new/delete + * operators can be built upon: + * void *Memento_cpp_new(size_t size); + * void Memento_cpp_delete(void *pointer); + * void *Memento_cpp_new_array(size_t size); + * void Memento_cpp_delete_array(void *pointer); + * + * There are various ways that actual operator definitions can be + * provided: + * + * 1) If memento.c is built with the c++ compiler, then global new + * and delete operators will be built in to memento by default. * - * 1) Build memento.c with the c++ compiler. + * 2) If memento.c is built as normal with the C compiler, then + * no such veneers will be built in. The caller must provide them + * themselves. This can be done either by: + * + * a) Copying the lines between: + * // C++ Operator Veneers - START + * and + * // C++ Operator Veneers - END + * from memento.c into a C++ file within their own project. 
* * or * - * 2) Build memento.c as normal with the C compiler, then from any - * one of your .cpp files, do: + * b) Add the following lines to a C++ file in the project: + * #define MEMENTO_CPP_EXTRAS_ONLY + * #include "memento.c" + * + * 3) For those people that would like to be able to compile memento.c + * with a C compiler, and provide new/delete veneers globally + * within their own C++ code (so avoiding the need for memento.h to + * be included from every file), define MEMENTO_NO_CPLUSPLUS as you + * build, and Memento will not provide any veneers itself, instead + * relying on the library user to provide them. * - * #define MEMENTO_CPP_EXTRAS_ONLY - * #include "memento.c" + * For convenience the lines to implement such veneers can be found + * at the end of memento.c between: + * // C++ Operator Veneers - START + * and + * // C++ Operator Veneers - END * - * In the case where MEMENTO is not defined, this will not do anything. + * Memento's interception of new/delete can be disabled at runtime + * by using Memento_setIgnoreNewDelete(1). Alternatively the + * MEMENTO_IGNORENEWDELETE environment variable can be set to 1 to + * achieve the same result. * * Both Windows and GCC provide separate new[] and delete[] operators * for arrays. Apparently some systems do not. If this is the case for @@ -167,7 +202,7 @@ * it's really easy: * git clone git://github.com/ianlancetaylor/libbacktrace * cd libbacktrace - * ./configure + * ./configure --enable-shared * make * * This leaves the build .so as .libs/libbacktrace.so @@ -182,10 +217,21 @@ * sudo cp .libs/libbacktrace.so /opt/lib/ */ +#ifdef __cplusplus + +// Avoids problems with strdup()'s throw() attribute on Linux. +#include + +extern "C" { +#endif + #ifndef MEMENTO_H +/* Include all these first, so our definitions below do + * not conflict with them. 
*/ #include #include +#include #define MEMENTO_H @@ -211,8 +257,6 @@ #define MEMENTO_ALLOCFILL 0xa8 #define MEMENTO_FREEFILL 0xa9 -#define MEMENTO_FREELIST_MAX 0x2000000 - int Memento_checkBlock(void *); int Memento_checkAllMemory(void); int Memento_check(void); @@ -229,18 +273,32 @@ int Memento_failAt(int); int Memento_failThisEvent(void); void Memento_listBlocks(void); void Memento_listNewBlocks(void); +void Memento_listPhasedBlocks(void); size_t Memento_setMax(size_t); void Memento_stats(void); void *Memento_label(void *, const char *); void Memento_tick(void); +int Memento_setVerbose(int); + +/* Terminate backtraces if we see specified function name. E.g. +'cfunction_call' will exclude Python interpreter functions when Python calls C +code. Returns 0 on success, -1 on failure (out of memory). */ +int Memento_addBacktraceLimitFnname(const char *fnname); + +/* If is 0, we do not call Memento_fin() in an atexit() handler. */ +int Memento_setAtexitFin(int atexitfin); + +int Memento_setIgnoreNewDelete(int ignore); void *Memento_malloc(size_t s); void *Memento_realloc(void *, size_t s); void Memento_free(void *); void *Memento_calloc(size_t, size_t); char *Memento_strdup(const char*); +#if !defined(MEMENTO_GS_HACKS) && !defined(MEMENTO_MUPDF_HACKS) int Memento_asprintf(char **ret, const char *format, ...); int Memento_vasprintf(char **ret, const char *format, va_list ap); +#endif void Memento_info(void *addr); void Memento_listBlockInfo(void); @@ -274,6 +332,13 @@ void Memento_fin(void); void Memento_bt(void); +void *Memento_cpp_new(size_t size); +void Memento_cpp_delete(void *pointer); +void *Memento_cpp_new_array(size_t size); +void Memento_cpp_delete_array(void *pointer); + +void Memento_showHash(unsigned int hash); + #ifdef MEMENTO #ifndef COMPILING_MEMENTO_C @@ -282,9 +347,11 @@ void Memento_bt(void); #define realloc Memento_realloc #define calloc Memento_calloc #define strdup Memento_strdup +#if !defined(MEMENTO_GS_HACKS) && !defined(MEMENTO_MUPDF_HACKS) 
#define asprintf Memento_asprintf #define vasprintf Memento_vasprintf #endif +#endif #else @@ -293,8 +360,10 @@ void Memento_bt(void); #define Memento_realloc MEMENTO_UNDERLYING_REALLOC #define Memento_calloc MEMENTO_UNDERLYING_CALLOC #define Memento_strdup strdup +#if !defined(MEMENTO_GS_HACKS) && !defined(MEMENTO_MUPDF_HACKS) #define Memento_asprintf asprintf #define Memento_vasprintf vasprintf +#endif #define Memento_checkBlock(A) 0 #define Memento_checkAllMemory() 0 @@ -311,6 +380,7 @@ void Memento_bt(void); #define Memento_failThisEvent() 0 #define Memento_listBlocks() do {} while (0) #define Memento_listNewBlocks() do {} while (0) +#define Memento_listPhasedBlocks() do {} while (0) #define Memento_setMax(A) 0 #define Memento_stats() do {} while (0) #define Memento_label(A,B) (A) @@ -331,6 +401,7 @@ void Memento_bt(void); #define Memento_checkBytePointerOrNull(A) 0 #define Memento_checkShortPointerOrNull(A) 0 #define Memento_checkIntPointerOrNull(A) 0 +#define Memento_setIgnoreNewDelete(v) 0 #define Memento_tick() do {} while (0) #define Memento_startLeaking() do {} while (0) @@ -339,7 +410,14 @@ void Memento_bt(void); #define Memento_bt() do {} while (0) #define Memento_sequence() (0) #define Memento_squeezing() (0) +#define Memento_setVerbose(A) (A) +#define Memento_addBacktraceLimitFnname(A) (0) +#define Memento_setAtexitFin(atexitfin) (0) #endif /* MEMENTO */ +#ifdef __cplusplus +} +#endif + #endif /* MEMENTO_H */ diff --git a/misc/mupdf/include/mupdf/pdf.h b/misc/mupdf/include/mupdf/pdf.h index fe28c18..e160563 100644 --- a/misc/mupdf/include/mupdf/pdf.h +++ b/misc/mupdf/include/mupdf/pdf.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. 
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_H #define MUPDF_PDF_H @@ -47,6 +47,8 @@ extern "C" { #include "mupdf/pdf/javascript.h" #include "mupdf/pdf/clean.h" +#include "mupdf/pdf/image-rewriter.h" + #ifdef __cplusplus } diff --git a/misc/mupdf/include/mupdf/pdf/annot.h b/misc/mupdf/include/mupdf/pdf/annot.h index 9aa4393..4bc6e1a 100644 --- a/misc/mupdf/include/mupdf/pdf/annot.h +++ b/misc/mupdf/include/mupdf/pdf/annot.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,12 +17,20 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_ANNOT_H #define MUPDF_PDF_ANNOT_H +#include "mupdf/fitz/display-list.h" +#include "mupdf/fitz/stream.h" +#include "mupdf/fitz/structured-text.h" +#include "mupdf/pdf/object.h" +#include "mupdf/pdf/page.h" + +typedef struct pdf_annot pdf_annot; + enum pdf_annot_type { PDF_ANNOT_TEXT, @@ -252,12 +260,76 @@ void pdf_walk_tree(fz_context *ctx, pdf_obj *tree, pdf_obj *kid_name, Resolve a link within a document. */ int pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp, float *yp); +fz_link_dest pdf_resolve_link_dest(fz_context *ctx, pdf_document *doc, const char *uri); + +/* + Create an action object given a link URI. The action will + be a GoTo or URI action depending on whether the link URI + specifies a document internal or external destination. 
+*/ +pdf_obj *pdf_new_action_from_link(fz_context *ctx, pdf_document *doc, const char *uri); + +/* + Create a destination object given a link URI expected to adhere + to the Adobe specification "Parameters for Opening PDF files" + from the Adobe Acrobat SDK. The resulting destination object + will either be a PDF string, or a PDF array referring to a page + and suitable zoom level settings. In the latter case the page + can be referred to by PDF object number or by page number, this + is controlled by the is_remote argument. For remote destinations + it is not possible to refer to the page by object number, so + page numbers are used instead. +*/ +pdf_obj *pdf_new_dest_from_link(fz_context *ctx, pdf_document *doc, const char *uri, int is_remote); + +/* + Create a link URI string according to the Adobe specification + "Parameters for Opening PDF files" from the Adobe Acrobat SDK, + version 8.1, which can, at the time of writing, be found here: + + https://web.archive.org/web/20170921000830/http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_open_parameters.pdf + + The resulting string must be freed by the caller. 
+*/ +char *pdf_new_uri_from_explicit_dest(fz_context *ctx, fz_link_dest dest); + +/* + Create a remote link URI string according to the Adobe specification + "Parameters for Opening PDF files" from the Adobe Acrobat SDK, + version 8.1, which can, at the time of writing, be found here: + + https://web.archive.org/web/20170921000830/http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_open_parameters.pdf + + The file: URI scheme is used in the resulting URI if the remote document + is specified by a system independent path (already taking the recommendations + in table 3.40 of the PDF 1.7 specification into account), and either a + destination name or a page number and zoom level are appended: + file:///path/doc.pdf#page=42&view=FitV,100 + file:///path/doc.pdf#nameddest=G42.123456 + + If a URL is used to specify the remote document, then its scheme takes + precedence and either a destination name or a page number and zoom level + are appended: + ftp://example.com/alpha.pdf#page=42&view=Fit + https://example.com/bravo.pdf?query=parameter#page=42&view=Fit + + The resulting string must be freed by the caller. +*/ +char *pdf_append_named_dest_to_uri(fz_context *ctx, const char *url, const char *name); +char *pdf_append_explicit_dest_to_uri(fz_context *ctx, const char *url, fz_link_dest dest); +char *pdf_new_uri_from_path_and_named_dest(fz_context *ctx, const char *path, const char *name); +char *pdf_new_uri_from_path_and_explicit_dest(fz_context *ctx, const char *path, fz_link_dest dest); /* Create transform to fit appearance stream to annotation Rect */ fz_matrix pdf_annot_transform(fz_context *ctx, pdf_annot *annot); +/* + Create a new link object. +*/ +fz_link *pdf_new_link(fz_context *ctx, pdf_page *page, fz_rect rect, const char *uri, pdf_obj *obj); + /* create a new annotation of the specified type on the specified page. 
The returned pdf_annot structure is owned by the @@ -271,6 +343,26 @@ pdf_annot *pdf_create_annot_raw(fz_context *ctx, pdf_page *page, enum pdf_annot_ */ fz_link *pdf_create_link(fz_context *ctx, pdf_page *page, fz_rect bbox, const char *uri); +/* + delete an existing link from the specified page. +*/ +void pdf_delete_link(fz_context *ctx, pdf_page *page, fz_link *link); + +enum pdf_border_style +{ + PDF_BORDER_STYLE_SOLID = 0, + PDF_BORDER_STYLE_DASHED, + PDF_BORDER_STYLE_BEVELED, + PDF_BORDER_STYLE_INSET, + PDF_BORDER_STYLE_UNDERLINE, +}; + +enum pdf_border_effect +{ + PDF_BORDER_EFFECT_NONE = 0, + PDF_BORDER_EFFECT_CLOUDY, +}; + /* create a new annotation of the specified type on the specified page. Populate it with sensible defaults per the type. @@ -301,6 +393,11 @@ void pdf_delete_annot(fz_context *ctx, pdf_page *page, pdf_annot *annot); void pdf_set_annot_popup(fz_context *ctx, pdf_annot *annot, fz_rect rect); fz_rect pdf_annot_popup(fz_context *ctx, pdf_annot *annot); +/* + Check to see if an annotation has a rect. +*/ +int pdf_annot_has_rect(fz_context *ctx, pdf_annot *annot); + /* Check to see if an annotation has an ink list. */ @@ -331,6 +428,20 @@ int pdf_annot_has_interior_color(fz_context *ctx, pdf_annot *annot); */ int pdf_annot_has_line_ending_styles(fz_context *ctx, pdf_annot *annot); +/* + Check to see if an annotation has quadding. +*/ +int pdf_annot_has_quadding(fz_context *ctx, pdf_annot *annot); +/* + Check to see if an annotation has a border. +*/ +int pdf_annot_has_border(fz_context *ctx, pdf_annot *annot); + +/* + Check to see if an annotation has a border effect. +*/ +int pdf_annot_has_border_effect(fz_context *ctx, pdf_annot *annot); + /* Check to see if an annotation has an icon name. */ @@ -358,9 +469,40 @@ fz_rect pdf_annot_rect(fz_context *ctx, pdf_annot *annot); /* Retrieve the annotation border line width in points. + DEPRECATED: Use pdf_annot_border_width instead. 
*/ float pdf_annot_border(fz_context *ctx, pdf_annot *annot); +/* + Retrieve the annotation border style. + */ +enum pdf_border_style pdf_annot_border_style(fz_context *ctx, pdf_annot *annot); + +/* + Retrieve the annotation border width in points. + */ +float pdf_annot_border_width(fz_context *ctx, pdf_annot *annot); + +/* + How many items does the annotation border dash pattern have? + */ +int pdf_annot_border_dash_count(fz_context *ctx, pdf_annot *annot); + +/* + How long is dash item i in the annotation border dash pattern? + */ +float pdf_annot_border_dash_item(fz_context *ctx, pdf_annot *annot, int i); + +/* + Retrieve the annotation border effect. + */ +enum pdf_border_effect pdf_annot_border_effect(fz_context *ctx, pdf_annot *annot); + +/* + Retrieve the annotation border effect intensity. + */ +float pdf_annot_border_effect_intensity(fz_context *ctx, pdf_annot *annot); + /* Retrieve the annotation opacity. (0 transparent, 1 solid). */ @@ -427,6 +569,13 @@ fz_point pdf_annot_ink_list_stroke_vertex(fz_context *ctx, pdf_annot *annot, int */ void pdf_set_annot_flags(fz_context *ctx, pdf_annot *annot, int flags); +/* + Set the stamp appearance stream to a custom image. + Fits the image to the current Rect, and shrinks the Rect + to fit the image aspect ratio. +*/ +void pdf_set_annot_stamp_image(fz_context *ctx, pdf_annot *annot, fz_image *image); + /* Set the bounding box for an annotation, in doc space. */ @@ -434,9 +583,40 @@ void pdf_set_annot_rect(fz_context *ctx, pdf_annot *annot, fz_rect rect); /* Set the border width for an annotation, in points. + DEPRECATED: Use pdf_set_annot_border_width instead. */ void pdf_set_annot_border(fz_context *ctx, pdf_annot *annot, float width); +/* + Set the border style for an annotation. 
+*/ +void pdf_set_annot_border_style(fz_context *ctx, pdf_annot *annot, enum pdf_border_style style); + +/* + Set the border width for an annotation in points; +*/ +void pdf_set_annot_border_width(fz_context *ctx, pdf_annot *annot, float width); + +/* + Clear the entire border dash pattern for an annotation. +*/ +void pdf_clear_annot_border_dash(fz_context *ctx, pdf_annot *annot); + +/* + Add an item to the end of the border dash pattern for an annotation. +*/ +void pdf_add_annot_border_dash_item(fz_context *ctx, pdf_annot *annot, float length); + +/* + Set the border effect for an annotation. +*/ +void pdf_set_annot_border_effect(fz_context *ctx, pdf_annot *annot, enum pdf_border_effect effect); + +/* + Set the border effect intensity for an annotation. +*/ +void pdf_set_annot_border_effect_intensity(fz_context *ctx, pdf_annot *annot, float intensity); + /* Set the opacity for an annotation, between 0 (transparent) and 1 (solid). @@ -535,6 +715,7 @@ void pdf_set_annot_line_ending_styles(fz_context *ctx, pdf_annot *annot, enum pd const char *pdf_annot_icon_name(fz_context *ctx, pdf_annot *annot); int pdf_annot_is_open(fz_context *ctx, pdf_annot *annot); +int pdf_annot_is_standard_stamp(fz_context *ctx, pdf_annot *annot); void pdf_annot_line(fz_context *ctx, pdf_annot *annot, fz_point *a, fz_point *b); void pdf_set_annot_line(fz_context *ctx, pdf_annot *annot, fz_point a, fz_point b); @@ -563,7 +744,21 @@ void pdf_print_default_appearance(fz_context *ctx, char *buf, int nbuf, const ch void pdf_annot_default_appearance(fz_context *ctx, pdf_annot *annot, const char **font, float *size, int *n, float color[4]); void pdf_set_annot_default_appearance(fz_context *ctx, pdf_annot *annot, const char *font, float size, int n, const float *color); +/* + * Request that an appearance stream should be generated for an annotation if none is present. + * It will be created in future calls to pdf_update_annot or pdf_update_page. 
+ */ +void pdf_annot_request_synthesis(fz_context *ctx, pdf_annot *annot); + +/* + * Request that an appearance stream should be re-generated for an annotation + * the next time pdf_update_annot or pdf_update_page is called. + * You usually won't need to call this, because changing any annotation attributes + * via the pdf_annot functions will do so automatically. + * It will be created in future calls to pdf_update_annot or pdf_update_page. + */ void pdf_annot_request_resynthesis(fz_context *ctx, pdf_annot *annot); + int pdf_annot_needs_resynthesis(fz_context *ctx, pdf_annot *annot); void pdf_set_annot_resynthesised(fz_context *ctx, pdf_annot *annot); void pdf_dirty_annot(fz_context *ctx, pdf_annot *annot); @@ -649,18 +844,56 @@ fz_stext_page *pdf_new_stext_page_from_annot(fz_context *ctx, pdf_annot *annot, fz_layout_block *pdf_layout_text_widget(fz_context *ctx, pdf_annot *annot); -const char *pdf_guess_mime_type_from_file_name(fz_context *ctx, const char *filename); -pdf_obj *pdf_embedded_file_stream(fz_context *ctx, pdf_obj *fs); -const char *pdf_embedded_file_name(fz_context *ctx, pdf_obj *fs); -const char *pdf_embedded_file_type(fz_context *ctx, pdf_obj *fs); +typedef struct pdf_embedded_file_params pdf_embedded_file_params; + +/* + Parameters for an embedded file. Obtained through + pdf_get_embedded_file_params(). The creation and + modification date fields are < 0 if unknown. +*/ +struct pdf_embedded_file_params { + const char *filename; + const char *mimetype; + int size; + int64_t created; + int64_t modified; +}; + +/* + Check if pdf object is a file specification. 
+*/ int pdf_is_embedded_file(fz_context *ctx, pdf_obj *fs); -fz_buffer *pdf_load_embedded_file(fz_context *ctx, pdf_obj *fs); -pdf_obj *pdf_add_embedded_file(fz_context *ctx, pdf_document *doc, const char *filename, const char *mimetype, fz_buffer *contents); -char *pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *obj); -char *pdf_parse_link_action(fz_context *ctx, pdf_document *doc, pdf_obj *obj, int pagenum); +/* + Add an embedded file to the document. This can later + be passed e.g. to pdf_set_annot_filespec(). If unknown, + supply NULL for MIME type and -1 for the date arguments. + If a checksum is added it can later be verified by calling + pdf_verify_embedded_file_checksum(). +*/ +pdf_obj *pdf_add_embedded_file(fz_context *ctx, pdf_document *doc, const char *filename, const char *mimetype, fz_buffer *contents, int64_t created, int64_t modifed, int add_checksum); + +/* + Obtain parameters for embedded file: name, size, + creation and modification dates and MIME type. +*/ +void pdf_get_embedded_file_params(fz_context *ctx, pdf_obj *fs, pdf_embedded_file_params *out); + +/* + Load embedded file contents in a buffer which + needs to be dropped by the caller after use. +*/ +fz_buffer *pdf_load_embedded_file_contents(fz_context *ctx, pdf_obj *fs); + +/* + Verifies the embedded file checksum. Returns 1 + if the verification is successful or there is no + checksum to be verified, or 0 if verification fails. 
+*/ +int pdf_verify_embedded_file_checksum(fz_context *ctx, pdf_obj *fs); + pdf_obj *pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle); -fz_link *pdf_load_link_annots(fz_context *ctx, pdf_document *, pdf_obj *annots, int pagenum, fz_matrix page_ctm); +fz_link *pdf_load_link_annots(fz_context *ctx, pdf_document *, pdf_page *, pdf_obj *annots, int pagenum, fz_matrix page_ctm); void pdf_annot_MK_BG(fz_context *ctx, pdf_annot *annot, int *n, float color[4]); void pdf_annot_MK_BC(fz_context *ctx, pdf_annot *annot, int *n, float color[4]); @@ -677,4 +910,32 @@ void pdf_set_annot_hot(fz_context *ctx, pdf_annot *annot, int hot); void pdf_set_annot_appearance(fz_context *ctx, pdf_annot *annot, const char *appearance, const char *state, fz_matrix ctm, fz_rect bbox, pdf_obj *res, fz_buffer *contents); void pdf_set_annot_appearance_from_display_list(fz_context *ctx, pdf_annot *annot, const char *appearance, const char *state, fz_matrix ctm, fz_display_list *list); +/* + Check to see if an annotation has a file specification. +*/ +int pdf_annot_has_filespec(fz_context *ctx, pdf_annot *annot); + +/* + Retrieve the file specification for the given annotation. +*/ +pdf_obj *pdf_annot_filespec(fz_context *ctx, pdf_annot *annot); + +/* + Set the annotation file specification. +*/ +void pdf_set_annot_filespec(fz_context *ctx, pdf_annot *annot, pdf_obj *obj); + +/* + Get/set a hidden flag preventing the annotation from being + rendered when it is being edited. This flag is independent + of the hidden flag in the PDF annotation object described in the PDF specification. +*/ +int pdf_annot_hidden_for_editing(fz_context *ctx, pdf_annot *annot); +void pdf_set_annot_hidden_for_editing(fz_context *ctx, pdf_annot *annot, int hidden); + +/* + * Apply Redaction annotation by redacting page underneath and removing the annotation. 
+ */ +int pdf_apply_redaction(fz_context *ctx, pdf_annot *annot, pdf_redact_options *opts); + #endif diff --git a/misc/mupdf/include/mupdf/pdf/clean.h b/misc/mupdf/include/mupdf/pdf/clean.h index f22b95d..415059d 100644 --- a/misc/mupdf/include/mupdf/pdf/clean.h +++ b/misc/mupdf/include/mupdf/pdf/clean.h @@ -17,15 +17,32 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_CLEAN_H #define MUPDF_PDF_CLEAN_H +#include "mupdf/pdf/document.h" +#include "mupdf/pdf/image-rewriter.h" + +typedef struct +{ + pdf_write_options write; + pdf_image_rewriter_options image; + + /* Experimental option. Subject to change. */ + int subset_fonts; +} pdf_clean_options; + /* Read infile, and write selected pages to outfile with the given options. */ -void pdf_clean_file(fz_context *ctx, char *infile, char *outfile, char *password, pdf_write_options *opts, int retainlen, char *retainlist[]); +void pdf_clean_file(fz_context *ctx, char *infile, char *outfile, char *password, pdf_clean_options *opts, int retainlen, char *retainlist[]); + +/* + Recreate page tree to include only the pages listed in the array, in the order listed. +*/ +void pdf_rearrange_pages(fz_context *ctx, pdf_document *doc, int count, const int *pages); #endif diff --git a/misc/mupdf/include/mupdf/pdf/cmap.h b/misc/mupdf/include/mupdf/pdf/cmap.h index c60029f..cc4c58f 100644 --- a/misc/mupdf/include/mupdf/pdf/cmap.h +++ b/misc/mupdf/include/mupdf/pdf/cmap.h @@ -17,13 +17,16 @@ // // Alternative licensing terms are available from the licensor. 
// For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_CMAP_H #define MUPDF_PDF_CMAP_H -#define PDF_MRANGE_CAP 8 +#include "mupdf/fitz/store.h" +#include "mupdf/pdf/document.h" + +#define PDF_MRANGE_CAP 32 typedef struct { @@ -100,6 +103,8 @@ void pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int srclo, /* Add a single one-to-many mapping. + + len <= 256. */ void pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int one, int *many, size_t len); void pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap); diff --git a/misc/mupdf/include/mupdf/pdf/crypt.h b/misc/mupdf/include/mupdf/pdf/crypt.h index 043c097..5273b65 100644 --- a/misc/mupdf/include/mupdf/pdf/crypt.h +++ b/misc/mupdf/include/mupdf/pdf/crypt.h @@ -17,12 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_CRYPT_H #define MUPDF_PDF_CRYPT_H +#include "mupdf/pdf/document.h" +#include "mupdf/pdf/object.h" + enum { PDF_ENCRYPT_KEEP, @@ -48,7 +51,9 @@ fz_stream *pdf_open_crypt_with_filter(fz_context *ctx, fz_stream *chain, pdf_cry int pdf_crypt_version(fz_context *ctx, pdf_crypt *crypt); int pdf_crypt_revision(fz_context *ctx, pdf_crypt *crypt); -char *pdf_crypt_method(fz_context *ctx, pdf_crypt *crypt); +const char *pdf_crypt_method(fz_context *ctx, pdf_crypt *crypt); +const char *pdf_crypt_string_method(fz_context *ctx, pdf_crypt *crypt); +const char *pdf_crypt_stream_method(fz_context *ctx, pdf_crypt *crypt); int pdf_crypt_length(fz_context *ctx, pdf_crypt *crypt); int pdf_crypt_permissions(fz_context *ctx, pdf_crypt *crypt); int pdf_crypt_encrypt_metadata(fz_context *ctx, pdf_crypt *crypt); diff --git a/misc/mupdf/include/mupdf/pdf/document.h b/misc/mupdf/include/mupdf/pdf/document.h index baf770a..cdab913 100644 --- a/misc/mupdf/include/mupdf/pdf/document.h +++ b/misc/mupdf/include/mupdf/pdf/document.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,13 +17,18 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_DOCUMENT_H #define MUPDF_PDF_DOCUMENT_H #include "mupdf/fitz/export.h" +#include "mupdf/fitz/document.h" +#include "mupdf/fitz/hash.h" +#include "mupdf/fitz/stream.h" +#include "mupdf/fitz/xml.h" +#include "mupdf/pdf/object.h" typedef struct pdf_xref pdf_xref; typedef struct pdf_ocg_descriptor pdf_ocg_descriptor; @@ -31,6 +36,7 @@ typedef struct pdf_ocg_descriptor pdf_ocg_descriptor; typedef struct pdf_page pdf_page; typedef struct pdf_annot pdf_annot; typedef struct pdf_js pdf_js; +typedef struct pdf_document pdf_document; enum { @@ -73,6 +79,76 @@ typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_even */ typedef void (pdf_free_doc_event_data_cb)(fz_context *ctx, void *data); +typedef struct pdf_js_console pdf_js_console; + +/* + Callback called when the console is dropped because it + is being replaced or the javascript is being disabled + by a call to pdf_disable_js(). +*/ +typedef void (pdf_js_console_drop_cb)(pdf_js_console *console, void *user); + +/* + Callback signalling that a piece of javascript is asking + the javascript console to be displayed. +*/ +typedef void (pdf_js_console_show_cb)(void *user); + +/* + Callback signalling that a piece of javascript is asking + the javascript console to be hidden. +*/ +typedef void (pdf_js_console_hide_cb)(void *user); + +/* + Callback signalling that a piece of javascript is asking + the javascript console to remove all its contents. +*/ +typedef void (pdf_js_console_clear_cb)(void *user); + +/* + Callback signalling that a piece of javascript is appending + the given message to the javascript console contents. +*/ +typedef void (pdf_js_console_write_cb)(void *user, const char *msg); + +/* + The callback functions relating to a javascript console. 
+*/ +typedef struct pdf_js_console { + pdf_js_console_drop_cb *drop; + pdf_js_console_show_cb *show; + pdf_js_console_hide_cb *hide; + pdf_js_console_clear_cb *clear; + pdf_js_console_write_cb *write; +} pdf_js_console; + +/* + Retrieve the currently set javascript console, or NULL + if none is set. +*/ +pdf_js_console *pdf_js_get_console(fz_context *ctx, pdf_document *doc); + +/* + Set a new javascript console. + + console: A set of callback functions informing about + what pieces of executed js are trying to do + to the js console. The caller transfers ownership of + console when calling pdf_js_set_console(). Once it and + the corresponding user pointer are no longer needed + console->drop() will be called passing both the console + and the user pointer. + + user: Opaque data that will be passed unchanged to all + js console callbacks when called. The caller ensures + that this is valid until either the js console is + replaced by calling pdf_js_set_console() again with a + new console, or pdf_disable_js() is called. In either + case the caller ensures that the user data is freed. +*/ +void pdf_js_set_console(fz_context *ctx, pdf_document *doc, pdf_js_console *console, void *user); + /* Open a PDF document. @@ -146,6 +222,10 @@ int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, cha fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc); +fz_outline_iterator *pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc); + +void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc); + /* Get the number of layer configurations defined in this document. @@ -153,8 +233,13 @@ fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc); */ int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc); -void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc); - +/* + Configure visibility of individual layers in this document. 
+*/ +int pdf_count_layers(fz_context *ctx, pdf_document *doc); +const char *pdf_layer_name(fz_context *ctx, pdf_document *doc, int layer); +int pdf_layer_is_enabled(fz_context *ctx, pdf_document *doc, int layer); +void pdf_enable_layer(fz_context *ctx, pdf_document *doc, int layer, int enabled); typedef struct { @@ -318,6 +403,7 @@ struct pdf_document fz_stream *file; int version; + int is_fdf; int64_t startxref; int64_t file_size; pdf_crypt *crypt; @@ -339,14 +425,17 @@ struct pdf_document pdf_xref *saved_xref_sections; int *xref_index; int save_in_progress; - int has_xref_streams; - int has_old_style_xrefs; + int last_xref_was_old_style; int has_linearization_object; - int rev_page_count; + int map_page_count; pdf_rev_page_map *rev_page_map; + pdf_obj **fwd_page_map; + int page_tree_broken; int repair_attempted; + int repair_in_progress; + int non_structural_change; /* True if we are modifying the document in a way that does not change the (page) structure */ /* State indicating which file parsing method we are using */ int file_reading_linearly; @@ -488,6 +577,7 @@ void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf pdf operations, together with a set of resources. This sequence/set pair can then be used as the basis for adding a page to the document (see pdf_add_page). + Returns a kept reference. doc: The document for which these are intended. @@ -544,9 +634,11 @@ pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int doc: The document to insert into. - at: The page number to insert at. 0 inserts at the start. - negative numbers, or INT_MAX insert at the end. Otherwise - n inserts after page n. + at: The page number to insert at (pages numbered from 0). + 0 <= n <= page_count inserts before page n. Negative numbers + or INT_MAX are treated as page count, and insert at the end. + 0 inserts at the start. All existing pages after the + insertion point are shuffled up. page: The page to insert. 
*/ @@ -577,6 +669,24 @@ void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number); */ void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end); +/* + Get page label (string) from a page number (index). +*/ +void pdf_page_label(fz_context *ctx, pdf_document *doc, int page, char *buf, size_t size); +void pdf_page_label_imp(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size); + +typedef enum { + PDF_PAGE_LABEL_NONE = 0, + PDF_PAGE_LABEL_DECIMAL = 'D', + PDF_PAGE_LABEL_ROMAN_UC = 'R', + PDF_PAGE_LABEL_ROMAN_LC = 'r', + PDF_PAGE_LABEL_ALPHA_UC = 'A', + PDF_PAGE_LABEL_ALPHA_LC = 'a', +} pdf_page_label_style; + +void pdf_set_page_labels(fz_context *ctx, pdf_document *doc, int index, pdf_page_label_style style, const char *prefix, int start); +void pdf_delete_page_labels(fz_context *ctx, pdf_document *doc, int index); + fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc); void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang); @@ -605,6 +715,9 @@ typedef struct char opwd_utf8[128]; /* Owner password. */ char upwd_utf8[128]; /* User password. */ int do_snapshot; /* Do not use directly. Use the snapshot functions. */ + int do_preserve_metadata; /* When cleaning, preserve metadata unchanged. */ + int do_use_objstms; /* Use objstms if possible */ + int compression_effort; /* 0 for default. 100 = max, 1 = min. */ } pdf_write_options; FZ_DATA extern const pdf_write_options pdf_default_write_options; @@ -683,4 +796,27 @@ void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename); */ void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm); +/* + Minimize the memory used by a document. + + We walk the in memory xref tables, evicting the PDF objects + therein that aren't in use. + + This reduces the current memory use, but any subsequent use + of these objects will load them back into memory again. 
+*/ +void pdf_minimize_document(fz_context *ctx, pdf_document *doc); + +/* + Map a pdf object representing a structure tag through + an optional role_map and convert to an fz_structure. +*/ +fz_structure pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag); + +/* + Run the document structure to a device. +*/ +void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie); + + #endif diff --git a/misc/mupdf/include/mupdf/pdf/event.h b/misc/mupdf/include/mupdf/pdf/event.h index 561b680..5f138ee 100644 --- a/misc/mupdf/include/mupdf/pdf/event.h +++ b/misc/mupdf/include/mupdf/pdf/event.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,12 +17,14 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_EVENT_H #define MUPDF_PDF_EVENT_H +#include "mupdf/pdf/document.h" + /* Document events: the objects via which MuPDF informs the calling app of occurrences emanating from the document, possibly from user interaction @@ -68,10 +70,12 @@ void *pdf_get_doc_event_callback_data(fz_context *ctx, pdf_document *doc); */ typedef struct { + pdf_document *doc; const char *message; int icon_type; int button_group_type; const char *title; + int has_check_box; const char *check_box_message; int initially_checked; int finally_checked; diff --git a/misc/mupdf/include/mupdf/pdf/font.h b/misc/mupdf/include/mupdf/pdf/font.h index 0b2a56f..3b44cfc 100644 --- a/misc/mupdf/include/mupdf/pdf/font.h +++ b/misc/mupdf/include/mupdf/pdf/font.h @@ -17,13 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_FONT_H #define MUPDF_PDF_FONT_H #include "mupdf/pdf/cmap.h" +#include "mupdf/fitz/device.h" +#include "mupdf/fitz/font.h" enum { @@ -95,6 +97,7 @@ typedef struct pdf_vmtx *vmtx; int is_embedded; + int t3loading; } pdf_font_desc; void pdf_set_font_wmode(fz_context *ctx, pdf_font_desc *font, int wmode); @@ -150,6 +153,14 @@ pdf_obj *pdf_add_cjk_font(fz_context *ctx, pdf_document *doc, fz_font *font, int */ pdf_obj *pdf_add_substitute_font(fz_context *ctx, pdf_document *doc, fz_font *font); -int pdf_font_writing_supported(fz_font *font); +int pdf_font_writing_supported(fz_context *ctx, fz_font *font); + +/* + Subset fonts by scanning the document to establish usage, and then + rewriting the font files. + + EXPERIMENTAL AND SUBJECT TO CHANGE. 
+*/ +void pdf_subset_fonts(fz_context *ctx, pdf_document *doc, int pages_len, const int *pages); #endif diff --git a/misc/mupdf/include/mupdf/pdf/form.h b/misc/mupdf/include/mupdf/pdf/form.h index 1d88636..2b8d30f 100644 --- a/misc/mupdf/include/mupdf/pdf/form.h +++ b/misc/mupdf/include/mupdf/pdf/form.h @@ -17,12 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_FORM_H #define MUPDF_PDF_FORM_H +#include "mupdf/fitz/display-list.h" +#include "mupdf/pdf/document.h" + /* Types of widget */ enum pdf_widget_type { @@ -151,13 +154,14 @@ void pdf_calculate_form(fz_context *ctx, pdf_document *doc); void pdf_reset_form(fz_context *ctx, pdf_document *doc, pdf_obj *fields, int exclude); int pdf_field_type(fz_context *ctx, pdf_obj *field); +const char *pdf_field_type_string(fz_context *ctx, pdf_obj *field); int pdf_field_flags(fz_context *ctx, pdf_obj *field); /* Retrieve the name for a field as a C string that must be freed by the caller. 
*/ -char *pdf_field_name(fz_context *ctx, pdf_obj *field); +char *pdf_load_field_name(fz_context *ctx, pdf_obj *field); const char *pdf_field_value(fz_context *ctx, pdf_obj *field); void pdf_create_field_name(fz_context *ctx, pdf_document *doc, const char *prefix, char *buf, size_t len); @@ -319,14 +323,6 @@ fz_pixmap *pdf_preview_signature_as_pixmap(fz_context *ctx, const char *reason, const char *location); -/* - check a signature's certificate chain and digest - - This is a helper function defined to provide compatibility with older - versions of mupdf -*/ -int pdf_check_signature(fz_context *ctx, pdf_pkcs7_verifier *verifier, pdf_document *doc, pdf_obj *signature, char *ebuf, size_t ebufsize); - void pdf_drop_signer(fz_context *ctx, pdf_pkcs7_signer *signer); void pdf_drop_verifier(fz_context *ctx, pdf_pkcs7_verifier *verifier); @@ -375,4 +371,10 @@ void pdf_annot_event_page_close(fz_context *ctx, pdf_annot *annot); void pdf_annot_event_page_visible(fz_context *ctx, pdf_annot *annot); void pdf_annot_event_page_invisible(fz_context *ctx, pdf_annot *annot); +/* + * Bake appearances of annotations and/or widgets into static page content, + * and remove the corresponding interactive PDF objects. + */ +void pdf_bake_document(fz_context *ctx, pdf_document *doc, int bake_annots, int bake_widgets); + #endif diff --git a/misc/mupdf/include/mupdf/pdf/image-rewriter.h b/misc/mupdf/include/mupdf/pdf/image-rewriter.h new file mode 100644 index 0000000..f24c872 --- /dev/null +++ b/misc/mupdf/include/mupdf/pdf/image-rewriter.h @@ -0,0 +1,78 @@ +// Copyright (C) 2004-2021 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. 
+// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#ifndef MUPDF_PDF_IMAGE_REWRITER_H +#define MUPDF_PDF_IMAGE_REWRITER_H + +#include "mupdf/pdf/document.h" + +enum +{ + FZ_SUBSAMPLE_AVERAGE, + FZ_SUBSAMPLE_BICUBIC +}; + +enum +{ + FZ_RECOMPRESS_NEVER, + FZ_RECOMPRESS_SAME, + FZ_RECOMPRESS_LOSSLESS, + FZ_RECOMPRESS_JPEG, + FZ_RECOMPRESS_J2K, + FZ_RECOMPRESS_FAX +}; + +typedef struct +{ + int color_lossless_image_subsample_method; + int color_lossy_image_subsample_method; + int color_lossless_image_subsample_threshold; /* 0, or the threshold dpi at which to subsample color images. */ + int color_lossless_image_subsample_to; /* 0, or the dpi to subsample to */ + int color_lossy_image_subsample_threshold; /* 0, or the threshold dpi at which to subsample color images. */ + int color_lossy_image_subsample_to; /* 0, or the dpi to subsample to */ + int color_lossless_image_recompress_method; /* Which compression method to use for losslessly compressed color images? */ + int color_lossy_image_recompress_method; /* Which compression method to use for lossy compressed color images? */ + char *color_lossy_image_recompress_quality; + char *color_lossless_image_recompress_quality; + int gray_lossless_image_subsample_method; + int gray_lossy_image_subsample_method; + int gray_lossless_image_subsample_threshold; /* 0, or the threshold at which to subsample gray images. 
*/ + int gray_lossless_image_subsample_to; /* 0, or the dpi to subsample to */ + int gray_lossy_image_subsample_threshold; /* 0, or the threshold at which to subsample gray images. */ + int gray_lossy_image_subsample_to; /* 0, or the dpi to subsample to */ + int gray_lossless_image_recompress_method; /* Which compression method to use for losslessly compressed gray images? */ + int gray_lossy_image_recompress_method; /* Which compression method to use for lossy compressed gray images? */ + char *gray_lossy_image_recompress_quality; + char *gray_lossless_image_recompress_quality; + int bitonal_image_subsample_method; + int bitonal_image_subsample_threshold; /* 0, or the threshold at which to subsample bitonal images. */ + int bitonal_image_subsample_to; /* 0, or the dpi to subsample to */ + int bitonal_image_recompress_method; /* Which compression method to use for bitonal images? */ + char *bitonal_image_recompress_quality; +} pdf_image_rewriter_options; + +/* + Rewrite images within the given document. +*/ +void pdf_rewrite_images(fz_context *ctx, pdf_document *doc, pdf_image_rewriter_options *opts); + +#endif diff --git a/misc/mupdf/include/mupdf/pdf/interpret.h b/misc/mupdf/include/mupdf/pdf/interpret.h index 463c7fd..b9b1a78 100644 --- a/misc/mupdf/include/mupdf/pdf/interpret.h +++ b/misc/mupdf/include/mupdf/pdf/interpret.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,26 +17,51 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef PDF_INTERPRET_H #define PDF_INTERPRET_H #include "mupdf/pdf/font.h" #include "mupdf/pdf/resource.h" +#include "mupdf/pdf/document.h" typedef struct pdf_gstate pdf_gstate; typedef struct pdf_processor pdf_processor; void *pdf_new_processor(fz_context *ctx, int size); +pdf_processor *pdf_keep_processor(fz_context *ctx, pdf_processor *proc); void pdf_close_processor(fz_context *ctx, pdf_processor *proc); void pdf_drop_processor(fz_context *ctx, pdf_processor *proc); +typedef enum +{ + PDF_PROCESSOR_REQUIRES_DECODED_IMAGES = 1 +} pdf_processor_requirements; + struct pdf_processor { + int refs; + + int closed; + + /* close the processor. Also closes any chained processors. */ void (*close_processor)(fz_context *ctx, pdf_processor *proc); void (*drop_processor)(fz_context *ctx, pdf_processor *proc); + void (*reset_processor)(fz_context *ctx, pdf_processor *proc); + + /* At any stage, we can have one set of resources in place. + * This function gives us a set of resources to use. We remember + * any previous set on a stack, so we can pop back to it later. + * Our responsibility (as well as remembering it for our own use) + * is to pass either it, or a filtered version of it onto any + * chained processor. */ + void (*push_resources)(fz_context *ctx, pdf_processor *proc, pdf_obj *res); + /* Pop the resources stack. This must be passed on to any chained + * processors. This returns a pointer to the resource dict just + * popped by the deepest filter. The caller inherits this reference. 
*/ + pdf_obj *(*pop_resources)(fz_context *ctx, pdf_processor *proc); /* general graphics state */ void (*op_w)(fz_context *ctx, pdf_processor *proc, float linewidth); @@ -51,7 +76,7 @@ struct pdf_processor void (*op_gs_BM)(fz_context *ctx, pdf_processor *proc, const char *blendmode); void (*op_gs_ca)(fz_context *ctx, pdf_processor *proc, float alpha); void (*op_gs_CA)(fz_context *ctx, pdf_processor *proc, float alpha); - void (*op_gs_SMask)(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, pdf_obj *page_resources, float *bc, int luminosity); + void (*op_gs_SMask)(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, float *bc, int luminosity, pdf_obj *tr); void (*op_gs_end)(fz_context *ctx, pdf_processor *proc); /* special graphics state */ @@ -134,7 +159,7 @@ struct pdf_processor void (*op_BI)(fz_context *ctx, pdf_processor *proc, fz_image *image, const char *colorspace_name); void (*op_sh)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade); void (*op_Do_image)(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image); - void (*op_Do_form)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *form, pdf_obj *page_resources); + void (*op_Do_form)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *form); /* marked content */ void (*op_MP)(fz_context *ctx, pdf_processor *proc, const char *tag); @@ -159,6 +184,8 @@ struct pdf_processor /* interpreter state that persists across content streams */ const char *usage; int hidden; + + pdf_processor_requirements requirements; }; typedef struct @@ -184,9 +211,11 @@ typedef struct float stack[32]; } pdf_csi; +void pdf_count_q_balance(fz_context *ctx, pdf_document *doc, pdf_obj *res, pdf_obj *stm, int *prepend, int *append); + /* Functions to set up pdf_process structures */ -pdf_processor *pdf_new_run_processor(fz_context *ctx, fz_device *dev, fz_matrix ctm, const char *usage, pdf_gstate *gstate, fz_default_colorspaces *default_cs, fz_cookie *cookie); 
+pdf_processor *pdf_new_run_processor(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_matrix ctm, int struct_parent, const char *usage, pdf_gstate *gstate, fz_default_colorspaces *default_cs, fz_cookie *cookie); /* Create a buffer processor. @@ -198,8 +227,21 @@ pdf_processor *pdf_new_run_processor(fz_context *ctx, fz_device *dev, fz_matrix ahxencode: If 0, then image streams will be send as binary, otherwise they will be asciihexencoded. + + newlines: If 0, then minimal spacing will be sent. If 1 + then newlines will be sent after every operator. +*/ +pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode, int newlines); + +/* + Reopen a closed processor to be used again. + + This brings a processor back to life after a close. + Not all processors may support this, so this may throw + an exception. */ -pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode); +void pdf_reset_processor(fz_context *ctx, pdf_processor *proc); + /* Create an output processor. This @@ -209,12 +251,89 @@ pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode: If 0, then image streams will be send as binary, otherwise they will be asciihexencoded. + + newlines: If 0, then minimal spacing will be sent. If 1 + then newlines will be sent after every operator. */ -pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode); +pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode, int newlines); + +typedef struct pdf_filter_options pdf_filter_options; /* - opaque: Opaque value that is passed to all the filter functions. + Create a filter processor. This filters the PDF operators + it is fed, and passes them down (with some changes) to the + child filter. + + chain: The child processor to which the filtered operators + will be fed. + + The options field contains a pointer to a structure with + filter specific options in. 
+*/ +typedef pdf_processor *(pdf_filter_factory_fn)(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *factory_options); + +/* + A pdf_filter_factory is a pdf_filter_factory_fn, plus the options + needed to instantiate it. +*/ +typedef struct +{ + pdf_filter_factory_fn *filter; + void *options; +} pdf_filter_factory; + +/* + recurse: Filter resources recursively. + + instance_forms: Always recurse on XObject Form resources, but will + create a new instance of each XObject Form that is used, filtered + individually. + + ascii: If true, escape all binary data in the output. + + no_update: If true, do not update the document at the end. + + opaque: Opaque value that is passed to the complete function. + + complete: A function called at the end of processing. + This allows the caller to insert some extra content after + all other content. + + filters: Pointer to an array of filter factory/options. + The array is terminated by an entry with a NULL factory pointer. + Operators will be fed into the filter generated from the first + factory function in the list, and from there go to the filter + generated from the second factory in the list etc. + + newlines: If 0, then minimal whitespace will be produced. If 1, + then a newline will be sent after every operator. +*/ +struct pdf_filter_options +{ + int recurse; + int instance_forms; + int ascii; + int no_update; + void *opaque; + void (*complete)(fz_context *ctx, fz_buffer *buffer, void *opaque); + + pdf_filter_factory *filters; + int newlines; +}; + +typedef enum +{ + FZ_CULL_PATH_FILL, + FZ_CULL_PATH_STROKE, + FZ_CULL_PATH_FILL_STROKE, + FZ_CULL_CLIP_PATH, + FZ_CULL_GLYPH, + FZ_CULL_IMAGE, + FZ_CULL_SHADING +} fz_cull_type; + +/* image_filter: A function called to assess whether a given image should be removed or not. 
@@ -225,40 +344,25 @@ pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahx This allows the caller to insert some extra content if desired. - end_page: A function called at the end of a page. - This allows the caller to insert some extra content after - all other content. - - sanitize: If false, will only clean the syntax. This disables all filtering! - - recurse: Clean/sanitize/filter resources recursively. - - instance_forms: Always recurse on XObject Form resources, but will - create a new instance of each XObject Form that is used, filtered - individually. - - ascii: If true, escape all binary data in the output. + culler: A function called to see whether each object should + be culled or not. */ typedef struct { void *opaque; - fz_image *(*image_filter)(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image); + fz_image *(*image_filter)(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect scissor); int (*text_filter)(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox); void (*after_text_object)(fz_context *ctx, void *opaque, pdf_document *doc, pdf_processor *chain, fz_matrix ctm); - void (*end_page)(fz_context *ctx, fz_buffer *buffer, void *arg); - - int recurse; - int instance_forms; - int sanitize; - int ascii; -} pdf_filter_options; + int (*culler)(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type); +} +pdf_sanitize_filter_options; /* - Create a filter processor. This filters the PDF operators - it is fed, and passes them down (with some changes) to the - child filter. + A sanitize filter factory. - The changes made by the filter are: + sopts = pointer to pdf_sanitize_filter_options. + + The changes made by a filter generated from this are: * No operations are allowed to change the top level gstate. Additional q/Q operators are inserted to prevent this. 
@@ -276,29 +380,75 @@ typedef struct The net graphical effect of the filtered operator stream should be identical to the incoming operator stream. +*/ +pdf_processor *pdf_new_sanitize_filter(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *sopts); - chain: The child processor to which the filtered operators - will be fed. +pdf_obj *pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix ctm, pdf_filter_options *options, pdf_cycle_list *cycle_up); + +void pdf_processor_push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res); + +pdf_obj *pdf_processor_pop_resources(fz_context *ctx, pdf_processor *proc); + +/* + opaque: Opaque value that is passed to all the filter functions. + + color_rewrite: function pointer called to rewrite a color + On entry: + *cs = reference to a pdf object representing the colorspace. + + *n = number of color components + + color = *n color values. - old_res: The incoming resource dictionary. + On exit: + *cs either the same (for no change in colorspace) or + updated to be a new one. Reference must be dropped, and + a new kept reference returned! - new_res: An (initially empty) resource dictionary that will - be populated by copying entries from the old dictionary to - the new one as they are used. At the end therefore, this - contains exactly those resource objects actually required. + *n = number of color components (maybe updated) - The filter options struct allows you to filter objects using callbacks. + color = *n color values (maybe updated) + + image_rewrite: function pointer called to rewrite an image + On entry: + *image = reference to an fz_image. + + On exit: + *image either the same (for no change) or updated + to be a new one. Reference must be dropped, and a + new kept reference returned. 
+ + share_rewrite: function pointer called to rewrite a shade + + repeated_image_rewrite: If 0, then each image is rewritten only once. + Otherwise, it is called for every instance (useful if gathering + information about the ctm). */ -pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_document *doc, pdf_processor *chain, pdf_obj *old_res, pdf_obj *new_res, int struct_parents, fz_matrix transform, pdf_filter_options *filter); -pdf_obj *pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix ctm, pdf_filter_options *filter); +typedef struct +{ + void *opaque; + void (*color_rewrite)(fz_context *ctx, void *opaque, pdf_obj **cs, int *n, float color[FZ_MAX_COLORS]); + void (*image_rewrite)(fz_context *ctx, void *opaque, fz_image **image, fz_matrix ctm, pdf_obj *obj); + pdf_shade_recolorer *shade_rewrite; + int repeated_image_rewrite; +} pdf_color_filter_options; + +pdf_processor * +pdf_new_color_filter(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *copts); /* Functions to actually process annotations, glyphs and general stream objects. */ -void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *obj, pdf_obj *res, fz_cookie *cookie); +void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *res, pdf_obj *stm, fz_cookie *cookie, pdf_obj **out_res); void pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_annot *annot, fz_cookie *cookie); void pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *resources, fz_buffer *contents); +/* + Function to process a contents stream without handling the resources. + The caller is responsible for pushing/popping the resources. 
+*/ +void pdf_process_raw_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie); + /* Text handling helper functions */ typedef struct { @@ -307,6 +457,7 @@ typedef struct float scale; float leading; pdf_font_desc *font; + fz_string *fontname; float size; int render; float rise; diff --git a/misc/mupdf/include/mupdf/pdf/javascript.h b/misc/mupdf/include/mupdf/pdf/javascript.h index cc905a6..0de3a73 100644 --- a/misc/mupdf/include/mupdf/pdf/javascript.h +++ b/misc/mupdf/include/mupdf/pdf/javascript.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,12 +17,15 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_JAVASCRIPT_H #define MUPDF_PDF_JAVASCRIPT_H +#include "mupdf/pdf/document.h" +#include "mupdf/pdf/form.h" + void pdf_enable_js(fz_context *ctx, pdf_document *doc); void pdf_disable_js(fz_context *ctx, pdf_document *doc); int pdf_js_supported(fz_context *ctx, pdf_document *doc); @@ -35,6 +38,6 @@ char *pdf_js_event_value(pdf_js *js); void pdf_js_event_init_keystroke(pdf_js *js, pdf_obj *target, pdf_keystroke_event *evt); int pdf_js_event_result_keystroke(pdf_js *js, pdf_keystroke_event *evt); -void pdf_js_execute(pdf_js *js, const char *name, const char *code); +void pdf_js_execute(pdf_js *js, const char *name, const char *code, char **result); #endif diff --git a/misc/mupdf/include/mupdf/pdf/name-table.h b/misc/mupdf/include/mupdf/pdf/name-table.h index dc2d23f..c1f24a6 100644 --- a/misc/mupdf/include/mupdf/pdf/name-table.h +++ b/misc/mupdf/include/mupdf/pdf/name-table.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2023 Artifex Software, Inc. // // This file is part of MuPDF. // @@ -17,11 +17,12 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
/* Alphabetically sorted list of all PDF names to be available as constants */ PDF_MAKE_NAME("1.2", 1_2) +PDF_MAKE_NAME("1.5", 1_5) PDF_MAKE_NAME("3D", 3D) PDF_MAKE_NAME("A", A) PDF_MAKE_NAME("A85", A85) @@ -49,10 +50,14 @@ PDF_MAKE_NAME("Annots", Annots) PDF_MAKE_NAME("AnyOff", AnyOff) PDF_MAKE_NAME("App", App) PDF_MAKE_NAME("Approved", Approved) +PDF_MAKE_NAME("Art", Art) PDF_MAKE_NAME("ArtBox", ArtBox) +PDF_MAKE_NAME("Artifact", Artifact) PDF_MAKE_NAME("AsIs", AsIs) PDF_MAKE_NAME("Ascent", Ascent) +PDF_MAKE_NAME("Aside", Aside) PDF_MAKE_NAME("AuthEvent", AuthEvent) +PDF_MAKE_NAME("Author", Author) PDF_MAKE_NAME("B", B) PDF_MAKE_NAME("BBox", BBox) PDF_MAKE_NAME("BC", BC) @@ -65,6 +70,7 @@ PDF_MAKE_NAME("Background", Background) PDF_MAKE_NAME("BaseEncoding", BaseEncoding) PDF_MAKE_NAME("BaseFont", BaseFont) PDF_MAKE_NAME("BaseState", BaseState) +PDF_MAKE_NAME("BibEntry", BibEntry) PDF_MAKE_NAME("BitsPerComponent", BitsPerComponent) PDF_MAKE_NAME("BitsPerCoordinate", BitsPerCoordinate) PDF_MAKE_NAME("BitsPerFlag", BitsPerFlag) @@ -73,6 +79,7 @@ PDF_MAKE_NAME("BlackIs1", BlackIs1) PDF_MAKE_NAME("BlackPoint", BlackPoint) PDF_MAKE_NAME("BleedBox", BleedBox) PDF_MAKE_NAME("Blinds", Blinds) +PDF_MAKE_NAME("BlockQuote", BlockQuote) PDF_MAKE_NAME("Border", Border) PDF_MAKE_NAME("Bounds", Bounds) PDF_MAKE_NAME("Box", Box) @@ -100,14 +107,17 @@ PDF_MAKE_NAME("CalCMYK", CalCMYK) PDF_MAKE_NAME("CalGray", CalGray) PDF_MAKE_NAME("CalRGB", CalRGB) PDF_MAKE_NAME("CapHeight", CapHeight) +PDF_MAKE_NAME("Caption", Caption) PDF_MAKE_NAME("Caret", Caret) PDF_MAKE_NAME("Catalog", Catalog) PDF_MAKE_NAME("Cert", Cert) PDF_MAKE_NAME("Ch", Ch) PDF_MAKE_NAME("Changes", Changes) PDF_MAKE_NAME("CharProcs", CharProcs) +PDF_MAKE_NAME("CheckSum", CheckSum) PDF_MAKE_NAME("Circle", Circle) PDF_MAKE_NAME("ClosedArrow", ClosedArrow) +PDF_MAKE_NAME("Code", Code) PDF_MAKE_NAME("Collection", Collection) PDF_MAKE_NAME("ColorSpace", ColorSpace) PDF_MAKE_NAME("ColorTransform", ColorTransform) @@ -166,15 
+176,18 @@ PDF_MAKE_NAME("DigestLocation", DigestLocation) PDF_MAKE_NAME("DigestMethod", DigestMethod) PDF_MAKE_NAME("DigestValue", DigestValue) PDF_MAKE_NAME("Dissolve", Dissolve) +PDF_MAKE_NAME("Div", Div) PDF_MAKE_NAME("Dm", Dm) PDF_MAKE_NAME("DocMDP", DocMDP) PDF_MAKE_NAME("Document", Document) +PDF_MAKE_NAME("DocumentFragment", DocumentFragment) PDF_MAKE_NAME("Domain", Domain) PDF_MAKE_NAME("Draft", Draft) PDF_MAKE_NAME("Dur", Dur) PDF_MAKE_NAME("E", E) PDF_MAKE_NAME("EF", EF) PDF_MAKE_NAME("EarlyChange", EarlyChange) +PDF_MAKE_NAME("Em", Em) PDF_MAKE_NAME("EmbeddedFile", EmbeddedFile) PDF_MAKE_NAME("EmbeddedFiles", EmbeddedFiles) PDF_MAKE_NAME("Encode", Encode) @@ -190,6 +203,7 @@ PDF_MAKE_NAME("Expired", Expired) PDF_MAKE_NAME("ExtGState", ExtGState) PDF_MAKE_NAME("Extend", Extend) PDF_MAKE_NAME("F", F) +PDF_MAKE_NAME("FENote", FENote) PDF_MAKE_NAME("FL", FL) PDF_MAKE_NAME("FRM", FRM) PDF_MAKE_NAME("FS", FS) @@ -198,6 +212,7 @@ PDF_MAKE_NAME("Fade", Fade) PDF_MAKE_NAME("Ff", Ff) PDF_MAKE_NAME("FieldMDP", FieldMDP) PDF_MAKE_NAME("Fields", Fields) +PDF_MAKE_NAME("Figure", Figure) PDF_MAKE_NAME("FileAttachment", FileAttachment) PDF_MAKE_NAME("FileSize", FileSize) PDF_MAKE_NAME("Filespec", Filespec) @@ -231,17 +246,25 @@ PDF_MAKE_NAME("ForPublicRelease", ForPublicRelease) PDF_MAKE_NAME("Form", Form) PDF_MAKE_NAME("FormEx", FormEx) PDF_MAKE_NAME("FormType", FormType) +PDF_MAKE_NAME("Formula", Formula) PDF_MAKE_NAME("FreeText", FreeText) PDF_MAKE_NAME("Function", Function) PDF_MAKE_NAME("FunctionType", FunctionType) PDF_MAKE_NAME("Functions", Functions) PDF_MAKE_NAME("G", G) +PDF_MAKE_NAME("GTS_PDFX", GTS_PDFX) PDF_MAKE_NAME("Gamma", Gamma) PDF_MAKE_NAME("Glitter", Glitter) PDF_MAKE_NAME("GoTo", GoTo) PDF_MAKE_NAME("GoToR", GoToR) PDF_MAKE_NAME("Group", Group) PDF_MAKE_NAME("H", H) +PDF_MAKE_NAME("H1", H1) +PDF_MAKE_NAME("H2", H2) +PDF_MAKE_NAME("H3", H3) +PDF_MAKE_NAME("H4", H4) +PDF_MAKE_NAME("H5", H5) +PDF_MAKE_NAME("H6", H6) PDF_MAKE_NAME("Height", Height) 
PDF_MAKE_NAME("Helv", Helv) PDF_MAKE_NAME("Highlight", Highlight) @@ -256,6 +279,9 @@ PDF_MAKE_NAME("Identity", Identity) PDF_MAKE_NAME("Identity-H", Identity_H) PDF_MAKE_NAME("Identity-V", Identity_V) PDF_MAKE_NAME("Image", Image) +PDF_MAKE_NAME("ImageB", ImageB) +PDF_MAKE_NAME("ImageC", ImageC) +PDF_MAKE_NAME("ImageI", ImageI) PDF_MAKE_NAME("ImageMask", ImageMask) PDF_MAKE_NAME("Include", Include) PDF_MAKE_NAME("Index", Index) @@ -273,21 +299,27 @@ PDF_MAKE_NAME("JPXDecode", JPXDecode) PDF_MAKE_NAME("JS", JS) PDF_MAKE_NAME("JavaScript", JavaScript) PDF_MAKE_NAME("K", K) +PDF_MAKE_NAME("Keywords", Keywords) PDF_MAKE_NAME("Kids", Kids) PDF_MAKE_NAME("L", L) +PDF_MAKE_NAME("LBody", LBody) PDF_MAKE_NAME("LC", LC) PDF_MAKE_NAME("LE", LE) +PDF_MAKE_NAME("LI", LI) PDF_MAKE_NAME("LJ", LJ) PDF_MAKE_NAME("LW", LW) PDF_MAKE_NAME("LZ", LZ) PDF_MAKE_NAME("LZW", LZW) PDF_MAKE_NAME("LZWDecode", LZWDecode) PDF_MAKE_NAME("Lab", Lab) +PDF_MAKE_NAME("Label", Label) PDF_MAKE_NAME("Lang", Lang) PDF_MAKE_NAME("Last", Last) PDF_MAKE_NAME("LastChar", LastChar) PDF_MAKE_NAME("LastPage", LastPage) PDF_MAKE_NAME("Launch", Launch) +PDF_MAKE_NAME("Layer", Layer) +PDF_MAKE_NAME("Lbl", Lbl) PDF_MAKE_NAME("Length", Length) PDF_MAKE_NAME("Length1", Length1) PDF_MAKE_NAME("Length2", Length2) @@ -296,6 +328,7 @@ PDF_MAKE_NAME("Limits", Limits) PDF_MAKE_NAME("Line", Line) PDF_MAKE_NAME("Linearized", Linearized) PDF_MAKE_NAME("Link", Link) +PDF_MAKE_NAME("List", List) PDF_MAKE_NAME("Location", Location) PDF_MAKE_NAME("Lock", Lock) PDF_MAKE_NAME("Locked", Locked) @@ -325,10 +358,12 @@ PDF_MAKE_NAME("NewWindow", NewWindow) PDF_MAKE_NAME("Next", Next) PDF_MAKE_NAME("NextPage", NextPage) PDF_MAKE_NAME("NonEFontNoWarn", NonEFontNoWarn) +PDF_MAKE_NAME("NonStruct", NonStruct) PDF_MAKE_NAME("None", None) PDF_MAKE_NAME("Normal", Normal) PDF_MAKE_NAME("NotApproved", NotApproved) PDF_MAKE_NAME("NotForPublicRelease", NotForPublicRelease) +PDF_MAKE_NAME("Note", Note) PDF_MAKE_NAME("NumSections", NumSections) 
PDF_MAKE_NAME("Nums", Nums) PDF_MAKE_NAME("O", O) @@ -353,16 +388,22 @@ PDF_MAKE_NAME("Opt", Opt) PDF_MAKE_NAME("Order", Order) PDF_MAKE_NAME("Ordering", Ordering) PDF_MAKE_NAME("Outlines", Outlines) +PDF_MAKE_NAME("OutputCondition", OutputCondition) +PDF_MAKE_NAME("OutputConditionIdentifier", OutputConditionIdentifier) +PDF_MAKE_NAME("OutputIntent", OutputIntent) PDF_MAKE_NAME("OutputIntents", OutputIntents) PDF_MAKE_NAME("P", P) PDF_MAKE_NAME("PDF", PDF) PDF_MAKE_NAME("PS", PS) PDF_MAKE_NAME("Page", Page) +PDF_MAKE_NAME("PageLabels", PageLabels) PDF_MAKE_NAME("PageMode", PageMode) PDF_MAKE_NAME("Pages", Pages) PDF_MAKE_NAME("PaintType", PaintType) PDF_MAKE_NAME("Params", Params) PDF_MAKE_NAME("Parent", Parent) +PDF_MAKE_NAME("ParentTree", ParentTree) +PDF_MAKE_NAME("Part", Part) PDF_MAKE_NAME("Pattern", Pattern) PDF_MAKE_NAME("PatternType", PatternType) PDF_MAKE_NAME("Perms", Perms) @@ -376,25 +417,31 @@ PDF_MAKE_NAME("PrevPage", PrevPage) PDF_MAKE_NAME("Preview", Preview) PDF_MAKE_NAME("Print", Print) PDF_MAKE_NAME("PrinterMark", PrinterMark) +PDF_MAKE_NAME("Private", Private) PDF_MAKE_NAME("ProcSet", ProcSet) PDF_MAKE_NAME("Producer", Producer) -PDF_MAKE_NAME("Properties", Properties) PDF_MAKE_NAME("Prop_AuthTime", Prop_AuthTime) PDF_MAKE_NAME("Prop_AuthType", Prop_AuthType) PDF_MAKE_NAME("Prop_Build", Prop_Build) +PDF_MAKE_NAME("Properties", Properties) PDF_MAKE_NAME("PubSec", PubSec) PDF_MAKE_NAME("Push", Push) PDF_MAKE_NAME("Q", Q) PDF_MAKE_NAME("QuadPoints", QuadPoints) +PDF_MAKE_NAME("Quote", Quote) PDF_MAKE_NAME("R", R) +PDF_MAKE_NAME("RB", RB) PDF_MAKE_NAME("RBGroups", RBGroups) PDF_MAKE_NAME("RC", RC) PDF_MAKE_NAME("RClosedArrow", RClosedArrow) +PDF_MAKE_NAME("RD", RD) PDF_MAKE_NAME("REx", REx) PDF_MAKE_NAME("RGB", RGB) PDF_MAKE_NAME("RI", RI) PDF_MAKE_NAME("RL", RL) PDF_MAKE_NAME("ROpenArrow", ROpenArrow) +PDF_MAKE_NAME("RP", RP) +PDF_MAKE_NAME("RT", RT) PDF_MAKE_NAME("Range", Range) PDF_MAKE_NAME("Reason", Reason) PDF_MAKE_NAME("Rect", Rect) @@ 
-404,15 +451,18 @@ PDF_MAKE_NAME("Reference", Reference) PDF_MAKE_NAME("Registry", Registry) PDF_MAKE_NAME("ResetForm", ResetForm) PDF_MAKE_NAME("Resources", Resources) +PDF_MAKE_NAME("RoleMap", RoleMap) PDF_MAKE_NAME("Root", Root) PDF_MAKE_NAME("Rotate", Rotate) PDF_MAKE_NAME("Rows", Rows) +PDF_MAKE_NAME("Ruby", Ruby) PDF_MAKE_NAME("RunLengthDecode", RunLengthDecode) PDF_MAKE_NAME("S", S) PDF_MAKE_NAME("SMask", SMask) PDF_MAKE_NAME("SMaskInData", SMaskInData) PDF_MAKE_NAME("Schema", Schema) PDF_MAKE_NAME("Screen", Screen) +PDF_MAKE_NAME("Sect", Sect) PDF_MAKE_NAME("Separation", Separation) PDF_MAKE_NAME("Shading", Shading) PDF_MAKE_NAME("ShadingType", ShadingType) @@ -425,9 +475,11 @@ PDF_MAKE_NAME("Size", Size) PDF_MAKE_NAME("Slash", Slash) PDF_MAKE_NAME("Sold", Sold) PDF_MAKE_NAME("Sound", Sound) +PDF_MAKE_NAME("Span", Span) PDF_MAKE_NAME("Split", Split) PDF_MAKE_NAME("Square", Square) PDF_MAKE_NAME("Squiggly", Squiggly) +PDF_MAKE_NAME("St", St) PDF_MAKE_NAME("Stamp", Stamp) PDF_MAKE_NAME("Standard", Standard) PDF_MAKE_NAME("StdCF", StdCF) @@ -435,18 +487,30 @@ PDF_MAKE_NAME("StemV", StemV) PDF_MAKE_NAME("StmF", StmF) PDF_MAKE_NAME("StrF", StrF) PDF_MAKE_NAME("StrikeOut", StrikeOut) +PDF_MAKE_NAME("Strong", Strong) PDF_MAKE_NAME("StructParent", StructParent) PDF_MAKE_NAME("StructParents", StructParents) +PDF_MAKE_NAME("StructTreeRoot", StructTreeRoot) +PDF_MAKE_NAME("Sub", Sub) PDF_MAKE_NAME("SubFilter", SubFilter) +PDF_MAKE_NAME("Subject", Subject) PDF_MAKE_NAME("Subtype", Subtype) PDF_MAKE_NAME("Subtype2", Subtype2) PDF_MAKE_NAME("Supplement", Supplement) PDF_MAKE_NAME("Symb", Symb) PDF_MAKE_NAME("T", T) +PDF_MAKE_NAME("TBody", TBody) +PDF_MAKE_NAME("TD", TD) +PDF_MAKE_NAME("TFoot", TFoot) +PDF_MAKE_NAME("TH", TH) +PDF_MAKE_NAME("THead", THead) PDF_MAKE_NAME("TI", TI) +PDF_MAKE_NAME("TOC", TOC) +PDF_MAKE_NAME("TOCI", TOCI) PDF_MAKE_NAME("TR", TR) PDF_MAKE_NAME("TR2", TR2) PDF_MAKE_NAME("TU", TU) +PDF_MAKE_NAME("Table", Table) PDF_MAKE_NAME("Text", Text) 
PDF_MAKE_NAME("TilingType", TilingType) PDF_MAKE_NAME("Times", Times) @@ -491,6 +555,9 @@ PDF_MAKE_NAME("View", View) PDF_MAKE_NAME("W", W) PDF_MAKE_NAME("W2", W2) PDF_MAKE_NAME("WMode", WMode) +PDF_MAKE_NAME("WP", WP) +PDF_MAKE_NAME("WT", WT) +PDF_MAKE_NAME("Warichu", Warichu) PDF_MAKE_NAME("Watermark", Watermark) PDF_MAKE_NAME("WhitePoint", WhitePoint) PDF_MAKE_NAME("Widget", Widget) @@ -509,9 +576,11 @@ PDF_MAKE_NAME("XYZ", XYZ) PDF_MAKE_NAME("YStep", YStep) PDF_MAKE_NAME("Yes", Yes) PDF_MAKE_NAME("ZaDb", ZaDb) +PDF_MAKE_NAME("a", a) PDF_MAKE_NAME("adbe.pkcs7.detached", adbe_pkcs7_detached) PDF_MAKE_NAME("ca", ca) PDF_MAKE_NAME("n0", n0) PDF_MAKE_NAME("n1", n1) PDF_MAKE_NAME("n2", n2) PDF_MAKE_NAME("op", op) +PDF_MAKE_NAME("r", r) diff --git a/misc/mupdf/include/mupdf/pdf/object.h b/misc/mupdf/include/mupdf/pdf/object.h index 25c893b..18df8c7 100644 --- a/misc/mupdf/include/mupdf/pdf/object.h +++ b/misc/mupdf/include/mupdf/pdf/object.h @@ -17,12 +17,14 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_OBJECT_H #define MUPDF_PDF_OBJECT_H +#include "mupdf/fitz/stream.h" + typedef struct pdf_document pdf_document; typedef struct pdf_crypt pdf_crypt; typedef struct pdf_journal pdf_journal; @@ -61,6 +63,7 @@ pdf_obj *pdf_deep_copy_obj(fz_context *ctx, pdf_obj *obj); pdf_obj *pdf_keep_obj(fz_context *ctx, pdf_obj *obj); void pdf_drop_obj(fz_context *ctx, pdf_obj *obj); +pdf_obj *pdf_drop_singleton_obj(fz_context *ctx, pdf_obj *obj); int pdf_is_null(fz_context *ctx, pdf_obj *obj); int pdf_is_bool(fz_context *ctx, pdf_obj *obj); @@ -78,14 +81,56 @@ int pdf_is_indirect(fz_context *ctx, pdf_obj *obj); */ int pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num); int pdf_is_stream(fz_context *ctx, pdf_obj *obj); + +/* Compare 2 objects. Returns 0 on match, non-zero on mismatch. + * Streams always mismatch. + */ int pdf_objcmp(fz_context *ctx, pdf_obj *a, pdf_obj *b); int pdf_objcmp_resolve(fz_context *ctx, pdf_obj *a, pdf_obj *b); + +/* Compare 2 objects. Returns 0 on match, non-zero on mismatch. + * Stream contents are explicitly checked. 
+ */ +int pdf_objcmp_deep(fz_context *ctx, pdf_obj *a, pdf_obj *b); + int pdf_name_eq(fz_context *ctx, pdf_obj *a, pdf_obj *b); int pdf_obj_marked(fz_context *ctx, pdf_obj *obj); int pdf_mark_obj(fz_context *ctx, pdf_obj *obj); void pdf_unmark_obj(fz_context *ctx, pdf_obj *obj); +typedef struct pdf_cycle_list pdf_cycle_list; +struct pdf_cycle_list { + pdf_cycle_list *up; + int num; +}; +int pdf_cycle(fz_context *ctx, pdf_cycle_list *here, pdf_cycle_list *prev, pdf_obj *obj); + +typedef struct +{ + int len; + unsigned char bits[1]; +} pdf_mark_bits; + +pdf_mark_bits *pdf_new_mark_bits(fz_context *ctx, pdf_document *doc); +void pdf_drop_mark_bits(fz_context *ctx, pdf_mark_bits *marks); +void pdf_mark_bits_reset(fz_context *ctx, pdf_mark_bits *marks); +int pdf_mark_bits_set(fz_context *ctx, pdf_mark_bits *marks, pdf_obj *obj); + +typedef struct +{ + int len; + int max; + int *list; + int local_list[8]; +} pdf_mark_list; + +int pdf_mark_list_push(fz_context *ctx, pdf_mark_list *list, pdf_obj *obj); +void pdf_mark_list_pop(fz_context *ctx, pdf_mark_list *list); +int pdf_mark_list_check(fz_context *ctx, pdf_mark_list *list, pdf_obj *obj); +void pdf_mark_list_init(fz_context *ctx, pdf_mark_list *list); +void pdf_mark_list_free(fz_context *ctx, pdf_mark_list *list); + void pdf_set_obj_memo(fz_context *ctx, pdf_obj *obj, int bit, int memo); int pdf_obj_memo(fz_context *ctx, pdf_obj *obj, int bit, int *memo); @@ -105,6 +150,10 @@ size_t pdf_to_str_len(fz_context *ctx, pdf_obj *obj); int pdf_to_num(fz_context *ctx, pdf_obj *obj); int pdf_to_gen(fz_context *ctx, pdf_obj *obj); +int pdf_to_bool_default(fz_context *ctx, pdf_obj *obj, int def); +int pdf_to_int_default(fz_context *ctx, pdf_obj *obj, int def); +float pdf_to_real_default(fz_context *ctx, pdf_obj *obj, float def); + int pdf_array_len(fz_context *ctx, pdf_obj *array); pdf_obj *pdf_array_get(fz_context *ctx, pdf_obj *array, int i); void pdf_array_put(fz_context *ctx, pdf_obj *array, int i, pdf_obj *obj); @@ -129,6 
+178,7 @@ pdf_obj *pdf_dict_gets(fz_context *ctx, pdf_obj *dict, const char *key); pdf_obj *pdf_dict_getsa(fz_context *ctx, pdf_obj *dict, const char *key, const char *abbrev); pdf_obj *pdf_dict_get_inheritable(fz_context *ctx, pdf_obj *dict, pdf_obj *key); pdf_obj *pdf_dict_getp_inheritable(fz_context *ctx, pdf_obj *dict, const char *path); +pdf_obj *pdf_dict_gets_inheritable(fz_context *ctx, pdf_obj *dict, const char *key); void pdf_dict_put(fz_context *ctx, pdf_obj *dict, pdf_obj *key, pdf_obj *val); void pdf_dict_put_drop(fz_context *ctx, pdf_obj *dict, pdf_obj *key, pdf_obj *val); void pdf_dict_get_put_drop(fz_context *ctx, pdf_obj *dict, pdf_obj *key, pdf_obj *val, pdf_obj **old_val); @@ -157,6 +207,7 @@ pdf_obj *pdf_dict_puts_dict(fz_context *ctx, pdf_obj *dict, const char *key, int int pdf_dict_get_bool(fz_context *ctx, pdf_obj *dict, pdf_obj *key); int pdf_dict_get_int(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +int64_t pdf_dict_get_int64(fz_context *ctx, pdf_obj *dict, pdf_obj *key); float pdf_dict_get_real(fz_context *ctx, pdf_obj *dict, pdf_obj *key); const char *pdf_dict_get_name(fz_context *ctx, pdf_obj *dict, pdf_obj *key); const char *pdf_dict_get_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key, size_t *sizep); @@ -165,6 +216,21 @@ fz_rect pdf_dict_get_rect(fz_context *ctx, pdf_obj *dict, pdf_obj *key); fz_matrix pdf_dict_get_matrix(fz_context *ctx, pdf_obj *dict, pdf_obj *key); int64_t pdf_dict_get_date(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +int pdf_dict_get_bool_default(fz_context *ctx, pdf_obj *dict, pdf_obj *key, int def); +int pdf_dict_get_int_default(fz_context *ctx, pdf_obj *dict, pdf_obj *key, int def); +float pdf_dict_get_real_default(fz_context *ctx, pdf_obj *dict, pdf_obj *key, float def); + +int pdf_dict_get_inheritable_bool(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +int pdf_dict_get_inheritable_int(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +int64_t pdf_dict_get_inheritable_int64(fz_context *ctx, pdf_obj *dict, 
pdf_obj *key); +float pdf_dict_get_inheritable_real(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +const char *pdf_dict_get_inheritable_name(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +const char *pdf_dict_get_inheritable_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key, size_t *sizep); +const char *pdf_dict_get_inheritable_text_string(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +fz_rect pdf_dict_get_inheritable_rect(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +fz_matrix pdf_dict_get_inheritable_matrix(fz_context *ctx, pdf_obj *dict, pdf_obj *key); +int64_t pdf_dict_get_inheritable_date(fz_context *ctx, pdf_obj *dict, pdf_obj *key); + void pdf_array_push_bool(fz_context *ctx, pdf_obj *array, int x); void pdf_array_push_int(fz_context *ctx, pdf_obj *array, int64_t x); void pdf_array_push_real(fz_context *ctx, pdf_obj *array, double x); @@ -174,6 +240,15 @@ void pdf_array_push_text_string(fz_context *ctx, pdf_obj *array, const char *x); pdf_obj *pdf_array_push_array(fz_context *ctx, pdf_obj *array, int initial); pdf_obj *pdf_array_push_dict(fz_context *ctx, pdf_obj *array, int initial); +void pdf_array_put_bool(fz_context *ctx, pdf_obj *array, int i, int x); +void pdf_array_put_int(fz_context *ctx, pdf_obj *array, int i, int64_t x); +void pdf_array_put_real(fz_context *ctx, pdf_obj *array, int i, double x); +void pdf_array_put_name(fz_context *ctx, pdf_obj *array, int i, const char *x); +void pdf_array_put_string(fz_context *ctx, pdf_obj *array, int i, const char *x, size_t n); +void pdf_array_put_text_string(fz_context *ctx, pdf_obj *array, int i, const char *x); +pdf_obj *pdf_array_put_array(fz_context *ctx, pdf_obj *array, int i, int initial); +pdf_obj *pdf_array_put_dict(fz_context *ctx, pdf_obj *array, int i, int initial); + int pdf_array_get_bool(fz_context *ctx, pdf_obj *array, int index); int pdf_array_get_int(fz_context *ctx, pdf_obj *array, int index); float pdf_array_get_real(fz_context *ctx, pdf_obj *array, int index); @@ -191,7 +266,7 @@ 
int pdf_obj_parent_num(fz_context *ctx, pdf_obj *obj); char *pdf_sprint_obj(fz_context *ctx, char *buf, size_t cap, size_t *len, pdf_obj *obj, int tight, int ascii); void pdf_print_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight, int ascii); -void pdf_print_encrypted_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight, int ascii, pdf_crypt *crypt, int num, int gen); +void pdf_print_encrypted_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight, int ascii, pdf_crypt *crypt, int num, int gen, int *sep); void pdf_debug_obj(fz_context *ctx, pdf_obj *obj); void pdf_debug_ref(fz_context *ctx, pdf_obj *obj); @@ -229,8 +304,39 @@ fz_rect pdf_to_rect(fz_context *ctx, pdf_obj *array); fz_matrix pdf_to_matrix(fz_context *ctx, pdf_obj *array); int64_t pdf_to_date(fz_context *ctx, pdf_obj *time); +/* + pdf_get_indirect_document and pdf_get_bound_document are + now deprecated. Please do not use them in future. They will + be removed. + + Please use pdf_pin_document instead. +*/ pdf_document *pdf_get_indirect_document(fz_context *ctx, pdf_obj *obj); pdf_document *pdf_get_bound_document(fz_context *ctx, pdf_obj *obj); + +/* + pdf_pin_document returns a new reference to the document + to which obj is bound. The caller is responsible for + dropping this reference once they have finished with it. + + This is a replacement for pdf_get_indirect_document + and pdf_get_bound_document that are now deprecated. Those + returned a borrowed reference that did not need to be + dropped. + + Note that this can validly return NULL in various cases: + 1) When the object is of a simple type (such as a number + or a string), it contains no reference to the enclosing + document. 2) When the object has yet to be inserted into + a PDF document (such as during parsing). 3) And (in + future versions) when the document has been destroyed + but the object reference remains. + + It is the caller's responsibility to deal with a NULL + return here. 
+*/ +pdf_document *pdf_pin_document(fz_context *ctx, pdf_obj *obj); + void pdf_set_int(fz_context *ctx, pdf_obj *obj, int64_t i); /* Voodoo to create PDF_NAME(Foo) macros from name-table.h */ @@ -281,6 +387,10 @@ void pdf_begin_implicit_operation(fz_context *ctx, pdf_document *doc); /* Call this to end an operation. */ void pdf_end_operation(fz_context *ctx, pdf_document *doc); +/* Call this to abandon an operation. Revert to the state + * when you began. */ +void pdf_abandon_operation(fz_context *ctx, pdf_document *doc); + /* Call this to find out how many undo/redo steps there are, and the * current position we are within those. 0 = original document, * *steps = final edited version. */ @@ -319,4 +429,7 @@ void pdf_deserialise_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm) /* Internal call as part of creating objects. */ void pdf_add_journal_fragment(fz_context *ctx, pdf_document *doc, int parent, pdf_obj *copy, fz_buffer *copy_stream, int newobj); +char *pdf_format_date(fz_context *ctx, int64_t time, char *s, size_t n); +int64_t pdf_parse_date(fz_context *ctx, const char *s); + #endif diff --git a/misc/mupdf/include/mupdf/pdf/page.h b/misc/mupdf/include/mupdf/pdf/page.h index 7af568e..59afa80 100644 --- a/misc/mupdf/include/mupdf/pdf/page.h +++ b/misc/mupdf/include/mupdf/pdf/page.h @@ -17,28 +17,45 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_PAGE_H #define MUPDF_PDF_PAGE_H #include "mupdf/pdf/interpret.h" +pdf_page *pdf_keep_page(fz_context *ctx, pdf_page *page); +void pdf_drop_page(fz_context *ctx, pdf_page *page); + int pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *pageobj); int pdf_count_pages(fz_context *ctx, pdf_document *doc); int pdf_count_pages_imp(fz_context *ctx, fz_document *doc, int chapter); pdf_obj *pdf_lookup_page_obj(fz_context *ctx, pdf_document *doc, int needle); +pdf_obj *pdf_lookup_page_loc(fz_context *ctx, pdf_document *doc, int needle, pdf_obj **parentp, int *indexp); + +/* + Cache the page tree for fast forward/reverse page lookups. + + No longer required. This is a No Op, now as page tree + maps are loaded automatically 'just in time'. +*/ void pdf_load_page_tree(fz_context *ctx, pdf_document *doc); + +/* + Discard the page tree maps. + + No longer required. This is a No Op, now as page tree + maps are discarded automatically 'just in time'. +*/ void pdf_drop_page_tree(fz_context *ctx, pdf_document *doc); /* - Find the page number of a named destination. + Internal function used to drop the page tree. - For use with looking up the destination page of a fragment - identifier in hyperlinks: foo.pdf#bar or foo.pdf#page=5. + Library users should not call this directly. */ -int pdf_lookup_anchor(fz_context *ctx, pdf_document *doc, const char *name, float *xp, float *yp); +void pdf_drop_page_tree_internal(fz_context *ctx, pdf_document *doc); /* Make page self sufficient. @@ -59,14 +76,87 @@ void pdf_flatten_inheritable_page_items(fz_context *ctx, pdf_obj *page); number: page number, where 0 is the first page of the document. */ pdf_page *pdf_load_page(fz_context *ctx, pdf_document *doc, int number); + +/* + Internal function to perform pdf_load_page. + + Do not call this directly. 
+*/ fz_page *pdf_load_page_imp(fz_context *ctx, fz_document *doc, int chapter, int number); -void pdf_page_obj_transform(fz_context *ctx, pdf_obj *pageobj, fz_rect *page_mediabox, fz_matrix *page_ctm); +/* + Enquire as to whether a given page uses transparency or not. +*/ +int pdf_page_has_transparency(fz_context *ctx, pdf_page *page); + +/* + Fetch the given box for a page, together with a transform that converts + from fitz coords to PDF coords. + + pageobj: The object that represents the page. + + outbox: If non-NULL, this will be filled in with the requested box + in fitz coordinates. + + outctm: A transform to map from fitz page space to PDF page space. + + box: Which box to return. +*/ +void pdf_page_obj_transform_box(fz_context *ctx, pdf_obj *pageobj, fz_rect *outbox, fz_matrix *out, fz_box_type box); + +/* + As for pdf_page_obj_transform_box, always requesting the + cropbox. +*/ +void pdf_page_obj_transform(fz_context *ctx, pdf_obj *pageobj, fz_rect *outbox, fz_matrix *outctm); + +/* + As for pdf_page_obj_transform_box, but working from a pdf_page + object rather than the pdf_obj representing the page. +*/ +void pdf_page_transform_box(fz_context *ctx, pdf_page *page, fz_rect *mediabox, fz_matrix *ctm, fz_box_type box); + +/* + As for pdf_page_transform_box, always requesting the + cropbox. +*/ void pdf_page_transform(fz_context *ctx, pdf_page *page, fz_rect *mediabox, fz_matrix *ctm); + +/* + Find the pdf object that represents the resources dictionary + for a page. + + This is a borrowed pointer that the caller should pdf_keep_obj + if. This may be NULL. +*/ pdf_obj *pdf_page_resources(fz_context *ctx, pdf_page *page); + +/* + Find the pdf object that represents the page contents + for a page. + + This is a borrowed pointer that the caller should pdf_keep_obj + if. This may be NULL. +*/ pdf_obj *pdf_page_contents(fz_context *ctx, pdf_page *page); + +/* + Find the pdf object that represents the transparency group + for a page. 
+ + This is a borrowed pointer that the caller should pdf_keep_obj + if. This may be NULL. +*/ pdf_obj *pdf_page_group(fz_context *ctx, pdf_page *page); +/* + Modify the page boxes (using fitz space coordinates). + + Note that changing the CropBox will change the fitz coordinate space mapping, + invalidating all bounding boxes previously acquired. +*/ +void pdf_set_page_box(fz_context *ctx, pdf_page *page, fz_box_type box, fz_rect rect); + /* Get the separation details for a page. */ @@ -81,12 +171,12 @@ fz_link *pdf_load_links(fz_context *ctx, pdf_page *page); /* Determine the size of a page. - Determine the page size in user space units, taking page rotation + Determine the page size in points, taking page rotation into account. The page size is taken to be the crop box if it exists (visible area after cropping), otherwise the media box will be used (possibly including printing marks). */ -fz_rect pdf_bound_page(fz_context *ctx, pdf_page *page); +fz_rect pdf_bound_page(fz_context *ctx, pdf_page *page, fz_box_type box); /* Interpret a loaded page and render it on a device. 
@@ -136,24 +226,62 @@ void pdf_run_page_contents_with_usage(fz_context *ctx, pdf_page *page, fz_device void pdf_run_page_annots_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie); void pdf_run_page_widgets_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie); -void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *filter); -void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *filter); +void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *options); +void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *options); -fz_pixmap *pdf_new_pixmap_from_page_contents_with_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha, const char *usage); -fz_pixmap *pdf_new_pixmap_from_page_with_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha, const char *usage); -fz_pixmap *pdf_new_pixmap_from_page_contents_with_separations_and_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha, const char *usage); -fz_pixmap *pdf_new_pixmap_from_page_with_separations_and_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha, const char *usage); +fz_pixmap *pdf_new_pixmap_from_page_contents_with_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha, const char *usage, fz_box_type box); +fz_pixmap *pdf_new_pixmap_from_page_with_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha, const char *usage, fz_box_type box); +fz_pixmap *pdf_new_pixmap_from_page_contents_with_separations_and_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, 
fz_separations *seps, int alpha, const char *usage, fz_box_type box); +fz_pixmap *pdf_new_pixmap_from_page_with_separations_and_usage(fz_context *ctx, pdf_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha, const char *usage, fz_box_type box); enum { + /* Do not change images at all */ PDF_REDACT_IMAGE_NONE, + + /* If the image intrudes across the redaction region (even if clipped), + * remove it. */ PDF_REDACT_IMAGE_REMOVE, + + /* If the image intrudes across the redaction region (even if clipped), + * replace the bit that intrudes with black pixels. */ PDF_REDACT_IMAGE_PIXELS, + + /* If the image, when clipped, intrudes across the redaction + * region, remove it completely. Note: clipped is a rough estimate + * based on the bbox of clipping paths. + * + * Essentially this says "remove any image that has visible parts + * that extend into the redaction region". + * + * This method can effectively 'leak' invisible information during + * the redaction phase, so should be used with caution. + */ + PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE +}; + +enum { + PDF_REDACT_LINE_ART_NONE, + PDF_REDACT_LINE_ART_REMOVE_IF_COVERED, + PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED +}; + +enum { + /* Remove any text that overlaps with the redaction region, + * however slightly. This is the default option, and is the + * correct option for secure behaviour. */ + PDF_REDACT_TEXT_REMOVE, + /* Do not remove any text at all as part of this redaction + * operation. Using this option is INSECURE! Use at your own + * risk. 
*/ + PDF_REDACT_TEXT_NONE }; typedef struct { int black_boxes; int image_method; + int line_art; + int text; } pdf_redact_options; int pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *opts); diff --git a/misc/mupdf/include/mupdf/pdf/parse.h b/misc/mupdf/include/mupdf/pdf/parse.h index 99a6ec6..3eb9205 100644 --- a/misc/mupdf/include/mupdf/pdf/parse.h +++ b/misc/mupdf/include/mupdf/pdf/parse.h @@ -17,12 +17,14 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_PARSE_H #define MUPDF_PDF_PARSE_H +#include "mupdf/pdf/document.h" + typedef enum { PDF_TOK_ERROR, PDF_TOK_EOF, diff --git a/misc/mupdf/include/mupdf/pdf/resource.h b/misc/mupdf/include/mupdf/pdf/resource.h index a8c73a2..dee9748 100644 --- a/misc/mupdf/include/mupdf/pdf/resource.h +++ b/misc/mupdf/include/mupdf/pdf/resource.h @@ -17,12 +17,18 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
#ifndef MUPDF_PDF_RESOURCE_H #define MUPDF_PDF_RESOURCE_H +#include "mupdf/fitz/font.h" +#include "mupdf/fitz/image.h" +#include "mupdf/fitz/shade.h" +#include "mupdf/fitz/store.h" +#include "mupdf/pdf/object.h" + void pdf_store_item(fz_context *ctx, pdf_obj *key, void *val, size_t itemsize); void *pdf_find_item(fz_context *ctx, fz_store_drop_fn *drop, pdf_obj *key); void pdf_remove_item(fz_context *ctx, fz_store_drop_fn *drop, pdf_obj *key); @@ -68,6 +74,25 @@ fz_colorspace *pdf_load_colorspace(fz_context *ctx, pdf_obj *obj); int pdf_is_tint_colorspace(fz_context *ctx, fz_colorspace *cs); fz_shade *pdf_load_shading(fz_context *ctx, pdf_document *doc, pdf_obj *obj); +void pdf_sample_shade_function(fz_context *ctx, float shade[256][FZ_MAX_COLORS+1], int n, int funcs, pdf_function **func, float t0, float t1); + +/** + Function to recolor a single color from a shade. +*/ +typedef void (pdf_recolor_vertex)(fz_context *ctx, void *opaque, fz_colorspace *dst_cs, float *d, fz_colorspace *src_cs, const float *src); + +/** + Function to handle recoloring a shade. + + Called with src_cs from the shade. If no recoloring is required, return NULL. Otherwise + fill in *dst_cs, and return a vertex recolorer. +*/ +typedef pdf_recolor_vertex *(pdf_shade_recolorer)(fz_context *ctx, void *opaque, fz_colorspace *src_cs, fz_colorspace **dst_cs); + +/** + Recolor a shade. +*/ +pdf_obj *pdf_recolor_shade(fz_context *ctx, pdf_obj *shade, pdf_shade_recolorer *reshade, void *opaque); fz_image *pdf_load_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *file); int pdf_is_jpx_image(fz_context *ctx, pdf_obj *dict); diff --git a/misc/mupdf/include/mupdf/pdf/xref.h b/misc/mupdf/include/mupdf/pdf/xref.h index b61aa30..ad7bba2 100644 --- a/misc/mupdf/include/mupdf/pdf/xref.h +++ b/misc/mupdf/include/mupdf/pdf/xref.h @@ -1,4 +1,4 @@ -// Copyright (C) 2004-2021 Artifex Software, Inc. +// Copyright (C) 2004-2022 Artifex Software, Inc. 
// // This file is part of MuPDF. // @@ -17,12 +17,14 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef MUPDF_PDF_XREF_H #define MUPDF_PDF_XREF_H +#include "mupdf/pdf/document.h" + /* Allocate a slot in the xref table and return a fresh unused object number. */ @@ -91,11 +93,34 @@ struct pdf_xref int64_t end_ofs; /* file offset to end of xref */ }; +/** + Retrieve the pdf_xref_entry for a given object. + + This can cause xref reorganisations (solidifications etc) due to + repairs, so all held pdf_xref_entries should be considered + invalid after this call (other than the returned one). +*/ pdf_xref_entry *pdf_cache_object(fz_context *ctx, pdf_document *doc, int num); int pdf_count_objects(fz_context *ctx, pdf_document *doc); + +/** + Resolve an indirect object (or chain of objects). + + This can cause xref reorganisations (solidifications etc) due to + repairs, so all held pdf_xref_entries should be considered + invalid after this call (other than the returned one). +*/ pdf_obj *pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref); pdf_obj *pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref); + +/** + Load a given object. + + This can cause xref reorganisations (solidifications etc) due to + repairs, so all held pdf_xref_entries should be considered + invalid after this call (other than the returned one). +*/ pdf_obj *pdf_load_object(fz_context *ctx, pdf_document *doc, int num); pdf_obj *pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num); @@ -130,7 +155,7 @@ fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref); constraining to stream length, and without decryption. 
*/ fz_stream *pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *params); -fz_compressed_buffer *pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num); +fz_compressed_buffer *pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case); void pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *cstm, int indexed, fz_compressed_image *image); fz_stream *pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs); fz_stream *pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj); @@ -140,6 +165,8 @@ pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc); void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer); int pdf_xref_len(fz_context *ctx, pdf_document *doc); +pdf_obj *pdf_metadata(fz_context *ctx, pdf_document *doc); + /* Used while reading the individual xref sections from a file. */ @@ -151,8 +178,31 @@ pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc This will never throw anything, or return NULL if it is only asked to return objects in range within a 'solid' xref. + + This may "solidify" the xref (so can cause allocations). */ pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i); + +/* + Map a function across all xref entries in a document. +*/ +void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int i, pdf_document *doc, void *), void *arg); + + +/* + Used after loading a document to access entries. + + This will never throw anything, or return NULL if it is + only asked to return objects in range within a 'solid' + xref. + + This will never "solidify" the xref, so no entry may be found + (NULL will be returned) for free entries. 
+ + Called with a valid i, this will never try/catch or throw. +*/ +pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i); +pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i); void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n); void pdf_forget_xref(fz_context *ctx, pdf_document *doc); pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i); @@ -166,6 +216,7 @@ void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_ob void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field); int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj); void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num); +int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj); void pdf_repair_xref(fz_context *ctx, pdf_document *doc); void pdf_repair_obj_stms(fz_context *ctx, pdf_document *doc); @@ -180,7 +231,7 @@ void pdf_mark_xref(fz_context *ctx, pdf_document *doc); void pdf_clear_xref(fz_context *ctx, pdf_document *doc); void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc); -int pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int64_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int64_t *tmpofs, pdf_obj **root); +int pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int64_t *stmofsp, int64_t *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int64_t *tmpofs, pdf_obj **root); pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum); diff --git a/misc/mupdf/include/mupdf/ucdn.h b/misc/mupdf/include/mupdf/ucdn.h index c3994ec..f03ae69 100644 --- a/misc/mupdf/include/mupdf/ucdn.h +++ b/misc/mupdf/include/mupdf/ucdn.h @@ -21,6 +21,8 @@ extern "C" { #endif +#include "fitz/system.h" + #define UCDN_EAST_ASIAN_F 0 #define UCDN_EAST_ASIAN_H 1 #define 
UCDN_EAST_ASIAN_W 2 @@ -185,7 +187,14 @@ extern "C" { #define UCDN_SCRIPT_DIVES_AKURU 154 #define UCDN_SCRIPT_KHITAN_SMALL_SCRIPT 155 #define UCDN_SCRIPT_YEZIDI 156 -#define UCDN_LAST_SCRIPT 156 +#define UCDN_SCRIPT_VITHKUQI 157 +#define UCDN_SCRIPT_OLD_UYGHUR 158 +#define UCDN_SCRIPT_CYPRO_MINOAN 159 +#define UCDN_SCRIPT_TANGSA 160 +#define UCDN_SCRIPT_TOTO 161 +#define UCDN_SCRIPT_KAWI 162 +#define UCDN_SCRIPT_NAG_MUNDARI 163 +#define UCDN_LAST_SCRIPT 163 #define UCDN_LINEBREAK_CLASS_OP 0 #define UCDN_LINEBREAK_CLASS_CL 1 diff --git a/misc/mupdf/lib/x86-64-linux/libmupdf-third.a b/misc/mupdf/lib/x86-64-linux/libmupdf-third.a index c22297c..e4d29dc 100644 Binary files a/misc/mupdf/lib/x86-64-linux/libmupdf-third.a and b/misc/mupdf/lib/x86-64-linux/libmupdf-third.a differ diff --git a/misc/mupdf/lib/x86-64-linux/libmupdf.a b/misc/mupdf/lib/x86-64-linux/libmupdf.a index 5abcba5..d832558 100644 Binary files a/misc/mupdf/lib/x86-64-linux/libmupdf.a and b/misc/mupdf/lib/x86-64-linux/libmupdf.a differ diff --git a/misc/mupdf/version b/misc/mupdf/version index c1af674..0b4c970 100644 --- a/misc/mupdf/version +++ b/misc/mupdf/version @@ -1 +1 @@ -1.19.0 \ No newline at end of file +1.24.2 \ No newline at end of file diff --git a/testdata/sample_page0.png b/testdata/sample_page0.png index 4ce9fc3..ea7e2a2 100644 Binary files a/testdata/sample_page0.png and b/testdata/sample_page0.png differ diff --git a/testdata/sample_page1.png b/testdata/sample_page1.png index 2f9b4a4..5199f70 100644 Binary files a/testdata/sample_page1.png and b/testdata/sample_page1.png differ diff --git a/testdata/sample_page10.png b/testdata/sample_page10.png index fc923e6..2352968 100644 Binary files a/testdata/sample_page10.png and b/testdata/sample_page10.png differ diff --git a/testdata/sample_page11.png b/testdata/sample_page11.png index dcff174..048b275 100644 Binary files a/testdata/sample_page11.png and b/testdata/sample_page11.png differ diff --git a/testdata/sample_page12.png 
b/testdata/sample_page12.png index d2c42d1..d8d4ae9 100644 Binary files a/testdata/sample_page12.png and b/testdata/sample_page12.png differ diff --git a/testdata/sample_page2.png b/testdata/sample_page2.png index c26507c..2693a11 100644 Binary files a/testdata/sample_page2.png and b/testdata/sample_page2.png differ diff --git a/testdata/sample_page3.png b/testdata/sample_page3.png index cd637cc..f534fbb 100644 Binary files a/testdata/sample_page3.png and b/testdata/sample_page3.png differ diff --git a/testdata/sample_page4.png b/testdata/sample_page4.png index 18a4e27..69bad9e 100644 Binary files a/testdata/sample_page4.png and b/testdata/sample_page4.png differ diff --git a/testdata/sample_page5.png b/testdata/sample_page5.png index 9878f62..a7d7747 100644 Binary files a/testdata/sample_page5.png and b/testdata/sample_page5.png differ diff --git a/testdata/sample_page6.png b/testdata/sample_page6.png index 84fe0bd..c7fbc8d 100644 Binary files a/testdata/sample_page6.png and b/testdata/sample_page6.png differ diff --git a/testdata/sample_page7.png b/testdata/sample_page7.png index c72a51d..835dfd6 100644 Binary files a/testdata/sample_page7.png and b/testdata/sample_page7.png differ diff --git a/testdata/sample_page8.png b/testdata/sample_page8.png index f819b34..dc887d1 100644 Binary files a/testdata/sample_page8.png and b/testdata/sample_page8.png differ diff --git a/testdata/sample_page9.png b/testdata/sample_page9.png index 9c1f041..75220a3 100644 Binary files a/testdata/sample_page9.png and b/testdata/sample_page9.png differ