diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 844fedb..056b617 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -47,7 +47,7 @@ fn bench_content() -> Vec { c.save_state(); c.set_flatness(10); c.restore_state(); - c.finish() + c.finish().into_bytes() } fn bench_new() -> Pdf { diff --git a/examples/limits.rs b/examples/limits.rs new file mode 100644 index 0000000..3a3ed6f --- /dev/null +++ b/examples/limits.rs @@ -0,0 +1,53 @@ +//! This example shows how you can track PDF limits of your chunks. + +use pdf_writer::{Chunk, Content, Limits, Name, Ref}; + +fn main() { + let mut limits = Limits::new(); + + let mut content = Content::new(); + content.transform([-3.4, 0.0, 0.0, 3.1, 100.0, 100.0]); + content.line_to(15.0, -26.1); + let buf = content.finish(); + // This will have the limits: + // - Max real number: 26.1 (for negative values we use their absolute value) + // - Max int number 100 (even though above 100.0 is a float number, it will be coerced into an + // integer, and thus counts towards the int limit) + limits.merge(buf.limits()); + + let mut chunk = Chunk::new(); + chunk.stream(Ref::new(1), &buf.into_bytes()); + chunk.type3_font(Ref::new(2)).name(Name(b"A_long_font_name")); + // This will update the limit for the maximum name and dictionary length. + limits.merge(chunk.limits()); + + // This is what the final PDF will look like. + assert_eq!( + chunk.as_bytes(), + b"1 0 obj +<< + /Length 34 +>> +stream +-3.4 0 0 3.1 100 100 cm +15 -26.1 l +endstream +endobj + +2 0 obj +<< + /Type /Font + /Subtype /Type3 + /Name /A_long_font_name +>> +endobj + +" + ); + + // And the limits should match, as well! + assert_eq!(limits.int(), 100); + assert_eq!(limits.real(), 26.1); + assert_eq!(limits.name_len(), 16); + assert_eq!(limits.dict_entries(), 3); +} diff --git a/src/buf.rs b/src/buf.rs index b6bf397..379cd66 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,29 +1,132 @@ use super::Primitive; -/// Additional methods for byte buffers. 
-pub trait BufExt {
-    fn push_val<T: Primitive>(&mut self, value: T);
-    fn push_int(&mut self, value: i32);
-    fn push_float(&mut self, value: f32);
-    fn push_decimal(&mut self, value: f32);
-    fn push_hex(&mut self, value: u8);
-    fn push_hex_u16(&mut self, value: u16);
-    fn push_octal(&mut self, value: u8);
+use std::ops::Deref;
+
+/// Tracks the limits of data types used in a buffer.
+#[derive(Clone, PartialEq, Debug, Default)]
+pub struct Limits {
+    int: i32,
+    real: f32,
+    name_len: usize,
+    str_len: usize,
+    array_len: usize,
+    dict_entries: usize,
+}
+
+impl Limits {
+    /// Create a new `Limits` struct with all values initialized to zero.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Get the largest absolute value of any integer (saturates at `i32::MAX`).
+    pub fn int(&self) -> i32 {
+        self.int
+    }
+
+    /// Get the largest absolute value of any real number.
+    pub fn real(&self) -> f32 {
+        self.real
+    }
+
+    /// Get the maximum length of any used name.
+    pub fn name_len(&self) -> usize {
+        self.name_len
+    }
+
+    /// Get the maximum length of any used array.
+    pub fn array_len(&self) -> usize {
+        self.array_len
+    }
+
+    /// Get the maximum number of entries in any dictionary.
+    pub fn dict_entries(&self) -> usize {
+        self.dict_entries
+    }
+
+    /// Get the maximum length of any used string.
+    pub fn str_len(&self) -> usize {
+        self.str_len
+    }
+
+    pub(crate) fn register_int(&mut self, val: i32) {
+        self.int = self.int.max(val.saturating_abs()); // `abs` overflows on `i32::MIN`
+    }
+
+    pub(crate) fn register_real(&mut self, val: f32) {
+        self.real = self.real.max(val.abs());
+    }
+
+    pub(crate) fn register_name_len(&mut self, len: usize) {
+        self.name_len = self.name_len.max(len);
+    }
+
+    pub(crate) fn register_str_len(&mut self, len: usize) {
+        self.str_len = self.str_len.max(len);
+    }
+
+    pub(crate) fn register_array_len(&mut self, len: usize) {
+        self.array_len = self.array_len.max(len);
+    }
+
+    pub(crate) fn register_dict_entries(&mut self, len: usize) {
+        self.dict_entries = self.dict_entries.max(len);
+    }
+
+    /// Merge two `Limits` with each other, taking the maximum
+    /// of each field from both.
+    pub fn merge(&mut self, other: &Limits) {
+        self.register_int(other.int);
+        self.register_real(other.real);
+        self.register_name_len(other.name_len);
+        self.register_str_len(other.str_len);
+        self.register_array_len(other.array_len);
+        self.register_dict_entries(other.dict_entries);
+    }
+}
+
+/// A buffer of arbitrary PDF content.
+#[derive(Clone, PartialEq, Debug)]
+pub struct Buf {
+    pub(crate) inner: Vec<u8>,
+    pub(crate) limits: Limits,
 }
 
-impl BufExt for Vec<u8> {
+impl Buf {
+    pub(crate) fn new() -> Self {
+        Self { inner: Vec::new(), limits: Limits::new() }
+    }
+
+    pub(crate) fn with_capacity(capacity: usize) -> Self {
+        Self {
+            inner: Vec::with_capacity(capacity),
+            limits: Limits::new(),
+        }
+    }
+
+    /// Get the underlying bytes of the buffer.
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.inner
+    }
+
+    /// Return the limits of the buffer.
+ pub fn limits(&self) -> &Limits { + &self.limits + } + #[inline] - fn push_val(&mut self, value: T) { + pub(crate) fn push_val(&mut self, value: T) { value.write(self); } #[inline] - fn push_int(&mut self, value: i32) { - self.extend(itoa::Buffer::new().format(value).as_bytes()); + pub(crate) fn push_int(&mut self, value: i32) { + self.limits.register_int(value); + self.extend_slice(itoa::Buffer::new().format(value).as_bytes()); } #[inline] - fn push_float(&mut self, value: f32) { + pub(crate) fn push_float(&mut self, value: f32) { // Don't write the decimal point if we don't need it. // Also, integer formatting is way faster. if value as i32 as f32 == value { @@ -35,14 +138,16 @@ impl BufExt for Vec { /// Like `push_float`, but forces the decimal point. #[inline] - fn push_decimal(&mut self, value: f32) { + pub(crate) fn push_decimal(&mut self, value: f32) { + self.limits.register_real(value); + if value == 0.0 || (value.abs() > 1e-6 && value.abs() < 1e12) { - self.extend(ryu::Buffer::new().format(value).as_bytes()); + self.extend_slice(ryu::Buffer::new().format(value).as_bytes()); } else { #[inline(never)] - fn write_extreme(buf: &mut Vec, value: f32) { + fn write_extreme(buf: &mut Buf, value: f32) { use std::io::Write; - write!(buf, "{}", value).unwrap(); + write!(buf.inner, "{}", value).unwrap(); } write_extreme(self, value); @@ -50,7 +155,23 @@ impl BufExt for Vec { } #[inline] - fn push_hex(&mut self, value: u8) { + pub(crate) fn extend_slice(&mut self, other: &[u8]) { + self.inner.extend(other); + } + + #[inline] + pub(crate) fn extend(&mut self, other: &Buf) { + self.limits.merge(&other.limits); + self.inner.extend(&other.inner); + } + + #[inline] + pub(crate) fn push(&mut self, b: u8) { + self.inner.push(b); + } + + #[inline] + pub(crate) fn push_hex(&mut self, value: u8) { fn hex(b: u8) -> u8 { if b < 10 { b'0' + b @@ -64,13 +185,13 @@ impl BufExt for Vec { } #[inline] - fn push_hex_u16(&mut self, value: u16) { + pub(crate) fn push_hex_u16(&mut self, 
value: u16) { self.push_hex((value >> 8) as u8); self.push_hex(value as u8); } #[inline] - fn push_octal(&mut self, value: u8) { + pub(crate) fn push_octal(&mut self, value: u8) { fn octal(b: u8) -> u8 { b'0' + b } @@ -79,4 +200,87 @@ impl BufExt for Vec { self.push(octal((value >> 3) & 7)); self.push(octal(value & 7)); } + + #[inline] + pub(crate) fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } +} + +impl Deref for Buf { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Chunk, Content, Finish, Name, Rect, Ref, Str, TextStr}; + + #[test] + fn test_content_limits() { + let mut limits = Limits::default(); + + let mut content = Content::new(); + content.cubic_to(14.3, 16.2, 22.6, 30.9, 50.1, 40.0); + content.show(Str(b"Some text")); + content.set_font(Name(b"NotoSans"), 10.0); + let buf = content.finish(); + limits.merge(buf.limits()); + + let mut content = Content::new(); + content.line_to(55.0, -75.3); + content.set_font(Name(b"Noto"), 10.0); + content + .show_positioned() + .items() + .show(Str(b"A")) + .show(Str(b"B")) + .adjust(32.0); + content + .marked_content_point_with_properties(Name(b"Hi")) + .properties() + .actual_text(TextStr("text")); + let buf = content.finish(); + limits.merge(buf.limits()); + + assert_eq!( + limits, + Limits { + int: 55, + real: 75.3, + name_len: 10, + str_len: 9, + array_len: 3, + dict_entries: 1, + } + ) + } + + #[test] + fn test_chunk_limits() { + let mut limits = Limits::default(); + + let mut chunk = Chunk::new(); + let mut x_object = chunk.form_xobject(Ref::new(1), &[]); + x_object.bbox(Rect::new(4.0, 6.0, 22.1, 31.0)); + x_object.finish(); + + limits.merge(chunk.limits()); + + assert_eq!( + limits, + Limits { + int: 31, + real: 22.1, + name_len: 7, + str_len: 0, + array_len: 4, + dict_entries: 4, + } + ) + } } diff --git a/src/chunk.rs b/src/chunk.rs index e1f22b6..7dab512 100644 --- a/src/chunk.rs +++ 
b/src/chunk.rs @@ -1,4 +1,5 @@ use super::*; +use crate::buf::Buf; /// A builder for a collection of indirect PDF objects. /// @@ -12,7 +13,7 @@ use super::*; /// it at a time). #[derive(Clone)] pub struct Chunk { - pub(crate) buf: Vec, + pub(crate) buf: Buf, pub(crate) offsets: Vec<(Ref, usize)>, } @@ -25,7 +26,7 @@ impl Chunk { /// Create a new chunk with the specified initial capacity. pub fn with_capacity(capacity: usize) -> Self { - Self { buf: Vec::with_capacity(capacity), offsets: vec![] } + Self { buf: Buf::with_capacity(capacity), offsets: vec![] } } /// The number of bytes that were written so far. @@ -37,13 +38,18 @@ impl Chunk { /// The bytes already written so far. pub fn as_bytes(&self) -> &[u8] { - self.buf.as_slice() + self.buf.deref() + } + + /// Return the limits of the chunk. + pub fn limits(&self) -> &Limits { + self.buf.limits() } /// Add all objects from another chunk to this one. pub fn extend(&mut self, other: &Chunk) { let base = self.len(); - self.buf.extend_from_slice(&other.buf); + self.buf.extend(&other.buf); self.offsets .extend(other.offsets.iter().map(|&(id, offset)| (id, base + offset))); } diff --git a/src/content.rs b/src/content.rs index cbc2638..faa1bea 100644 --- a/src/content.rs +++ b/src/content.rs @@ -1,8 +1,9 @@ use super::*; +use crate::buf::Buf; /// A builder for a content stream. pub struct Content { - buf: Vec, + buf: Buf, q_depth: usize, } @@ -17,7 +18,7 @@ impl Content { /// Create a new content stream with the specified initial buffer capacity. pub fn with_capacity(capacity: usize) -> Self { - Self { buf: Vec::with_capacity(capacity), q_depth: 0 } + Self { buf: Buf::with_capacity(capacity), q_depth: 0 } } /// Start writing an arbitrary operation. @@ -26,10 +27,18 @@ impl Content { Operation::start(&mut self.buf, operator) } - /// Return the raw constructed byte stream. - pub fn finish(mut self) -> Vec { + /// Return the buffer of the content stream. 
+ /// + /// The buffer is essentially a thin wrapper around two objects: + /// - A [`Limits`] object, which can optionally be used to keep + /// track of data such as the largest used integer or + /// the longest string used in the content streams, which is useful information + /// for some export modes. + /// - The actual underlying data of the content stream, which can be written + /// to a chunk (and optionally apply a filter before doing so). + pub fn finish(mut self) -> Buf { if self.buf.last() == Some(&b'\n') { - self.buf.pop(); + self.buf.inner.pop(); } self.buf } @@ -39,14 +48,14 @@ impl Content { /// /// This struct is created by [`Content::op`]. pub struct Operation<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, op: &'a str, first: bool, } impl<'a> Operation<'a> { #[inline] - pub(crate) fn start(buf: &'a mut Vec, op: &'a str) -> Self { + pub(crate) fn start(buf: &'a mut Buf, op: &'a str) -> Self { Self { buf, op, first: true } } @@ -87,7 +96,7 @@ impl Drop for Operation<'_> { if !self.first { self.buf.push(b' '); } - self.buf.extend(self.op.as_bytes()); + self.buf.extend_slice(self.op.as_bytes()); self.buf.push(b'\n'); } } @@ -1655,7 +1664,7 @@ mod tests { .restore_state(); assert_eq!( - content.finish(), + content.finish().into_bytes(), b"q\n1 2 3 4 re\nf\n[7 2] 4 d\n/MyImage Do\n2 3.5 /MyPattern scn\nQ" ); } @@ -1675,6 +1684,9 @@ mod tests { .show(Str(b"CD")); content.end_text(); - assert_eq!(content.finish(), b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET"); + assert_eq!( + content.finish().into_bytes(), + b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET" + ); } } diff --git a/src/font.rs b/src/font.rs index 53fc4ad..73e46df 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,3 +1,4 @@ +use crate::buf::Buf; use std::marker::PhantomData; use super::*; @@ -849,8 +850,8 @@ impl WMode { /// A builder for a `/ToUnicode` character map stream. 
pub struct UnicodeCmap { - buf: Vec, - mappings: Vec, + buf: Buf, + mappings: Buf, count: i32, glyph_id: PhantomData, } @@ -870,65 +871,65 @@ where pub fn with_writing_mode(name: Name, info: SystemInfo, mode: WMode) -> Self { // https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5014.CIDFont_Spec.pdf - let mut buf = Vec::new(); + let mut buf = Buf::new(); // Static header. - buf.extend(b"%!PS-Adobe-3.0 Resource-CMap\n"); - buf.extend(b"%%DocumentNeededResources: procset CIDInit\n"); - buf.extend(b"%%IncludeResource: procset CIDInit\n"); + buf.extend_slice(b"%!PS-Adobe-3.0 Resource-CMap\n"); + buf.extend_slice(b"%%DocumentNeededResources: procset CIDInit\n"); + buf.extend_slice(b"%%IncludeResource: procset CIDInit\n"); // Dynamic header. - buf.extend(b"%%BeginResource: CMap "); - buf.extend(name.0); + buf.extend_slice(b"%%BeginResource: CMap "); + buf.extend_slice(name.0); buf.push(b'\n'); - buf.extend(b"%%Title: ("); - buf.extend(name.0); + buf.extend_slice(b"%%Title: ("); + buf.extend_slice(name.0); buf.push(b' '); - buf.extend(info.registry.0); + buf.extend_slice(info.registry.0); buf.push(b' '); - buf.extend(info.ordering.0); + buf.extend_slice(info.ordering.0); buf.push(b' '); buf.push_int(info.supplement); - buf.extend(b")\n"); - buf.extend(b"%%Version: 1\n"); - buf.extend(b"%%EndComments\n"); + buf.extend_slice(b")\n"); + buf.extend_slice(b"%%Version: 1\n"); + buf.extend_slice(b"%%EndComments\n"); // General body. 
- buf.extend(b"/CIDInit /ProcSet findresource begin\n"); - buf.extend(b"12 dict begin\n"); - buf.extend(b"begincmap\n"); - buf.extend(b"/CIDSystemInfo 3 dict dup begin\n"); - buf.extend(b" /Registry "); + buf.extend_slice(b"/CIDInit /ProcSet findresource begin\n"); + buf.extend_slice(b"12 dict begin\n"); + buf.extend_slice(b"begincmap\n"); + buf.extend_slice(b"/CIDSystemInfo 3 dict dup begin\n"); + buf.extend_slice(b" /Registry "); buf.push_val(info.registry); - buf.extend(b" def\n"); - buf.extend(b" /Ordering "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b" /Ordering "); buf.push_val(info.ordering); - buf.extend(b" def\n"); - buf.extend(b" /Supplement "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b" /Supplement "); buf.push_val(info.supplement); - buf.extend(b" def\n"); - buf.extend(b"end def\n"); - buf.extend(b"/CMapName "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b"end def\n"); + buf.extend_slice(b"/CMapName "); buf.push_val(name); - buf.extend(b" def\n"); - buf.extend(b"/CMapVersion 1 def\n"); - buf.extend(b"/CMapType 0 def\n"); - buf.extend(b"/WMode "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b"/CMapVersion 1 def\n"); + buf.extend_slice(b"/CMapType 0 def\n"); + buf.extend_slice(b"/WMode "); buf.push_int(mode.to_int()); - buf.extend(b" def\n"); + buf.extend_slice(b" def\n"); // We just cover the whole unicode codespace. 
- buf.extend(b"1 begincodespacerange\n"); + buf.extend_slice(b"1 begincodespacerange\n"); buf.push(b'<'); G::MIN.push(&mut buf); - buf.extend(b"> <"); + buf.extend_slice(b"> <"); G::MAX.push(&mut buf); - buf.extend(b">\n"); - buf.extend(b"endcodespacerange\n"); + buf.extend_slice(b">\n"); + buf.extend_slice(b"endcodespacerange\n"); Self { buf, - mappings: vec![], + mappings: Buf::new(), count: 0, glyph_id: PhantomData, } @@ -947,7 +948,7 @@ where ) { self.mappings.push(b'<'); glyph.push(&mut self.mappings); - self.mappings.extend(b"> <"); + self.mappings.extend_slice(b"> <"); for c in codepoints { for &mut part in c.encode_utf16(&mut [0; 2]) { @@ -955,7 +956,7 @@ where } } - self.mappings.extend(b">\n"); + self.mappings.extend_slice(b">\n"); self.count += 1; // At most 100 lines per range. @@ -965,17 +966,18 @@ where } /// Finish building the character map. - pub fn finish(mut self) -> Vec { + pub fn finish(mut self) -> Buf { // Flush the in-progress range. self.flush_range(); // End of body. 
- self.buf.extend(b"endcmap\n"); - self.buf.extend(b"CMapName currentdict /CMap defineresource pop\n"); - self.buf.extend(b"end\n"); - self.buf.extend(b"end\n"); - self.buf.extend(b"%%EndResource\n"); - self.buf.extend(b"%%EOF"); + self.buf.extend_slice(b"endcmap\n"); + self.buf + .extend_slice(b"CMapName currentdict /CMap defineresource pop\n"); + self.buf.extend_slice(b"end\n"); + self.buf.extend_slice(b"end\n"); + self.buf.extend_slice(b"%%EndResource\n"); + self.buf.extend_slice(b"%%EOF"); self.buf } @@ -983,13 +985,13 @@ where fn flush_range(&mut self) { if self.count > 0 { self.buf.push_int(self.count); - self.buf.extend(b" beginbfchar\n"); - self.buf.extend(&self.mappings); - self.buf.extend(b"endbfchar\n"); + self.buf.extend_slice(b" beginbfchar\n"); + self.buf.extend_slice(&self.mappings); + self.buf.extend_slice(b"endbfchar\n"); } self.count = 0; - self.mappings.clear(); + self.mappings.inner.clear(); } } @@ -1005,19 +1007,19 @@ impl GlyphId for u16 {} /// Module to seal the `GlyphId` trait. mod private { - use crate::buf::BufExt; + use crate::buf::Buf; pub trait Sealed { const MIN: Self; const MAX: Self; - fn push(self, buf: &mut Vec); + fn push(self, buf: &mut Buf); } impl Sealed for u8 { const MIN: Self = u8::MIN; const MAX: Self = u8::MAX; - fn push(self, buf: &mut Vec) { + fn push(self, buf: &mut Buf) { buf.push_hex(self); } } @@ -1026,7 +1028,7 @@ mod private { const MIN: Self = u16::MIN; const MAX: Self = u16::MAX; - fn push(self, buf: &mut Vec) { + fn push(self, buf: &mut Buf) { buf.push_hex_u16(self); } } diff --git a/src/functions.rs b/src/functions.rs index 50300b1..230c2cf 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -335,13 +335,13 @@ pub enum PostScriptOp<'a> { impl<'a> PostScriptOp<'a> { /// Encode a slice of operations into a byte stream. 
- pub fn encode(ops: &[Self]) -> Vec { - let mut buf = Vec::new(); + pub fn encode(ops: &[Self]) -> Buf { + let mut buf = Buf::new(); Self::write_slice(ops, &mut buf); buf } - fn write_slice(ops: &[Self], buf: &mut Vec) { + fn write_slice(ops: &[Self], buf: &mut Buf) { buf.push(b'{'); if ops.len() > 1 { buf.push(b' '); @@ -351,28 +351,28 @@ impl<'a> PostScriptOp<'a> { buf.push(b' '); } if ops.len() == 1 { - buf.pop(); + buf.inner.pop(); } buf.push(b'}'); } - fn write(&self, buf: &mut Vec) { + fn write(&self, buf: &mut Buf) { match *self { Self::Real(r) => buf.push_decimal(r), Self::Integer(i) => buf.push_val(i), Self::If(ops) => { Self::write_slice(ops, buf); buf.push(b' '); - buf.extend(self.operator()); + buf.extend_slice(self.operator()); } Self::IfElse(ops1, ops2) => { Self::write_slice(ops1, buf); buf.push(b' '); Self::write_slice(ops2, buf); buf.push(b' '); - buf.extend(self.operator()); + buf.extend_slice(self.operator()); } - _ => buf.extend(self.operator()), + _ => buf.extend_slice(self.operator()), } } @@ -446,7 +446,7 @@ mod tests { ]; assert_eq!( - PostScriptOp::encode(&ops), + &PostScriptOp::encode(&ops).into_bytes(), b"{ 3.0 2.0 mul exch dup 0.0 ge { 1.0 add } {neg} ifelse add }" ); } diff --git a/src/lib.rs b/src/lib.rs index 0c771f4..682ca59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,6 +186,7 @@ pub mod types { pub use xobject::SMaskInData; } +pub use self::buf::{Buf, Limits}; pub use self::chunk::Chunk; pub use self::content::Content; pub use self::object::{ @@ -197,7 +198,6 @@ use std::fmt::{self, Debug, Formatter}; use std::io::Write; use std::ops::{Deref, DerefMut}; -use self::buf::BufExt; use self::writers::*; /// A builder for a PDF file. @@ -225,7 +225,7 @@ impl Pdf { /// Create a new PDF with the specified initial buffer capacity. 
pub fn with_capacity(capacity: usize) -> Self { let mut chunk = Chunk::with_capacity(capacity); - chunk.buf.extend(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n"); + chunk.buf.extend_slice(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n"); Self { chunk, catalog_id: None, @@ -241,7 +241,7 @@ impl Pdf { /// /// _Default value_: \x80\x80\x80\x80 pub fn set_binary_marker(&mut self, marker: &[u8; 4]) { - self.chunk.buf[10..14].copy_from_slice(marker); + self.chunk.buf.inner[10..14].copy_from_slice(marker); } /// Set the PDF version. @@ -252,10 +252,10 @@ impl Pdf { /// _Default value_: 1.7. pub fn set_version(&mut self, major: u8, minor: u8) { if major < 10 { - self.chunk.buf[5] = b'0' + major; + self.chunk.buf.inner[5] = b'0' + major; } if minor < 10 { - self.chunk.buf[7] = b'0' + minor; + self.chunk.buf.inner[7] = b'0' + minor; } } @@ -300,12 +300,12 @@ impl Pdf { let xref_len = 1 + offsets.last().map_or(0, |p| p.0.get()); let xref_offset = buf.len(); - buf.extend(b"xref\n0 "); + buf.extend_slice(b"xref\n0 "); buf.push_int(xref_len); buf.push(b'\n'); if offsets.is_empty() { - write!(buf, "0000000000 65535 f\r\n").unwrap(); + write!(buf.inner, "0000000000 65535 f\r\n").unwrap(); } let mut written = 0; @@ -330,16 +330,16 @@ impl Pdf { } let gen = if free_id == 0 { "65535" } else { "00000" }; - write!(buf, "{:010} {} f\r\n", next % xref_len, gen).unwrap(); + write!(buf.inner, "{:010} {} f\r\n", next % xref_len, gen).unwrap(); written += 1; } - write!(buf, "{:010} 00000 n\r\n", offset).unwrap(); + write!(buf.inner, "{:010} 00000 n\r\n", offset).unwrap(); written += 1; } // Write the trailer dictionary. - buf.extend(b"trailer\n"); + buf.extend_slice(b"trailer\n"); let mut trailer = Obj::direct(&mut buf, 0).dict(); trailer.pair(Name(b"Size"), xref_len); @@ -361,12 +361,12 @@ impl Pdf { trailer.finish(); // Write where the cross-reference table starts. 
- buf.extend(b"\nstartxref\n"); - write!(buf, "{}", xref_offset).unwrap(); + buf.extend_slice(b"\nstartxref\n"); + write!(buf.inner, "{}", xref_offset).unwrap(); // Write the end of file marker. - buf.extend(b"\n%%EOF"); - buf + buf.extend_slice(b"\n%%EOF"); + buf.into_bytes() } } diff --git a/src/object.rs b/src/object.rs index ea34a1f..cf3c5de 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,3 +1,4 @@ +use crate::buf::Buf; use std::convert::TryFrom; use std::marker::PhantomData; use std::mem::ManuallyDrop; @@ -8,7 +9,7 @@ use super::*; /// A primitive PDF object. pub trait Primitive { /// Write the object into a buffer. - fn write(self, buf: &mut Vec); + fn write(self, buf: &mut Buf); } impl Primitive for &T @@ -16,32 +17,32 @@ where T: Copy, { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { (*self).write(buf); } } impl Primitive for bool { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { if self { - buf.extend(b"true"); + buf.extend_slice(b"true"); } else { - buf.extend(b"false"); + buf.extend_slice(b"false"); } } } impl Primitive for i32 { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push_int(self); } } impl Primitive for f32 { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push_float(self); } } @@ -69,15 +70,16 @@ impl Str<'_> { } impl Primitive for Str<'_> { - fn write(self, buf: &mut Vec) { - // We use: + fn write(self, buf: &mut Buf) { + buf.limits.register_str_len(self.0.len()); + // - Literal strings for ASCII with nice escape sequences to make it // also be represented fully in visible ASCII. We also escape // parentheses because they are delimiters. // - Hex strings for anything non-ASCII. 
if self.0.iter().all(|b| b.is_ascii()) { buf.reserve(self.0.len()); - buf.push(b'('); + buf.inner.push(b'('); let mut balanced = None; for &byte in self.0 { @@ -90,13 +92,13 @@ impl Primitive for Str<'_> { } buf.push(byte); } - b'\\' => buf.extend(br"\\"), + b'\\' => buf.extend_slice(br"\\"), b' '..=b'~' => buf.push(byte), - b'\n' => buf.extend(br"\n"), - b'\r' => buf.extend(br"\r"), - b'\t' => buf.extend(br"\t"), - b'\x08' => buf.extend(br"\b"), - b'\x0c' => buf.extend(br"\f"), + b'\n' => buf.extend_slice(br"\n"), + b'\r' => buf.extend_slice(br"\r"), + b'\t' => buf.extend_slice(br"\t"), + b'\x08' => buf.extend_slice(br"\b"), + b'\x0c' => buf.extend_slice(br"\f"), _ => { buf.push(b'\\'); buf.push_octal(byte); @@ -126,7 +128,9 @@ impl Primitive for Str<'_> { pub struct TextStr<'a>(pub &'a str); impl Primitive for TextStr<'_> { - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { + buf.limits.register_str_len(self.0.as_bytes().len()); + // ASCII and PDFDocEncoding match for 32 up to 126. 
if self.0.bytes().all(|b| matches!(b, 32..=126)) { Str(self.0.as_bytes()).write(buf); @@ -150,7 +154,9 @@ impl Primitive for TextStr<'_> { pub struct Name<'a>(pub &'a [u8]); impl Primitive for Name<'_> { - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { + buf.limits.register_name_len(self.0.len()); + buf.reserve(1 + self.0.len()); buf.push(b'/'); for &byte in self.0 { @@ -196,8 +202,8 @@ pub struct Null; impl Primitive for Null { #[inline] - fn write(self, buf: &mut Vec) { - buf.extend(b"null"); + fn write(self, buf: &mut Buf) { + buf.extend_slice(b"null"); } } @@ -245,9 +251,9 @@ impl Ref { impl Primitive for Ref { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push_int(self.0.get()); - buf.extend(b" 0 R"); + buf.extend_slice(b" 0 R"); } } @@ -281,7 +287,7 @@ impl Rect { impl Primitive for Rect { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push(b'['); buf.push_val(self.x1); buf.push(b' '); @@ -291,6 +297,8 @@ impl Primitive for Rect { buf.push(b' '); buf.push_val(self.y2); buf.push(b']'); + + buf.limits.register_array_len(4); } } @@ -393,22 +401,26 @@ impl Date { } impl Primitive for Date { - fn write(self, buf: &mut Vec) { - buf.extend(b"(D:"); + fn write(self, buf: &mut Buf) { + buf.extend_slice(b"(D:"); (|| { - write!(buf, "{:04}", self.year).unwrap(); - write!(buf, "{:02}", self.month?).unwrap(); - write!(buf, "{:02}", self.day?).unwrap(); - write!(buf, "{:02}", self.hour?).unwrap(); - write!(buf, "{:02}", self.minute?).unwrap(); - write!(buf, "{:02}", self.second?).unwrap(); + write!(buf.inner, "{:04}", self.year).unwrap(); + write!(buf.inner, "{:02}", self.month?).unwrap(); + write!(buf.inner, "{:02}", self.day?).unwrap(); + write!(buf.inner, "{:02}", self.hour?).unwrap(); + write!(buf.inner, "{:02}", self.minute?).unwrap(); + write!(buf.inner, "{:02}", self.second?).unwrap(); let utc_offset_hour = self.utc_offset_hour?; if utc_offset_hour == 0 && 
self.utc_offset_minute == 0 { buf.push(b'Z'); } else { - write!(buf, "{:+03}'{:02}", utc_offset_hour, self.utc_offset_minute) - .unwrap(); + write!( + buf.inner, + "{:+03}'{:02}", + utc_offset_hour, self.utc_offset_minute + ) + .unwrap(); } Some(()) })(); @@ -420,7 +432,7 @@ impl Primitive for Date { /// Writer for an arbitrary object. #[must_use = "not consuming this leaves the writer in an inconsistent state"] pub struct Obj<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, indirect: bool, indent: u8, } @@ -428,15 +440,15 @@ pub struct Obj<'a> { impl<'a> Obj<'a> { /// Start a new direct object. #[inline] - pub(crate) fn direct(buf: &'a mut Vec, indent: u8) -> Self { + pub(crate) fn direct(buf: &'a mut Buf, indent: u8) -> Self { Self { buf, indirect: false, indent } } /// Start a new indirect object. #[inline] - pub(crate) fn indirect(buf: &'a mut Vec, id: Ref) -> Self { + pub(crate) fn indirect(buf: &'a mut Buf, id: Ref) -> Self { buf.push_int(id.get()); - buf.extend(b" 0 obj\n"); + buf.extend_slice(b" 0 obj\n"); Self { buf, indirect: true, indent: 0 } } @@ -445,7 +457,7 @@ impl<'a> Obj<'a> { pub fn primitive(self, value: T) { value.write(self.buf); if self.indirect { - self.buf.extend(b"\nendobj\n\n"); + self.buf.extend_slice(b"\nendobj\n\n"); } } @@ -500,7 +512,7 @@ pub trait Rewrite<'a> { /// Writer for an array. pub struct Array<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, indirect: bool, indent: u8, len: i32, @@ -570,9 +582,10 @@ impl<'a> Array<'a> { impl Drop for Array<'_> { #[inline] fn drop(&mut self) { + self.buf.limits.register_array_len(self.len() as usize); self.buf.push(b']'); if self.indirect { - self.buf.extend(b"\nendobj\n\n"); + self.buf.extend_slice(b"\nendobj\n\n"); } } } @@ -646,14 +659,14 @@ impl<'a, T> TypedArray<'a, T> { /// Writer for a dictionary. 
pub struct Dict<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, indirect: bool, indent: u8, len: i32, } writer!(Dict: |obj| { - obj.buf.extend(b"<<"); + obj.buf.extend_slice(b"<<"); Self { buf: obj.buf, indirect: obj.indirect, @@ -721,15 +734,17 @@ impl<'a> Dict<'a> { impl Drop for Dict<'_> { #[inline] fn drop(&mut self) { + self.buf.limits.register_dict_entries(self.len as usize); + if self.len != 0 { self.buf.push(b'\n'); for _ in 0..self.indent - 2 { self.buf.push(b' '); } } - self.buf.extend(b">>"); + self.buf.extend_slice(b">>"); if self.indirect { - self.buf.extend(b"\nendobj\n\n"); + self.buf.extend_slice(b"\nendobj\n\n"); } } } @@ -847,11 +862,14 @@ impl<'a> Stream<'a> { impl Drop for Stream<'_> { fn drop(&mut self) { - self.dict.buf.extend(b"\n>>"); - self.dict.buf.extend(b"\nstream\n"); - self.dict.buf.extend(self.data.as_ref()); - self.dict.buf.extend(b"\nendstream"); - self.dict.buf.extend(b"\nendobj\n\n"); + let dict_len = self.dict.len as usize; + self.dict.buf.limits.register_dict_entries(dict_len); + + self.dict.buf.extend_slice(b"\n>>"); + self.dict.buf.extend_slice(b"\nstream\n"); + self.dict.buf.extend_slice(self.data.as_ref()); + self.dict.buf.extend_slice(b"\nendstream"); + self.dict.buf.extend_slice(b"\nendobj\n\n"); } } diff --git a/src/renumber.rs b/src/renumber.rs index cca7791..e564cd1 100644 --- a/src/renumber.rs +++ b/src/renumber.rs @@ -1,9 +1,12 @@ -use crate::{BufExt, Chunk, Ref}; +use crate::buf::Buf; +use crate::{Chunk, Ref}; /// Renumbers a chunk of objects. /// /// See [`Chunk::renumber`] for more details. 
pub fn renumber(source: &Chunk, target: &mut Chunk, mapping: &mut dyn FnMut(Ref) -> Ref) { + target.buf.limits.merge(source.limits()); + let mut iter = source.offsets.iter().copied().peekable(); while let Some((id, offset)) = iter.next() { let new = mapping(id); @@ -14,9 +17,9 @@ pub fn renumber(source: &Chunk, target: &mut Chunk, mapping: &mut dyn FnMut(Ref) target.buf.push_int(new.get()); target.buf.push(b' '); target.buf.push_int(gen); - target.buf.extend(b" obj\n"); + target.buf.extend_slice(b" obj\n"); patch_object(slice, &mut target.buf, mapping); - target.buf.extend(b"\nendobj\n\n"); + target.buf.extend_slice(b"\nendobj\n\n"); } } @@ -43,7 +46,7 @@ fn extract_object(slice: &[u8]) -> Option<(i32, &[u8])> { /// Processes the interior of an indirect object and patches all indirect /// references. -fn patch_object(slice: &[u8], buf: &mut Vec, mapping: &mut dyn FnMut(Ref) -> Ref) { +fn patch_object(slice: &[u8], buf: &mut Buf, mapping: &mut dyn FnMut(Ref) -> Ref) { // Find the next point of interest: // - 'R' is interesting because it could be an indirect reference // - Anything that could contain indirect-reference-like things that are not @@ -62,7 +65,7 @@ fn patch_object(slice: &[u8], buf: &mut Vec, mapping: &mut dyn FnMut(Ref) -> b'R' => { if let Some((head, id, gen)) = validate_ref(&slice[..seen]) { let new = mapping(id); - buf.extend(&slice[written..head]); + buf.extend_slice(&slice[written..head]); buf.push_int(new.get()); buf.push(b' '); buf.push_int(gen); @@ -112,7 +115,7 @@ fn patch_object(slice: &[u8], buf: &mut Vec, mapping: &mut dyn FnMut(Ref) -> seen += 1; } - buf.extend(&slice[written..]); + buf.extend_slice(&slice[written..]); } /// Validate a match for an indirect reference. @@ -183,9 +186,10 @@ mod tests { // Manually write an untidy object. 
c.offsets.push((Ref::new(8), c.buf.len())); - c.buf.extend(b"8 3 obj\n<>%\n\nendobj"); + c.buf.extend_slice(b"8 3 obj\n<>%\n\nendobj"); c.stream(Ref::new(17), b"1 0 R 2 0 R 3 0 R 4 0 R") .pair(Name(b"Ok"), TextStr(")4 0 R")) @@ -202,7 +206,7 @@ mod tests { }); test!( - r.buf, + r.buf.into_bytes(), b"1 0 obj", b"<<", b" /Nested <<",