-
Notifications
You must be signed in to change notification settings - Fork 241
/
PdfDocument.cs
310 lines (273 loc) · 12 KB
/
PdfDocument.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
namespace UglyToad.PdfPig
{
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using AcroForms;
using Content;
using Core;
using CrossReference;
using Encryption;
using Exceptions;
using Filters;
using Parser;
using Tokenization.Scanner;
using Tokens;
using Outline;
using Outline.Destinations;
/// <inheritdoc />
/// <summary>
/// Provides access to document level information for this PDF document as well as access to the <see cref="T:UglyToad.PdfPig.Content.Page"/>s contained in the document.
/// </summary>
public class PdfDocument : IDisposable
{
    // Set once Dispose has run; checked by every member which needs the underlying resources.
    private bool isDisposed;

    // The form is only built (via the AcroFormFactory) the first time TryGetForm is called.
    private readonly Lazy<AcroForm> documentForm;

    private readonly HeaderVersion version;

    private readonly IInputBytes inputBytes;

    // Null when the document is not encrypted, see IsEncrypted.
    private readonly EncryptionDictionary? encryptionDictionary;

    private readonly IPdfTokenScanner pdfScanner;

    private readonly ILookupFilterProvider filterProvider;

    private readonly BookmarksProvider bookmarksProvider;

    private readonly ParsingOptions parsingOptions;

    private readonly Pages pages;

    private readonly NamedDestinations namedDestinations;

    /// <summary>
    /// The metadata associated with this document.
    /// </summary>
    public DocumentInformation Information { get; }

    /// <summary>
    /// Access to the underlying raw structure of the document.
    /// </summary>
    public Structure Structure { get; }

    /// <summary>
    /// Access to rare or advanced features of the PDF specification.
    /// </summary>
    public AdvancedPdfDocumentAccess Advanced { get; }

    /// <summary>
    /// The version number of the PDF specification which this file conforms to, for example 1.4.
    /// </summary>
    public double Version => version.Version;

    /// <summary>
    /// Get the number of pages in this document.
    /// </summary>
    public int NumberOfPages => pages.Count;

    /// <summary>
    /// Whether the document content is encrypted.
    /// </summary>
    [MemberNotNullWhen(true, nameof(encryptionDictionary))]
    public bool IsEncrypted => encryptionDictionary != null;

    /// <summary>
    /// Create a <see cref="PdfDocument"/> from the already parsed parts of a PDF file.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Any required argument is <see langword="null"/>. <paramref name="encryptionDictionary"/> may be
    /// <see langword="null"/>; <paramref name="crossReferenceTable"/> is passed through unchecked.
    /// </exception>
    internal PdfDocument(
        IInputBytes inputBytes,
        HeaderVersion version,
        CrossReferenceTable crossReferenceTable,
        Catalog catalog,
        DocumentInformation information,
        EncryptionDictionary? encryptionDictionary,
        IPdfTokenScanner pdfScanner,
        ILookupFilterProvider filterProvider,
        AcroFormFactory acroFormFactory,
        BookmarksProvider bookmarksProvider,
        ParsingOptions parsingOptions)
    {
        // These two are not stored in fields but are dereferenced below (catalog) or lazily (acroFormFactory),
        // so fail here with a named argument instead of a later NullReferenceException.
        if (catalog == null)
        {
            throw new ArgumentNullException(nameof(catalog));
        }

        if (acroFormFactory == null)
        {
            throw new ArgumentNullException(nameof(acroFormFactory));
        }

        this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes));
        this.version = version ?? throw new ArgumentNullException(nameof(version));
        this.encryptionDictionary = encryptionDictionary;
        this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
        this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
        this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider));
        this.parsingOptions = parsingOptions ?? throw new ArgumentNullException(nameof(parsingOptions));
        Information = information ?? throw new ArgumentNullException(nameof(information));

        pages = catalog.Pages;
        namedDestinations = catalog.NamedDestinations;
        Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
        Advanced = new AdvancedPdfDocumentAccess(pdfScanner, filterProvider, catalog);
        documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog)!);
    }

    /// <summary>
    /// Creates a <see cref="PdfDocument"/> for reading from the provided file bytes.
    /// </summary>
    /// <param name="fileBytes">The bytes of the PDF file.</param>
    /// <param name="options">Optional parameters controlling parsing.</param>
    /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
    public static PdfDocument Open(byte[] fileBytes, ParsingOptions? options = null) => PdfDocumentFactory.Open(fileBytes, options);

    /// <summary>
    /// Opens a file and creates a <see cref="PdfDocument"/> for reading from the provided file path.
    /// </summary>
    /// <param name="filePath">The full path to the file location of the PDF file.</param>
    /// <param name="options">Optional parameters controlling parsing.</param>
    /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
    public static PdfDocument Open(string filePath, ParsingOptions? options = null) => PdfDocumentFactory.Open(filePath, options);

    /// <summary>
    /// Creates a <see cref="PdfDocument"/> for reading from the provided stream.
    /// The caller must manage disposing the stream. The created PdfDocument will not dispose the stream.
    /// </summary>
    /// <param name="stream">
    /// A stream of the file contents, this must support reading and seeking.
    /// The PdfDocument will not dispose of the provided stream.
    /// </param>
    /// <param name="options">Optional parameters controlling parsing.</param>
    /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
    public static PdfDocument Open(Stream stream, ParsingOptions? options = null) => PdfDocumentFactory.Open(stream, options);

    /// <summary>
    /// Add a page factory.
    /// </summary>
    /// <typeparam name="TPage">The type of page produced by the factory.</typeparam>
    /// <param name="pageFactory">The factory instance to register with the document's pages.</param>
    public void AddPageFactory<TPage>(IPageFactory<TPage> pageFactory)
    {
        pages.AddPageFactory(pageFactory);
    }

    /// <summary>
    /// Add a page factory.
    /// </summary>
    /// <typeparam name="TPage">The type of page produced by the factory.</typeparam>
    /// <typeparam name="TPageFactory">The type of the factory to register with the document's pages.</typeparam>
#if NET
    public void AddPageFactory<TPage, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] TPageFactory>() where TPageFactory : IPageFactory<TPage>
#else
    public void AddPageFactory<TPage, TPageFactory>() where TPageFactory : IPageFactory<TPage>
#endif
    {
        pages.AddPageFactory<TPage, TPageFactory>();
    }

    /// <summary>
    /// Get the page with the specified page number (1 indexed).
    /// </summary>
    /// <param name="pageNumber">The number of the page to return, this starts from 1.</param>
    /// <returns>The page.</returns>
    /// <exception cref="ObjectDisposedException">The document has been disposed.</exception>
    /// <exception cref="PdfDocumentEncryptedException">Retrieving the page failed and the document is encrypted.</exception>
    public Page GetPage(int pageNumber)
    {
        ThrowIfDisposed("Cannot access page after the document is disposed.");

        parsingOptions.Logger.Debug($"Accessing page {pageNumber}.");

        try
        {
            return pages.GetPage(pageNumber, namedDestinations, parsingOptions);
        }
        catch (Exception ex)
        {
            // For encrypted documents any failure is wrapped so the caller can see encryption as a likely cause.
            if (IsEncrypted)
            {
                throw new PdfDocumentEncryptedException("Document was encrypted which may have caused error when retrieving page.", encryptionDictionary, ex);
            }

            throw;
        }
    }

    /// <summary>
    /// Get the page with the specified page number (1 indexed), using the specified page factory.
    /// </summary>
    /// <typeparam name="TPage">The type of page to return.</typeparam>
    /// <param name="pageNumber">The number of the page to return, this starts from 1.</param>
    /// <returns>The page.</returns>
    /// <exception cref="ObjectDisposedException">The document has been disposed.</exception>
    /// <exception cref="PdfDocumentEncryptedException">Retrieving the page failed and the document is encrypted.</exception>
    public TPage GetPage<TPage>(int pageNumber)
    {
        ThrowIfDisposed("Cannot access page after the document is disposed.");

        parsingOptions.Logger.Debug($"Accessing page {pageNumber}.");

        try
        {
            return pages.GetPage<TPage>(pageNumber, namedDestinations, parsingOptions);
        }
        catch (Exception ex)
        {
            // For encrypted documents any failure is wrapped so the caller can see encryption as a likely cause.
            if (IsEncrypted)
            {
                throw new PdfDocumentEncryptedException("Document was encrypted which may have caused error when retrieving page.", encryptionDictionary, ex);
            }

            throw;
        }
    }

    /// <summary>
    /// Gets all pages in this document in order.
    /// </summary>
    /// <remarks>The pages are produced lazily, each page is retrieved as the sequence is enumerated.</remarks>
    public IEnumerable<Page> GetPages()
    {
        for (var pageNumber = 1; pageNumber <= NumberOfPages; pageNumber++)
        {
            yield return GetPage(pageNumber);
        }
    }

    /// <summary>
    /// Gets all pages in this document in order, using the specified page factory.
    /// </summary>
    /// <typeparam name="TPage">The type of page to return.</typeparam>
    /// <remarks>The pages are produced lazily, each page is retrieved as the sequence is enumerated.</remarks>
    public IEnumerable<TPage> GetPages<TPage>()
    {
        for (var pageNumber = 1; pageNumber <= NumberOfPages; pageNumber++)
        {
            yield return GetPage<TPage>(pageNumber);
        }
    }

    /// <summary>
    /// Get the document level metadata if present.
    /// The metadata is XML in the (Extensible Metadata Platform) XMP format.
    /// </summary>
    /// <remarks>This will throw a <see cref="ObjectDisposedException"/> if called on a disposed <see cref="PdfDocument"/>.</remarks>
    /// <param name="metadata">The metadata stream if it exists.</param>
    /// <returns><see langword="true"/> if the metadata is present, <see langword="false"/> otherwise.</returns>
    /// <exception cref="ObjectDisposedException">The document has been disposed.</exception>
    public bool TryGetXmpMetadata([NotNullWhen(true)] out XmpMetadata? metadata)
    {
        ThrowIfDisposed("Cannot access the document metadata after the document is disposed.");

        metadata = null;

        if (!Structure.Catalog.CatalogDictionary.TryGet(NameToken.Metadata, pdfScanner, out StreamToken? xmpStreamToken))
        {
            return false;
        }

        metadata = new XmpMetadata(xmpStreamToken, filterProvider, pdfScanner);

        return true;
    }

    /// <summary>
    /// Gets the bookmarks if this document contains some.
    /// </summary>
    /// <remarks>This will throw a <see cref="ObjectDisposedException"/> if called on a disposed <see cref="PdfDocument"/>.</remarks>
    /// <param name="bookmarks">The bookmarks if they exist.</param>
    /// <returns><see langword="true"/> if bookmarks were found, <see langword="false"/> otherwise.</returns>
    /// <exception cref="ObjectDisposedException">The document has been disposed.</exception>
    public bool TryGetBookmarks([NotNullWhen(true)] out Bookmarks? bookmarks)
    {
        ThrowIfDisposed("Cannot access the bookmarks after the document is disposed.");

        bookmarks = bookmarksProvider.GetBookmarks(Structure.Catalog);

        return bookmarks != null;
    }

    /// <summary>
    /// Gets the form if this document contains one.
    /// </summary>
    /// <remarks>This will throw a <see cref="ObjectDisposedException"/> if called on a disposed <see cref="PdfDocument"/>.</remarks>
    /// <param name="form">The <see cref="AcroForm"/> from the document, this is <see langword="null"/> when the method returns <see langword="false"/>.</param>
    /// <returns><see langword="true"/> if the document contains a form, <see langword="false"/> otherwise.</returns>
    /// <exception cref="ObjectDisposedException">The document has been disposed.</exception>
    public bool TryGetForm(out AcroForm form)
    {
        ThrowIfDisposed("Cannot access the form after the document is disposed.");

        form = documentForm.Value;

        return form != null;
    }

    /// <inheritdoc />
    /// <summary>
    /// Dispose the <see cref="T:UglyToad.PdfPig.PdfDocument" /> and close any unmanaged resources.
    /// </summary>
    /// <remarks>
    /// Calling this more than once has no further effect. A failure to dispose one resource is logged
    /// and does not prevent the remaining resources from being disposed.
    /// </remarks>
    public void Dispose()
    {
        if (isDisposed)
        {
            return;
        }

        try
        {
            DisposeAndLogFailure(Advanced.Dispose);
            DisposeAndLogFailure(pdfScanner.Dispose);
            DisposeAndLogFailure(inputBytes.Dispose);
            DisposeAndLogFailure(pages.Dispose);
        }
        finally
        {
            isDisposed = true;
        }
    }

    /// <summary>
    /// Run a single dispose action, logging rather than propagating any exception it throws.
    /// </summary>
    private void DisposeAndLogFailure(Action dispose)
    {
        try
        {
            dispose();
        }
        catch (Exception ex)
        {
            parsingOptions.Logger.Error("Failed disposing the PdfDocument due to an error.", ex);
        }
    }

    /// <summary>
    /// Throw an <see cref="ObjectDisposedException"/> carrying <paramref name="message"/> if this document has been disposed.
    /// </summary>
    private void ThrowIfDisposed(string message)
    {
        if (isDisposed)
        {
            // The single string constructor treats its argument as the object name, so use the
            // two argument overload to report the text as the exception message.
            throw new ObjectDisposedException(nameof(PdfDocument), message);
        }
    }
}
}