Skip to content

Commit

Permalink
Add "pdf_data" utility script.
Browse files Browse the repository at this point in the history
  • Loading branch information
deven committed Jun 24, 2022
1 parent 380da31 commit 418b38f
Showing 1 changed file with 130 additions and 0 deletions.
130 changes: 130 additions & 0 deletions pdf_data
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env perl
#
# Utility script for PDF::Data module.
#

# Require Perl 5.16; enable warnings.
use v5.16;
use warnings;

# Program version.
use version; our $version = version->declare('v1.0.0');

# Initialize modules.
use File::Basename qw[basename];
use FindBin;
use Getopt::Long qw[:config gnu_compat bundling no_getopt_compat require_order];
use IO::Handle qw[autoflush];
use POSIX qw[strftime];

# Use local library path if PDF::Data module exists there.
use if -f "$FindBin::Bin/lib/PDF/Data.pm", "lib", "$FindBin::Bin/lib";

# Load PDF::Data module.
use PDF::Data;

# Get program name.
our $program = basename $0;

# List of command-line options.
our @option_list = (
"verbose|v" => \( our $verbose = "" ),
"inplace|i" => \( our $inplace = "" ),

"dump" => \( our $dump = "" ),
"outline" => \( our $outline = "" ),

"find_bbox" => \( our $find_bbox = "" ),
"new_bbox" => \( our $new_bbox = "" ),

"decompress" => \( our $decompress = "" ),

"help|h|?" => \( our $print_help = "" ),
"version|V" => \( our $print_version = "" ),
);

# Help message.
our $help_message = <<EOF;
Usage: $program [-v] [-i] [--dump] [--outline] [--find_bbox] [--new_bbox] [--decompress] [<input.pdf>] [<output.pdf>]\n";
Utility script for PDF::Data module.
Options:
--verbose (-v) Enable verbose mode.
--inplace (-i) Rewrite PDF file in place.
--dump Dump complete parsed PDF data structure.
--outline Dump outline of parsed PDF data structure.
--find_bbox Find the bounding box of the PDF file.
--new_bbox Generate a new bounding box for the PDF file.
--decompress Decompress all compressed PDF stream objects.
--help (-h) Print this help message.
--version (-V) Print the version number of this program.
EOF

# Extract usage message from help message.
our ($usage) = $help_message =~ /\A\s*(Usage: .*?\n)\n/s or die "$program: Usage message missing!\n";

# Parse command-line options.
GetOptions(@option_list) or die $usage;

# For standard --help and --version options, print message and exit.
if ($print_help or $print_version) {
print "$program $version\n";
print $help_message if $print_help;
exit 0;
}

# Don't buffer output.
autoflush STDOUT;

# Get input and output PDF filenames from command-line arguments.
my $input_pdf = shift @ARGV || "-";
my $output_pdf = shift @ARGV || "-";

# Override output filename if --inplace is specified.
if ($inplace) {
die "Error: --inplace is incompatible with --dump and --outline!\n" if $dump or $outline;
$output_pdf = $input_pdf;
}

# Get current timestamp.
my $time = time;

# Attempt to get file modification timestamp.
unless ($input_pdf eq "-") {
die "$input_pdf $!\n" unless -e $input_pdf;
$time = (stat _)[9];
}

# Parse input PDF file.
my $pdf = PDF::Data->read_pdf($input_pdf);
printf STDERR "\nSuccessfully parsed PDF file %s.\n\n", ($input_pdf eq "-" ? "on standard input" : "\"$input_pdf\"");

# Find bounding box or generate new bounding box, if requested.
$pdf->find_bbox($pdf, $new_bbox) if $find_bbox or $new_bbox;

# Determine output mode.
if ($dump) {
# Dump PDF structure.
$pdf->dump_pdf($output_pdf, "dump");
} elsif ($outline) {
# Dump outline of PDF structure.
$pdf->dump_pdf($output_pdf, "outline");
} else {
# Rename original PDF file to a backup filename for --inplace.
if ($inplace and $output_pdf ne "-") {
rename $output_pdf, "$output_pdf.bak" or die "$output_pdf -> $output_pdf.bak: $!\n";
print STDERR "Renamed original PDF file \"$output_pdf\" to \"$output_pdf.bak\".\n\n";
}

# Generate output PDF.
$pdf->{-compress} = $pdf->{-minify} = 0;
$pdf->{-decompress} = 1 if $decompress;
$pdf->write_pdf($output_pdf, $time);
}

0 comments on commit 418b38f

Please sign in to comment.