From 1f500d30629ebc0e79fbce483efb3cfc1cef4f0d Mon Sep 17 00:00:00 2001 From: Josias Montag Date: Tue, 16 Jul 2024 14:55:37 +0200 Subject: [PATCH] =?UTF-8?q?Add=20--ignore-invalid-pages=20option=20?= =?UTF-8?q?(#45)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add --ignore-invalid-pages option * Update pdf.go change CLI option description --------- Co-authored-by: jerbob92 --- cmd/explode.go | 2 +- cmd/images.go | 2 +- cmd/merge.go | 2 +- cmd/pdf.go | 8 +++++--- cmd/render.go | 2 +- cmd/text.go | 2 +- cmd/thumbnails.go | 2 +- pdf/pdf.go | 16 +++++++++++++++- pdf/pdf_test.go | 28 ++++++++++++++++++++++------ 9 files changed, 48 insertions(+), 16 deletions(-) diff --git a/cmd/explode.go b/cmd/explode.go index 2adcbad..b5b5b91 100644 --- a/cmd/explode.go +++ b/cmd/explode.go @@ -66,7 +66,7 @@ var explodeCmd = &cobra.Command{ pageRange = cmd.Flag("pages").Value.String() } - parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange) + parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange, ignoreInvalidPages) if err != nil { handleError(cmd, fmt.Errorf("invalid page range '%s': %w\n", pageRange, err), ExitCodeInvalidPageRange) return diff --git a/cmd/images.go b/cmd/images.go index c46294d..09cded5 100644 --- a/cmd/images.go +++ b/cmd/images.go @@ -86,7 +86,7 @@ var imagesCmd = &cobra.Command{ pageRange = pages } - parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange) + parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange, ignoreInvalidPages) if err != nil { handleError(cmd, fmt.Errorf("invalid page range '%s': %w\n", pageRange, err), ExitCodeInvalidPageRange) return diff --git a/cmd/merge.go b/cmd/merge.go index 13df834..6abcc16 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -90,7 +90,7 @@ var mergeCmd = &cobra.Command{ return } - pageRange, calculatedPageCount, err := pdf.NormalizePageRange(pageCount.PageCount, 
"first-last") + pageRange, calculatedPageCount, err := pdf.NormalizePageRange(pageCount.PageCount, "first-last", false) if err != nil { closeFunc() handleError(cmd, fmt.Errorf("invalid page range 'first-last': %w\n", err), ExitCodeInvalidPageRange) diff --git a/cmd/pdf.go b/cmd/pdf.go index 368af4a..1602700 100644 --- a/cmd/pdf.go +++ b/cmd/pdf.go @@ -17,9 +17,10 @@ import ( var ( // Used for flags. - password string - stdFileDelimiter string - pages string + password string + stdFileDelimiter string + pages string + ignoreInvalidPages bool ) func addGenericPDFOptions(command *cobra.Command) { @@ -29,6 +30,7 @@ func addGenericPDFOptions(command *cobra.Command) { func addPagesOption(intro string, command *cobra.Command) { command.Flags().StringVarP(&pages, "pages", "", "first-last", intro+". Ranges are like '1-3,5', which will result in a PDF file with pages 1, 2, 3 and 5. You can use the keywords first and last. You can prepend a page number with r to start counting from the end. Examples: use '2-last' for the second page until the last page, use '3-r1' for page 3 until the second-last page.") + command.Flags().BoolVarP(&ignoreInvalidPages, "ignore-invalid-pages", "", false, "Ignore non-existing pages in the pages option.") } func isExperimentalError(err error) bool { diff --git a/cmd/render.go b/cmd/render.go index ad82e85..2e3a31c 100644 --- a/cmd/render.go +++ b/cmd/render.go @@ -88,7 +88,7 @@ var renderCmd = &cobra.Command{ pageRange = pages } - parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange) + parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange, ignoreInvalidPages) if err != nil { handleError(cmd, fmt.Errorf("invalid page range '%s': %w\n", pageRange, err), ExitCodeInvalidPageRange) return diff --git a/cmd/text.go b/cmd/text.go index 68d99af..c97c291 100644 --- a/cmd/text.go +++ b/cmd/text.go @@ -96,7 +96,7 @@ var textCmd = &cobra.Command{ pageRange = pages } - parsedPageRange, _, err := 
pdf.NormalizePageRange(pageCount.PageCount, pageRange) + parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange, ignoreInvalidPages) if err != nil { handleError(cmd, fmt.Errorf("invalid page range '%s': %w\n", pageRange, err), ExitCodeInvalidPageRange) return diff --git a/cmd/thumbnails.go b/cmd/thumbnails.go index 9bbf95b..c6a66cf 100644 --- a/cmd/thumbnails.go +++ b/cmd/thumbnails.go @@ -82,7 +82,7 @@ var thumbnailsCmd = &cobra.Command{ pageRange = pages } - parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange) + parsedPageRange, _, err := pdf.NormalizePageRange(pageCount.PageCount, pageRange, ignoreInvalidPages) if err != nil { handleError(cmd, fmt.Errorf("invalid page range '%s': %w\n", pageRange, err), ExitCodeInvalidPageRange) return diff --git a/pdf/pdf.go b/pdf/pdf.go index 70a7e07..7fdaaed 100644 --- a/pdf/pdf.go +++ b/pdf/pdf.go @@ -27,7 +27,7 @@ func ClosePdfium() { // supports simple instructions like 1-5 or just a page number. This method // can automatically calculate ends and reverse pages for example. // This way we can also properly validate page ranges. 
-func NormalizePageRange(pageCount int, pageRange string) (*string, *int, error) { +func NormalizePageRange(pageCount int, pageRange string, ignoreInvalidPages bool) (*string, *int, error) { calculatedPageCount := 0 var calculatedPageNumbers []string seenPageNumbers := map[int]bool{} @@ -48,10 +48,17 @@ func NormalizePageRange(pageCount int, pageRange string) (*string, *int, error) } else if strings.HasPrefix(pageRangeParts[pageRangePartI], "r") { parsedPageNumber, err := strconv.Atoi(strings.TrimPrefix(pageRangeParts[pageRangePartI], "r")) if err != nil { + if ignoreInvalidPages { + continue + } return nil, nil, fmt.Errorf("%s is not a valid page number", strings.TrimPrefix(pageRangeParts[pageRangePartI], "r")) } if pageCount-parsedPageNumber < 1 || pageCount-parsedPageNumber > pageCount { + if ignoreInvalidPages { + pageNumbers = append(pageNumbers, 1) + continue + } return nil, nil, fmt.Errorf("%d is not a valid page number, the document has %d page(s)", pageCount-parsedPageNumber, pageCount) } @@ -59,10 +66,17 @@ func NormalizePageRange(pageCount int, pageRange string) (*string, *int, error) } else { parsedPageNumber, err := strconv.Atoi(pageRangeParts[pageRangePartI]) if err != nil { + if ignoreInvalidPages { + continue + } return nil, nil, fmt.Errorf("%s is not a valid page number", pageRangeParts[pageRangePartI]) } if parsedPageNumber < 1 || parsedPageNumber > pageCount { + if ignoreInvalidPages { + pageNumbers = append(pageNumbers, pageCount) + continue + } return nil, nil, fmt.Errorf("%s is not a valid page number, the document has %d page(s)", pageRangeParts[pageRangePartI], pageCount) } diff --git a/pdf/pdf_test.go b/pdf/pdf_test.go index 068c396..58a21a6 100644 --- a/pdf/pdf_test.go +++ b/pdf/pdf_test.go @@ -6,16 +6,18 @@ import ( func TestNormalizePageRange(t *testing.T) { tests := []struct { - name string - pageCount int - pageRange string - want string - wantErr string + name string + pageCount int + pageRange string + ignoreInvalidPages bool + want 
string + wantErr string }{ { "test first-last", 5, "first-last", + false, "1,2,3,4,5", "", }, @@ -23,6 +25,7 @@ func TestNormalizePageRange(t *testing.T) { "test page-range", 5, "1-5", + false, "1,2,3,4,5", "", }, @@ -30,6 +33,7 @@ func TestNormalizePageRange(t *testing.T) { "test out of range page-range", 5, "1-10", + false, "1,2,3,4,5", "10 is not a valid page number, the document has 5 page(s)", }, @@ -37,6 +41,7 @@ func TestNormalizePageRange(t *testing.T) { "test out of range page-range", 20, "1,2,4-22", + false, "1,2,3,4,5", "22 is not a valid page number, the document has 20 page(s)", }, @@ -44,6 +49,7 @@ func TestNormalizePageRange(t *testing.T) { "test reverse page-range", 5, "1-r2", + false, "1,2,3", "", }, @@ -51,6 +57,7 @@ func TestNormalizePageRange(t *testing.T) { "test negative reverse page-range", 5, "1-r6", + false, "1,2,3", "-1 is not a valid page number, the document has 5 page(s)", }, @@ -58,14 +65,23 @@ func TestNormalizePageRange(t *testing.T) { "test removal of duplicate pages", 5, "1-3,first-last,2,3", + false, "1,2,3,4,5", "", }, + { + "test ignore invalid pages", + 5, + "3-10,6,2", + true, + "3,4,5,2", + "", + }, } for i := range tests { t.Run(tests[i].name, func(t *testing.T) { - normalizedPageRange, _, err := NormalizePageRange(tests[i].pageCount, tests[i].pageRange) + normalizedPageRange, _, err := NormalizePageRange(tests[i].pageCount, tests[i].pageRange, tests[i].ignoreInvalidPages) if tests[i].wantErr == "" && err != nil { t.Errorf("expected no error but got error %s", err.Error()) } else if tests[i].wantErr != "" && err == nil {