Skip to content

Commit

Permalink
Improve page range parser (#46)
Browse files: browse the repository at this point in the history
  • Loading branch information
jerbob92 authored Jul 19, 2024
1 parent 1f500d3 commit b0758d0
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 25 deletions.
50 changes: 27 additions & 23 deletions pdf/pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,54 +48,54 @@ func NormalizePageRange(pageCount int, pageRange string, ignoreInvalidPages bool
} else if strings.HasPrefix(pageRangeParts[pageRangePartI], "r") {
parsedPageNumber, err := strconv.Atoi(strings.TrimPrefix(pageRangeParts[pageRangePartI], "r"))
if err != nil {
if ignoreInvalidPages {
continue
}
return nil, nil, fmt.Errorf("%s is not a valid page number", strings.TrimPrefix(pageRangeParts[pageRangePartI], "r"))
}

if pageCount-parsedPageNumber < 1 || pageCount-parsedPageNumber > pageCount {
if ignoreInvalidPages {
pageNumbers = append(pageNumbers, 1)
continue
}
return nil, nil, fmt.Errorf("%d is not a valid page number, the document has %d page(s)", pageCount-parsedPageNumber, pageCount)
}

pageNumbers = append(pageNumbers, pageCount-parsedPageNumber)
} else {
parsedPageNumber, err := strconv.Atoi(pageRangeParts[pageRangePartI])
if err != nil {
if ignoreInvalidPages {
continue
}
return nil, nil, fmt.Errorf("%s is not a valid page number", pageRangeParts[pageRangePartI])
}

if parsedPageNumber < 1 || parsedPageNumber > pageCount {
if ignoreInvalidPages {
pageNumbers = append(pageNumbers, pageCount)
continue
}
return nil, nil, fmt.Errorf("%s is not a valid page number, the document has %d page(s)", pageRangeParts[pageRangePartI], pageCount)
}

pageNumbers = append(pageNumbers, parsedPageNumber)
}
}

if len(pageNumbers) == 0 {
continue
} else if len(pageNumbers) == 1 {
if pageNumbers[0] < 1 || pageNumbers[0] > pageCount {
if ignoreInvalidPages {
continue
}
return nil, nil, fmt.Errorf("%d is not a valid page number, the document has %d page(s)", pageNumbers[0], pageCount)
}

_, seen := seenPageNumbers[pageNumbers[0]]
if !seen {
// Only 1 page number.
seenPageNumbers[pageNumbers[0]] = true
calculatedPageNumbers = append(calculatedPageNumbers, strconv.Itoa(pageNumbers[0]))
}
} else {
// If the end page number is lower than the start page number, the range is
// invalid: skip the whole range when ignoring invalid pages, otherwise return an error.
if pageNumbers[1] < pageNumbers[0] {
if ignoreInvalidPages {
continue
}
return nil, nil, fmt.Errorf("%d is not a valid page number, the document has %d page(s)", pageNumbers[1], pageCount)
}

// A page range, a start and end number. Tokens should be replaced by earlier logic.
for i := pageNumbers[0]; i <= pageNumbers[1]; i++ {
if i < 1 || i > pageCount {
if ignoreInvalidPages {
continue
}
return nil, nil, fmt.Errorf("%d is not a valid page number, the document has %d page(s)", i, pageCount)
}

_, seen := seenPageNumbers[i]
if !seen {
seenPageNumbers[i] = true
Expand All @@ -105,6 +105,10 @@ func NormalizePageRange(pageCount int, pageRange string, ignoreInvalidPages bool
}
}

if len(calculatedPageNumbers) == 0 {
return nil, nil, fmt.Errorf("the page range(s) resulted in no valid pages")
}

pageRange = strings.Join(calculatedPageNumbers, ",")
calculatedPageCount = len(calculatedPageNumbers)

Expand Down
28 changes: 26 additions & 2 deletions pdf/pdf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,23 @@ func TestNormalizePageRange(t *testing.T) {
"1-10",
false,
"1,2,3,4,5",
"10 is not a valid page number, the document has 5 page(s)",
"6 is not a valid page number, the document has 5 page(s)",
},
{
"test out of range page-range",
20,
"1,2,4-22",
false,
"1,2,3,4,5",
"22 is not a valid page number, the document has 20 page(s)",
"21 is not a valid page number, the document has 20 page(s)",
},
{
"test out of range page-range",
20,
"1,2,25",
false,
"1,2,3,4,5",
"25 is not a valid page number, the document has 20 page(s)",
},
{
"test reverse page-range",
Expand Down Expand Up @@ -77,6 +85,22 @@ func TestNormalizePageRange(t *testing.T) {
"3,4,5,2",
"",
},
{
"test ignore invalid pages with no valid output",
5,
"r2-r6",
true,
"",
"the page range(s) resulted in no valid pages",
},
{
"test ignore invalid pages",
5,
"r6-r2",
true,
"1,2,3",
"",
},
}

for i := range tests {
Expand Down

0 comments on commit b0758d0

Please sign in to comment.