Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pipeline bug, doc bugs, auto split new URL and doc #2906

Merged
merged 6 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ ext {
}

group = "stirling.software"
version = "0.40.2"
version = "0.41.0"

java {
// 17 is lowest but we support and recommend 21
Expand Down
Binary file removed docs/stirling-pdf.png
Binary file not shown.
1 change: 0 additions & 1 deletion docs/stirling-transparent.svg

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

Expand Down Expand Up @@ -41,8 +43,12 @@
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class AutoSplitPdfController {

private static final String QR_CONTENT = "https://github.com/Stirling-Tools/Stirling-PDF";
private static final String QR_CONTENT_OLD = "https://github.com/Frooodle/Stirling-PDF";
private static final Set<String> VALID_QR_CONTENTS =
new HashSet<>(
Set.of(
"https://github.com/Stirling-Tools/Stirling-PDF",
"https://github.com/Frooodle/Stirling-PDF",
"https://stirlingpdf.com"));

private final CustomPDDocumentFactory pdfDocumentFactory;

Expand Down Expand Up @@ -120,13 +126,14 @@ public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest r
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 150);
String result = decodeQRCode(bim);
if ((QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result)) && page != 0) {

boolean isValidQrCode = VALID_QR_CONTENTS.contains(result);
log.debug("detected qr code {}, code is vale={}", result, isValidQrCode);
if (isValidQrCode && page != 0) {
splitDocuments.add(new PDDocument());
}

if (!splitDocuments.isEmpty()
&& !QR_CONTENT.equals(result)
&& !QR_CONTENT_OLD.equals(result)) {
if (!splitDocuments.isEmpty() && !isValidQrCode) {
splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page));
} else if (page == 0) {
PDDocument firstDocument = new PDDocument();
Expand All @@ -135,7 +142,7 @@ public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest r
}

// If duplexMode is true and current page is a divider, then skip next page
if (duplexMode && (QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result))) {
if (duplexMode && isValidQrCode) {
page++;
}
}
Expand Down Expand Up @@ -168,6 +175,9 @@ public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest r

return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
} catch (Exception e) {
log.error("Error in auto split", e);
throw e;
} finally {
// Clean up resources
if (document != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class ExtractImagesController {
@Operation(
summary = "Extract images from a PDF file",
description =
"This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input: PDF Output: IMAGE/ZIP Type: SIMO")
"This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
public ResponseEntity<byte[]> extractImages(@ModelAttribute PDFExtractImagesRequest request)
throws IOException, InterruptedException, ExecutionException {
MultipartFile file = request.getFileInput();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public FlattenController(CustomPDDocumentFactory pdfDocumentFactory) {
@Operation(
summary = "Flatten PDF form fields or full page",
description =
"Flattening just PDF form fields or converting each page to images to make text unselectable. Input: PDF, Output: PDF. Type: SISO")
"Flattening just PDF form fields or converting each page to images to make text unselectable. Input:PDF, Output:PDF. Type:SISO")
public ResponseEntity<byte[]> flatten(@ModelAttribute FlattenRequest request) throws Exception {
MultipartFile file = request.getFileInput();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import io.swagger.v3.oas.annotations.Operation;

import javax.imageio.ImageIO;

import org.apache.pdfbox.multipdf.PDFMergerUtility;
Expand All @@ -26,6 +26,7 @@

import io.github.pixee.security.BoundedLineReader;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;

import lombok.extern.slf4j.Slf4j;
Expand Down Expand Up @@ -65,9 +66,10 @@ public List<String> getAvailableTesseractLanguages() {
}

@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
@Operation(
summary = "Process PDF files with OCR using Tesseract",
description = "Takes a PDF file as input, performs OCR using specified languages and OCR type (skip-text/force-ocr), and returns the processed PDF. Input:PDF Output:PDF Type:SISO")
@Operation(
summary = "Process PDF files with OCR using Tesseract",
description =
"Takes a PDF file as input, performs OCR using specified languages and OCR type (skip-text/force-ocr), and returns the processed PDF. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> processPdfWithOCR(
@ModelAttribute ProcessPdfWithOcrRequest request)
throws IOException, InterruptedException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.PipelineConfig;
import stirling.software.SPDF.model.PipelineResult;
import stirling.software.SPDF.model.api.HandleDataRequest;
import stirling.software.SPDF.utils.WebResponseUtils;

Expand Down Expand Up @@ -58,7 +59,8 @@ public ResponseEntity<byte[]> handleData(@ModelAttribute HandleDataRequest reque
if (inputFiles == null || inputFiles.size() == 0) {
return null;
}
List<Resource> outputFiles = processor.runPipelineAgainstFiles(inputFiles, config);
PipelineResult result = processor.runPipelineAgainstFiles(inputFiles, config);
List<Resource> outputFiles = result.getOutputFiles();
if (outputFiles != null && outputFiles.size() == 1) {
// If there is only one file, return it directly
Resource singleFile = outputFiles.get(0);
Expand Down
Loading