Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add vision ocr for pdf/tiff #1078

Merged
merged 8 commits into from
Apr 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions vision/beta/cloud-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar
```
mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg"
```

#### OCR
```
mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \
gs://<BUCKET_ID>/<OUTPUT_PREFIX>/"
```
7 changes: 6 additions & 1 deletion vision/beta/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-vision</artifactId>
<version>1.22.0</version>
<version>1.24.1</version>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
<version>1.24.1</version>
</dependency>
<!-- [END dependencies] -->

Expand Down
210 changes: 178 additions & 32 deletions vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,63 @@

package com.example.vision;

import com.google.cloud.vision.v1p1beta1.AnnotateImageRequest;
import com.google.cloud.vision.v1p1beta1.AnnotateImageResponse;
import com.google.cloud.vision.v1p1beta1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1p1beta1.Block;
import com.google.cloud.vision.v1p1beta1.ColorInfo;
import com.google.cloud.vision.v1p1beta1.CropHint;
import com.google.cloud.vision.v1p1beta1.CropHintsAnnotation;
import com.google.cloud.vision.v1p1beta1.DominantColorsAnnotation;
import com.google.cloud.vision.v1p1beta1.EntityAnnotation;
import com.google.cloud.vision.v1p1beta1.FaceAnnotation;
import com.google.cloud.vision.v1p1beta1.Feature;
import com.google.cloud.vision.v1p1beta1.Feature.Type;
import com.google.cloud.vision.v1p1beta1.Image;
import com.google.cloud.vision.v1p1beta1.ImageAnnotatorClient;
import com.google.cloud.vision.v1p1beta1.ImageContext;
import com.google.cloud.vision.v1p1beta1.ImageSource;
import com.google.cloud.vision.v1p1beta1.LocationInfo;
import com.google.cloud.vision.v1p1beta1.Page;
import com.google.cloud.vision.v1p1beta1.Paragraph;
import com.google.cloud.vision.v1p1beta1.SafeSearchAnnotation;
import com.google.cloud.vision.v1p1beta1.Symbol;
import com.google.cloud.vision.v1p1beta1.TextAnnotation;
import com.google.cloud.vision.v1p1beta1.WebDetection;
import com.google.cloud.vision.v1p1beta1.WebDetection.WebEntity;
import com.google.cloud.vision.v1p1beta1.WebDetection.WebImage;
import com.google.cloud.vision.v1p1beta1.WebDetection.WebLabel;
import com.google.cloud.vision.v1p1beta1.WebDetection.WebPage;
import com.google.cloud.vision.v1p1beta1.WebDetectionParams;
import com.google.cloud.vision.v1p1beta1.Word;

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Storage.BlobListOption;
import com.google.cloud.storage.StorageOptions;
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse;
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder;
import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest;
import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse;
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest;
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse;
import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse;
import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1p2beta1.Block;
import com.google.cloud.vision.v1p2beta1.ColorInfo;
import com.google.cloud.vision.v1p2beta1.CropHint;
import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation;
import com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation;
import com.google.cloud.vision.v1p2beta1.EntityAnnotation;
import com.google.cloud.vision.v1p2beta1.FaceAnnotation;
import com.google.cloud.vision.v1p2beta1.Feature;
import com.google.cloud.vision.v1p2beta1.Feature.Type;
import com.google.cloud.vision.v1p2beta1.GcsDestination;
import com.google.cloud.vision.v1p2beta1.GcsSource;
import com.google.cloud.vision.v1p2beta1.Image;
import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient;
import com.google.cloud.vision.v1p2beta1.ImageContext;
import com.google.cloud.vision.v1p2beta1.ImageSource;
import com.google.cloud.vision.v1p2beta1.InputConfig;
import com.google.cloud.vision.v1p2beta1.LocationInfo;
import com.google.cloud.vision.v1p2beta1.OperationMetadata;
import com.google.cloud.vision.v1p2beta1.OutputConfig;
import com.google.cloud.vision.v1p2beta1.Page;
import com.google.cloud.vision.v1p2beta1.Paragraph;
import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation;
import com.google.cloud.vision.v1p2beta1.Symbol;
import com.google.cloud.vision.v1p2beta1.TextAnnotation;
import com.google.cloud.vision.v1p2beta1.WebDetection;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel;
import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage;
import com.google.cloud.vision.v1p2beta1.WebDetectionParams;
import com.google.cloud.vision.v1p2beta1.Word;
import com.google.protobuf.ByteString;
import com.google.protobuf.util.JsonFormat;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Detect {

Expand All @@ -78,11 +97,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
out.println("Usage:");
out.printf(
"\tmvn exec:java -DDetect -Dexec.args=\"<command> <path-to-image>\"\n"
+ "\tmvn exec:java -DDetect -Dexec.args=\"ocr <path-to-file> <path-to-destination>\""
+ "\n"
+ "Commands:\n"
+ "\tfaces | labels | landmarks | logos | text | safe-search | properties"
+ "| web | web-entities | web-entities-include-geo | crop \n"
+ "| web | web-entities | web-entities-include-geo | crop | ocr \n"
+ "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
+ "resource (gs://...)\n");
+ "resource (gs://...)\n"
+ "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n"
+ "Path to Destination\n\tA path to the remote destination on Cloud Storage for the"
+ " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n");
return;
}
String command = args[0];
Expand Down Expand Up @@ -162,6 +186,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
} else {
detectDocumentText(path, out);
}
} else if (command.equals("ocr")) {
String destPath = args.length > 2 ? args[2] : "";
detectDocumentsGcs(path, destPath);
}
}

Expand Down Expand Up @@ -1277,4 +1304,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
}
}
// [END vision_detect_document_uri]

// [START vision_async_detect_document_ocr]
/**
 * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
 *
 * <p>Submits an asynchronous DOCUMENT_TEXT_DETECTION request (this sample always declares the
 * input as "application/pdf"), waits up to 180 seconds for it to finish, lists the output files
 * written under the destination prefix and prints the full text of the first page contained in
 * the first listed output file.
 *
 * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
 *     text on.
 * @param gcsDestinationPath The path to the remote destination on Google Cloud Storage to store
 *     the results on. Must have the form gs://BUCKET_NAME/PREFIX/; otherwise the results are not
 *     read back and "No MATCH" is printed.
 * @throws IllegalStateException if the destination bucket cannot be found, or if no usable output
 *     file exists under the destination prefix once the operation has finished.
 * @throws Exception on errors while running the request, reading the results or closing the
 *     client.
 */
public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws
    Exception {
  try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
    List<AsyncAnnotateFileRequest> requests = new ArrayList<>();

    // Set the GCS source path for the remote file.
    GcsSource gcsSource = GcsSource.newBuilder()
        .setUri(gcsSourcePath)
        .build();

    // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
    // types
    InputConfig inputConfig = InputConfig.newBuilder()
        .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff"
        .setGcsSource(gcsSource)
        .build();

    // Set the GCS destination path for where to save the results.
    GcsDestination gcsDestination = GcsDestination.newBuilder()
        .setUri(gcsDestinationPath)
        .build();

    // Create the configuration for the output with the batch size.
    // The batch size sets how many pages should be grouped into each json output file.
    OutputConfig outputConfig = OutputConfig.newBuilder()
        .setBatchSize(2)
        .setGcsDestination(gcsDestination)
        .build();

    // Select the Feature required by the vision API
    Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();

    // Build the OCR request
    AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
        .addFeatures(feature)
        .setInputConfig(inputConfig)
        .setOutputConfig(outputConfig)
        .build();

    requests.add(request);

    // Perform the OCR request
    OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
        client.asyncBatchAnnotateFilesAsync(requests);

    System.out.println("Waiting for the operation to finish.");

    // Wait for the request to finish. The returned value is deliberately ignored, since the API
    // saves the result to the specified location on GCS.
    response.get(180, TimeUnit.SECONDS);

    // Once the request has completed and the output has been
    // written to GCS, we can list all the output files.
    Storage storage = StorageOptions.getDefaultInstance().getService();

    // Get the destination location from the gcsDestinationPath
    Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
    Matcher matcher = pattern.matcher(gcsDestinationPath);

    if (!matcher.find()) {
      System.out.println("No MATCH");
      return;
    }

    String bucketName = matcher.group(1);
    String prefix = matcher.group(2);

    // Get the list of objects with the given prefix from the GCS bucket.
    // Storage.get returns null (rather than throwing) when the bucket does not exist.
    Bucket bucket = storage.get(bucketName);
    if (bucket == null) {
      throw new IllegalStateException("Destination bucket not found: " + bucketName);
    }
    com.google.api.gax.paging.Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));

    Blob firstOutputFile = null;

    // List objects with the given prefix.
    System.out.println("Output files:");
    for (Blob blob : pageList.iterateAll()) {
      System.out.println(blob.getName());

      // Process the first output file from GCS.
      // Since we specified batch size = 2, the first response contains
      // the first two pages of the input file.
      if (firstOutputFile == null) {
        firstOutputFile = blob;
      }
    }

    // Fail with a descriptive message instead of a NullPointerException when nothing was listed.
    if (firstOutputFile == null) {
      throw new IllegalStateException("No output files found under " + gcsDestinationPath);
    }

    // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
    // object. If the Blob is small read all its content in one request
    // (Note: the file is a .json file)
    // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
    // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
    String jsonContents =
        new String(firstOutputFile.getContent(), java.nio.charset.StandardCharsets.UTF_8);
    Builder builder = AnnotateFileResponse.newBuilder();
    JsonFormat.parser().merge(jsonContents, builder);

    // Build the AnnotateFileResponse object
    AnnotateFileResponse annotateFileResponse = builder.build();

    if (annotateFileResponse.getResponsesCount() == 0) {
      throw new IllegalStateException(
          "Output file contains no responses: " + firstOutputFile.getName());
    }

    // Parse through the object to get the actual response for the first page of the input file.
    AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);

    // Here we print the full text from the first page.
    // The response contains more information:
    // annotation/pages/blocks/paragraphs/words/symbols
    // including confidence score and bounding boxes
    System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText());
  }
}
// [END vision_async_detect_document_ocr]
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

import static com.google.common.truth.Truth.assertThat;

import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Storage.BlobListOption;
import com.google.cloud.storage.StorageOptions;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
Expand All @@ -37,6 +42,7 @@ public class DetectIT {
private Detect app;
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
private static final String BUCKET = PROJECT_ID;
private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT";

@Before
public void setUp() throws IOException {
Expand Down Expand Up @@ -348,4 +354,25 @@ public void testDocumentTextGcs() throws Exception {
assertThat(got).contains("37%");
assertThat(got).contains("Word text: class (confidence:");
}

/**
 * Runs the "ocr" command against the sample PDF and checks that the recognized text is printed.
 *
 * <p>The output objects written under OUTPUT_PREFIX/ are deleted in a finally block, so a failed
 * run does not leave stale files behind for the next run to list.
 */
@Test
public void testDetectDocumentsGcs() throws Exception {
  try {
    // Act
    String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf",
        "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"};
    Detect.argsHelper(args, out);

    // Assert
    String got = bout.toString();
    assertThat(got).contains("HODGE'S GENERAL CONJECTURE");
  } finally {
    // Clean up the generated output files whether or not the assertion passed.
    Storage storage = StorageOptions.getDefaultInstance().getService();

    Page<Blob> blobs = storage.list(BUCKET, BlobListOption.currentDirectory(),
        BlobListOption.prefix(OUTPUT_PREFIX + "/"));

    for (Blob blob : blobs.iterateAll()) {
      blob.delete();
    }
  }
}
}