GoogleCloudPlatform · texasmichelle · Jan 28, 2020 · Jun 2, 2019 · Jun 7, 2019 · Jan 21, 2020
diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample
+#   ("LongRunningPromise",  "vision_async_batch_annotate_images")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-vision
+
+# sample-metadata
+#   title: Async Batch Image Annotation
+#   description: Perform async batch image annotation
+#   usage: python3 samples/v1/vision_async_batch_annotate_images.py \
+#     [--input_image_uri \
+#     "gs://cloud-samples-data/vision/label/wakeupcat.jpg"] \
+#     [--output_uri "gs://your-bucket/prefix/"]
+
+# [START vision_async_batch_annotate_images]
+
+from google.cloud import vision_v1
+from google.cloud.vision_v1 import enums
+import six
+
+
+def sample_async_batch_annotate_images(input_image_uri, output_uri):
+    """Perform async batch image annotation and wait for it to finish.
+
+    Args:
+      input_image_uri URI of the image to annotate, e.g. gs://bucket/cat.jpg
+      output_uri Cloud Storage URI prefix that the output is written under
+    """
+    # [START vision_async_batch_annotate_images_core]
+
+    client = vision_v1.ImageAnnotatorClient()
+
+    # input_image_uri = 'gs://cloud-samples-data/vision/label/wakeupcat.jpg'
+    # output_uri = 'gs://your-bucket/prefix/'
+
+    if isinstance(input_image_uri, six.binary_type):
+        input_image_uri = input_image_uri.decode("utf-8")
+    if isinstance(output_uri, six.binary_type):
+        output_uri = output_uri.decode("utf-8")
+
+    # One request: label detection plus image properties for a single image.
+    feature_types = enums.Feature.Type
+    wanted = (feature_types.LABEL_DETECTION, feature_types.IMAGE_PROPERTIES)
+    requests = [{
+        "image": {"source": {"image_uri": input_image_uri}},
+        "features": [{"type": kind} for kind in wanted],
+    }]
+
+    # batch_size is the max number of responses to output in each JSON file
+    output_config = {"gcs_destination": {"uri": output_uri}, "batch_size": 2}
+
+    operation = client.async_batch_annotate_images(requests, output_config)
+    print("Waiting for operation to complete...")
+    response = operation.result()
+
+    # The output is written to GCS with the provided output_uri as prefix
+    written_prefix = response.output_config.gcs_destination.uri
+    print("Output written to GCS with prefix: {}".format(written_prefix))
+
+    # [END vision_async_batch_annotate_images_core]
+
+
+# [END vision_async_batch_annotate_images]
+
+
+def main():
+    """Parse command-line flags and run the sample with them."""
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    default_image = "gs://cloud-samples-data/vision/label/wakeupcat.jpg"
+    parser.add_argument("--input_image_uri", type=str, default=default_image)
+    parser.add_argument(
+        "--output_uri", type=str, default="gs://your-bucket/prefix/"
+    )
+    flags = parser.parse_args()
+    image_uri, destination = flags.input_image_uri, flags.output_uri
+
+    sample_async_batch_annotate_images(image_uri, destination)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py
@@ -0,0 +1,53 @@
+# Copyright 2020 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import uuid
+
+from google.cloud import storage
+
+import vision_async_batch_annotate_images
+
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
+GCS_ROOT = "gs://cloud-samples-data/vision/"
+
+BUCKET = os.environ["CLOUD_STORAGE_BUCKET"]  # KeyError at import if unset
+OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4())  # random per import
+GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX)
+
+
+def test_sample_asyn_batch_annotate_images(capsys):
+    """Run the async sample end to end and check it writes output to GCS.
+
+    Output blobs are deleted in a ``finally`` block so that a failing sample
+    or assertion does not leave objects behind in the test bucket.
+    """
+    bucket = storage.Client().get_bucket(BUCKET)
+    # Start from an empty prefix so the output check below is meaningful.
+    for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
+        blob.delete()
+    assert not list(bucket.list_blobs(prefix=OUTPUT_PREFIX))
+    # Plain concatenation: GCS URIs always use "/", unlike os.path.join.
+    input_image_uri = GCS_ROOT + "label/wakeupcat.jpg"
+    try:
+        vision_async_batch_annotate_images.sample_async_batch_annotate_images(
+            input_image_uri=input_image_uri, output_uri=GCS_DESTINATION_URI
+        )
+        out, _ = capsys.readouterr()
+        assert "Output written to GCS" in out
+        assert list(bucket.list_blobs(prefix=OUTPUT_PREFIX))
+    finally:
+        for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
+            blob.delete()
+    assert not list(bucket.list_blobs(prefix=OUTPUT_PREFIX))
diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample
+#   ("Request",  "vision_batch_annotate_files")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-vision
+
+# sample-metadata
+#   title: Batch File Annotation
+#   description: Perform batch file annotation
+#   usage: python3 samples/v1/vision_batch_annotate_files.py \
+#     [--file_path "resources/kafka.pdf"]
+
+# [START vision_batch_annotate_files]
+
+from google.cloud import vision_v1
+from google.cloud.vision_v1 import enums
+import io
+import six
+
+
+def sample_batch_annotate_files(file_path):
+    """
+    Detect document text on selected pages of a local file and print it.
+
+    Prints the full text of each returned response followed by the
+    confidence of every block, paragraph, word and symbol.
+
+    Args:
+      file_path Path to local pdf file, e.g. /path/document.pdf
+    """
+    # [START vision_batch_annotate_files_core]
+
+    client = vision_v1.ImageAnnotatorClient()
+
+    # file_path = 'resources/kafka.pdf'
+
+    if isinstance(file_path, six.binary_type):
+        file_path = file_path.decode("utf-8")
+
+    # Supported mime_type: application/pdf, image/tiff, image/gif
+    mime_type = "application/pdf"
+
+    # The raw file bytes are sent inline as part of the request.
+    with io.open(file_path, "rb") as document:
+        content = document.read()
+
+    # The service can process up to 5 pages per document file. Here we specify
+    # the first, second, and last page of the document to be processed.
+    pages = [1, 2, -1]
+    requests = [{
+        "input_config": {"mime_type": mime_type, "content": content},
+        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
+        "pages": pages,
+    }]
+
+    # Output templates, one per level of the text hierarchy.
+    block_fmt = u"\nBlock confidence: {}"
+    paragraph_fmt = u"\tParagraph confidence: {}"
+    word_fmt = u"\t\tWord confidence: {}"
+    symbol_fmt = u"\t\t\tSymbol: {}, (confidence: {})"
+
+    response = client.batch_annotate_files(requests)
+    # One file was submitted, so only the first file-level response is read.
+    for page_result in response.responses[0].responses:
+        annotation = page_result.full_text_annotation
+        print(u"Full text: {}".format(annotation.text))
+        for page in annotation.pages:
+            for block in page.blocks:
+                print(block_fmt.format(block.confidence))
+                for paragraph in block.paragraphs:
+                    print(paragraph_fmt.format(paragraph.confidence))
+                    for word in paragraph.words:
+                        print(word_fmt.format(word.confidence))
+                        for symbol in word.symbols:
+                            print(symbol_fmt.format(
+                                symbol.text, symbol.confidence))
+
+    # [END vision_batch_annotate_files_core]
+
+
+# [END vision_batch_annotate_files]
+
+
+def main():
+    """Read --file_path from the command line and run the sample on it."""
+    import argparse
+
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--file_path", type=str, default="resources/kafka.pdf")
+    options = cli.parse_args()
+    sample_batch_annotate_files(options.file_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample
+#   ("Request",  "vision_batch_annotate_files_gcs")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-vision
+
+# sample-metadata
+#   title: Batch File Annotation (Cloud Storage)
+#   description: Perform batch file annotation
+#   usage: python3 samples/v1/vision_batch_annotate_files_gcs.py \
+#     [--storage_uri \
+#     "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"]
+
+# [START vision_batch_annotate_files_gcs]
+
+from google.cloud import vision_v1
+from google.cloud.vision_v1 import enums
+import six
+
+
+def sample_batch_annotate_files(storage_uri):
+    """
+    Detect document text on selected pages of a file in Cloud Storage.
+
+    Prints the full text of each returned response followed by the
+    confidence of every block, paragraph, word and symbol.
+
+    Args:
+      storage_uri Cloud Storage URI to source file in the format
+        gs://[bucket]/[file]
+    """
+    # [START vision_batch_annotate_files_gcs_core]
+    mime_type = "application/pdf"
+
+    client = vision_v1.ImageAnnotatorClient()
+
+    # storage_uri = (
+    #    'gs://cloud-samples-data/vision/document_understanding/kafka.pdf'
+    # )
+
+    if isinstance(storage_uri, six.binary_type):
+        storage_uri = storage_uri.decode("utf-8")
+
+    # The service can process up to 5 pages per document file.
+    # Here we specify the first, second, and last page of the document to be
+    # processed.
+    pages = [1, 2, -1]
+    requests = [{
+        "input_config": {
+            "gcs_source": {"uri": storage_uri},
+            "mime_type": mime_type,
+        },
+        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
+        "pages": pages,
+    }]
+
+    # Output templates, one per level of the text hierarchy.
+    block_fmt = u"\nBlock confidence: {}"
+    paragraph_fmt = u"\tParagraph confidence: {}"
+    word_fmt = u"\t\tWord confidence: {}"
+    symbol_fmt = u"\t\t\tSymbol: {}, (confidence: {})"
+
+    response = client.batch_annotate_files(requests)
+    # One file was submitted, so only the first file-level response is read.
+    for page_result in response.responses[0].responses:
+        annotation = page_result.full_text_annotation
+        print(u"Full text: {}".format(annotation.text))
+        for page in annotation.pages:
+            for block in page.blocks:
+                print(block_fmt.format(block.confidence))
+                for paragraph in block.paragraphs:
+                    print(paragraph_fmt.format(paragraph.confidence))
+                    for word in paragraph.words:
+                        print(word_fmt.format(word.confidence))
+                        for symbol in word.symbols:
+                            print(symbol_fmt.format(
+                                symbol.text, symbol.confidence))
+
+    # [END vision_batch_annotate_files_gcs_core]
+
+
+# [END vision_batch_annotate_files_gcs]
+
+
+def main():
+    """Read --storage_uri from the command line and run the sample on it."""
+    import argparse
+
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--storage_uri",
+        type=str,
+        default=(
+            "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"
+        ),
+    )
+    options = cli.parse_args()
+    sample_batch_annotate_files(options.storage_uri)
+
+
+if __name__ == "__main__":
+    main()