diff --git a/vision/cloud-client/detect/README.rst b/vision/cloud-client/detect/README.rst index 6074419cbd27..8a077ab0406f 100644 --- a/vision/cloud-client/detect/README.rst +++ b/vision/cloud-client/detect/README.rst @@ -81,7 +81,7 @@ To run this sample: $ python detect.py usage: detect.py [-h] - {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri} + {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri,object-localization,object-localization-uri} ... This application demonstrates how to perform basic operations with the @@ -94,13 +94,15 @@ To run this sample: python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg - python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/ + python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/ + python detect.py object-localization ./resources/puppies.jpg + python detect.py object-localization-uri gs://... For more information, the documentation at https://cloud.google.com/vision/docs. 
positional arguments: - {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri} + {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri,object-localization,object-localization-uri} faces Detects faces in an image. faces-uri Detects faces in the file located in Google Cloud Storage or the web. @@ -126,10 +128,10 @@ To run this sample: web-uri Detects web annotations in the file located in Google Cloud Storage. web-geo Detects web annotations given an image, using the - geotag metadata in the iamge to detect web entities. + geotag metadata in the image to detect web entities. web-geo-uri Detects web annotations given an image in the file located in Google Cloud Storage., using the geotag - metadata in the iamge to detect web entities. + metadata in the image to detect web entities. crophints Detects crop hints in an image. crophints-uri Detects crop hints in the file located in Google Cloud Storage. @@ -137,6 +139,10 @@ To run this sample: document-uri Detects document features in the file located in Google Cloud Storage. ocr-uri OCR with PDF/TIFF as source files on GCS + object-localization + Localize objects in the local image. + object-localization-uri + Localize objects in the image on Google Cloud Storage optional arguments: -h, --help show this help message and exit @@ -166,8 +172,8 @@ To run this sample: Example Usage: python beta_snippets.py -h - python beta_snippets.py object-localizer INPUT_IMAGE - python beta_snippets.py object-localizer-uri gs://... + python beta_snippets.py object-localization INPUT_IMAGE + python beta_snippets.py object-localization-uri gs://... 
python beta_snippets.py handwritten-ocr INPUT_IMAGE python beta_snippets.py handwritten-ocr-uri gs://... diff --git a/vision/cloud-client/detect/detect.py b/vision/cloud-client/detect/detect.py index c8fbae44764e..8a285b785581 100644 --- a/vision/cloud-client/detect/detect.py +++ b/vision/cloud-client/detect/detect.py @@ -24,8 +24,10 @@ python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg -python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \ +python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \ gs://BUCKET_NAME/PREFIX/ +python detect.py object-localization ./resources/puppies.jpg +python detect.py object-localization-uri gs://... For more information, the documentation at https://cloud.google.com/vision/docs. @@ -35,14 +37,11 @@ import io import re -from google.cloud import storage -from google.cloud import vision -from google.protobuf import json_format - # [START vision_face_detection] def detect_faces(path): """Detects faces in an image.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_face_detection] @@ -77,6 +76,7 @@ def detect_faces(path): # [START vision_face_detection_gcs] def detect_faces_uri(uri): """Detects faces in the file located in Google Cloud Storage or the web.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_image_uri] image = vision.types.Image() @@ -106,6 +106,7 @@ def detect_faces_uri(uri): # [START vision_label_detection] def detect_labels(path): """Detects labels in the file.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_label_detection] @@ -128,6 +129,7 @@ def detect_labels(path): def detect_labels_uri(uri): """Detects labels in the file located in Google Cloud Storage or on the Web.""" + from google.cloud import vision 
client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -144,6 +146,7 @@ def detect_labels_uri(uri): # [START vision_landmark_detection] def detect_landmarks(path): """Detects landmarks in the file.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_landmark_detection] @@ -170,6 +173,7 @@ def detect_landmarks(path): def detect_landmarks_uri(uri): """Detects landmarks in the file located in Google Cloud Storage or on the Web.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -186,6 +190,7 @@ def detect_landmarks_uri(uri): # [START vision_logo_detection] def detect_logos(path): """Detects logos in the file.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_logo_detection] @@ -208,6 +213,7 @@ def detect_logos(path): def detect_logos_uri(uri): """Detects logos in the file located in Google Cloud Storage or on the Web. 
""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -224,6 +230,7 @@ def detect_logos_uri(uri): # [START vision_safe_search_detection] def detect_safe_search(path): """Detects unsafe features in the file.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_safe_search_detection] @@ -253,6 +260,7 @@ def detect_safe_search(path): def detect_safe_search_uri(uri): """Detects unsafe features in the file located in Google Cloud Storage or on the Web.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -276,6 +284,7 @@ def detect_safe_search_uri(uri): # [START vision_text_detection] def detect_text(path): """Detects text in the file.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_text_detection] @@ -303,6 +312,7 @@ def detect_text(path): def detect_text_uri(uri): """Detects text in the file located in Google Cloud Storage or on the Web. 
""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -324,6 +334,7 @@ def detect_text_uri(uri): # [START vision_image_property_detection] def detect_properties(path): """Detects image properties in the file.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_image_properties] @@ -350,6 +361,7 @@ def detect_properties(path): def detect_properties_uri(uri): """Detects image properties in the file located in Google Cloud Storage or on the Web.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -370,6 +382,7 @@ def detect_properties_uri(uri): # [START vision_web_detection] def detect_web(path): """Detects web annotations given an image.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_web_detection] @@ -427,6 +440,7 @@ def detect_web(path): # [START vision_web_detection_gcs] def detect_web_uri(uri): """Detects web annotations in the file located in Google Cloud Storage.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -480,6 +494,7 @@ def detect_web_uri(uri): def web_entities_include_geo_results(path): """Detects web annotations given an image, using the geotag metadata in the image to detect web entities.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() with io.open(path, 'rb') as image_file: @@ -505,6 +520,7 @@ def web_entities_include_geo_results_uri(uri): """Detects web annotations given an image in the file located in Google Cloud Storage., using the geotag metadata in the image to detect web entities.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() @@ -526,6 +542,7 @@ def 
web_entities_include_geo_results_uri(uri): # [START vision_crop_hint_detection] def detect_crop_hints(path): """Detects crop hints in an image.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_crop_hints] @@ -554,6 +571,7 @@ def detect_crop_hints(path): # [START vision_crop_hint_detection_gcs] def detect_crop_hints_uri(uri): """Detects crop hints in the file located in Google Cloud Storage.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -578,6 +596,7 @@ def detect_crop_hints_uri(uri): # [START vision_fulltext_detection] def detect_document(path): """Detects document features in an image.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() # [START vision_python_migration_document_text_detection] @@ -614,6 +633,7 @@ def detect_document(path): def detect_document_uri(uri): """Detects document features in the file located in Google Cloud Storage.""" + from google.cloud import vision client = vision.ImageAnnotatorClient() image = vision.types.Image() image.source.image_uri = uri @@ -644,6 +664,9 @@ def detect_document_uri(uri): # [START vision_text_detection_pdf_gcs] def async_detect_document(gcs_source_uri, gcs_destination_uri): """OCR with PDF/TIFF as source files on GCS""" + from google.cloud import vision + from google.cloud import storage + from google.protobuf import json_format # Supported mime_types are: 'application/pdf' and 'image/tiff' mime_type = 'application/pdf' @@ -711,6 +734,57 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri): # [END vision_text_detection_pdf_gcs] +# [START vision_localize_objects] +def localize_objects(path): + """Localize objects in the local image. + + Args: + path: The path to the local file. 
+ """ + from google.cloud import vision + client = vision.ImageAnnotatorClient() + + with open(path, 'rb') as image_file: + content = image_file.read() + image = vision.types.Image(content=content) + + objects = client.object_localization( + image=image).localized_object_annotations + + print('Number of objects found: {}'.format(len(objects))) + for object_ in objects: + print('\n{} (confidence: {})'.format(object_.name, object_.score)) + print('Normalized bounding polygon vertices: ') + for vertex in object_.bounding_poly.normalized_vertices: + print(' - ({}, {})'.format(vertex.x, vertex.y)) +# [END vision_localize_objects] + + +# [START vision_localize_objects_gcs] +def localize_objects_uri(uri): + """Localize objects in the image on Google Cloud Storage + + Args: + uri: The path to the file in Google Cloud Storage (gs://...) + """ + from google.cloud import vision + client = vision.ImageAnnotatorClient() + + image = vision.types.Image() + image.source.image_uri = uri + + objects = client.object_localization( + image=image).localized_object_annotations + + print('Number of objects found: {}'.format(len(objects))) + for object_ in objects: + print('\n{} (confidence: {})'.format(object_.name, object_.score)) + print('Normalized bounding polygon vertices: ') + for vertex in object_.bounding_poly.normalized_vertices: + print(' - ({}, {})'.format(vertex.x, vertex.y)) +# [END vision_localize_objects_gcs] + + def run_local(args): if args.command == 'faces': detect_faces(args.path) @@ -734,6 +808,8 @@ def run_local(args): detect_document(args.path) elif args.command == 'web-geo': web_entities_include_geo_results(args.path) + elif args.command == 'object-localization': + localize_objects(args.path) def run_uri(args): @@ -761,6 +837,8 @@ def run_uri(args): web_entities_include_geo_results_uri(args.uri) elif args.command == 'ocr-uri': async_detect_document(args.uri, args.destination_uri) + elif args.command == 'object-localization-uri': + localize_objects_uri(args.uri) if 
__name__ == '__main__': @@ -867,6 +945,14 @@ def run_uri(args): ocr_uri_parser.add_argument('uri') ocr_uri_parser.add_argument('destination_uri') + object_localization_parser = subparsers.add_parser( + 'object-localization', help=localize_objects.__doc__) + object_localization_parser.add_argument('path') + + object_localization_uri_parser = subparsers.add_parser( + 'object-localization-uri', help=localize_objects_uri.__doc__) + object_localization_uri_parser.add_argument('uri') + args = parser.parse_args() if 'uri' in args.command: diff --git a/vision/cloud-client/detect/detect_pdf.py b/vision/cloud-client/detect/detect_pdf.py deleted file mode 100644 index f7e8abed42d2..000000000000 --- a/vision/cloud-client/detect/detect_pdf.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -"""OCR with PDF/TIFF as source files on GCS - -Example: - python detect_pdf.py \ - --gcs-source-uri gs://python-docs-samples-tests/HodgeConj.pdf \ - --gcs-destination-uri gs://BUCKET_NAME/PREFIX/ -""" - -import argparse -import re - -from google.cloud import storage -from google.cloud import vision_v1p2beta1 as vision -from google.protobuf import json_format - - -# [START vision_text_detection_pdf_gcs] -def async_detect_document(gcs_source_uri, gcs_destination_uri): - # Supported mime_types are: 'application/pdf' and 'image/tiff' - mime_type = 'application/pdf' - - # How many pages should be grouped into each json output file. - # With a file of 5 pages - batch_size = 2 - - client = vision.ImageAnnotatorClient() - - feature = vision.types.Feature( - type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION) - - gcs_source = vision.types.GcsSource(uri=gcs_source_uri) - input_config = vision.types.InputConfig( - gcs_source=gcs_source, mime_type=mime_type) - - gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri) - output_config = vision.types.OutputConfig( - gcs_destination=gcs_destination, batch_size=batch_size) - - async_request = vision.types.AsyncAnnotateFileRequest( - features=[feature], input_config=input_config, - output_config=output_config) - - operation = client.async_batch_annotate_files( - requests=[async_request]) - - print('Waiting for the operation to finish.') - operation.result(timeout=90) - - # Once the request has completed and the output has been - # written to GCS, we can list all the output files. - storage_client = storage.Client() - - match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri) - bucket_name = match.group(1) - prefix = match.group(2) - - bucket = storage_client.get_bucket(bucket_name=bucket_name) - - # List objects with the given prefix. - blob_list = list(bucket.list_blobs(prefix=prefix)) - print('Output files:') - for blob in blob_list: - print(blob.name) - - # Process the first output file from GCS. 
- # Since we specified batch_size=2, the first response contains - # the first two pages of the input file. - output = blob_list[0] - - json_string = output.download_as_string() - response = json_format.Parse( - json_string, vision.types.AnnotateFileResponse()) - - # The actual response for the first page of the input file. - first_page_response = response.responses[0] - annotation = first_page_response.full_text_annotation - - # Here we print the full text from the first page. - # The response contains more information: - # annotation/pages/blocks/paragraphs/words/symbols - # including confidence scores and bounding boxes - print(u'Full text:\n{}'.format( - annotation.text)) -# [END vision_text_detection_pdf_gcs] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--gcs-source-uri', required=True) - parser.add_argument('--gcs-destination-uri', required=True) - - args = parser.parse_args() - async_detect_document(args.gcs_source_uri, args.gcs_destination_uri) diff --git a/vision/cloud-client/detect/detect_pdf_test.py b/vision/cloud-client/detect/detect_pdf_test.py deleted file mode 100644 index 98b06aaab74f..000000000000 --- a/vision/cloud-client/detect/detect_pdf_test.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -from google.cloud import storage - -from detect_pdf import async_detect_document - -BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] -OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT' -GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET) -GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX) - - -def test_async_detect_document(capsys): - storage_client = storage.Client() - bucket = storage_client.get_bucket(BUCKET) - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 - - async_detect_document( - gcs_source_uri=GCS_SOURCE_URI, - gcs_destination_uri=GCS_DESTINATION_URI) - out, _ = capsys.readouterr() - - assert 'Hodge conjecture' in out - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 3 - - for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): - blob.delete() diff --git a/vision/cloud-client/detect/detect_test.py b/vision/cloud-client/detect/detect_test.py index d85b2331ace9..a46797692341 100644 --- a/vision/cloud-client/detect/detect_test.py +++ b/vision/cloud-client/detect/detect_test.py @@ -281,6 +281,10 @@ def test_detect_crop_hints_http(capsys): def test_async_detect_document(capsys): storage_client = storage.Client() bucket = storage_client.get_bucket(BUCKET) + if len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0: + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 detect.async_detect_document( @@ -293,3 +297,21 @@ def test_async_detect_document(capsys): for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): blob.delete() + + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + + +def test_localize_objects(capsys): + detect.localize_objects('resources/puppies.jpg') + + out, _ = capsys.readouterr() + assert 'Dog' in out + + +def test_localize_objects_uri(capsys): + uri = 'gs://cloud-samples-data/vision/puppies.jpg' + + detect.localize_objects_uri(uri) + + out, _ = capsys.readouterr() + assert 'Dog' in out diff --git 
a/vision/cloud-client/detect/requirements.txt b/vision/cloud-client/detect/requirements.txt index 2888f5fb6997..ff807b506a5f 100644 --- a/vision/cloud-client/detect/requirements.txt +++ b/vision/cloud-client/detect/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-vision==0.33.0 +google-cloud-vision==0.34.0 google-cloud-storage==1.6.0