From dad4f8345bb1a1e0ab16712078a0b3b415af0d5a Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 23 Aug 2016 13:12:54 -0700 Subject: [PATCH 1/4] Docstring and usage doc fixes for language. --- docs/language-usage.rst | 6 +++--- gcloud/language/__init__.py | 1 + gcloud/language/client.py | 16 ++++++++-------- gcloud/language/document.py | 12 +++++++----- gcloud/language/entity.py | 12 +++++++----- 5 files changed, 26 insertions(+), 21 deletions(-) diff --git a/docs/language-usage.rst b/docs/language-usage.rst index c61076d6df2c..c1302dcc59e3 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -37,13 +37,13 @@ create an instance of :class:`~gcloud.language.client.Client`. >>> from gcloud import language >>> client = language.Client() -By default the ``language`` is ``'en'`` and the ``encoding`` is +By default the ``language`` is ``'en-US'`` and the ``encoding`` is UTF-8. To over-ride these values: .. code-block:: python >>> client = language.Client(language='es', - ... encoding=encoding=language.Encoding.UTF16) + ... encoding=language.Encoding.UTF16) The encoding can be one of :attr:`Encoding.UTF8 `, @@ -85,7 +85,7 @@ the client .. code-block:: python >>> document.language - 'en' + 'en-US' >>> document.language == client.language True diff --git a/gcloud/language/__init__.py b/gcloud/language/__init__.py index e4123a035541..4b1df7af3a16 100644 --- a/gcloud/language/__init__.py +++ b/gcloud/language/__init__.py @@ -16,3 +16,4 @@ from gcloud.language.client import Client from gcloud.language.document import Document +from gcloud.language.document import Encoding diff --git a/gcloud/language/client.py b/gcloud/language/client.py index 50b95c56c7cc..6cefce66c66d 100644 --- a/gcloud/language/client.py +++ b/gcloud/language/client.py @@ -50,9 +50,9 @@ def document_from_text(self, content, **kwargs): :type kwargs: dict :param kwargs: Remaining keyword arguments to be passed along to the - :class:`Document` constructor. + :class:`.Document` constructor. 
- :rtype: :class:`Document` + :rtype: :class:`.Document` :returns: A plain-text document bound to this client. :raises: :class:`~exceptions.TypeError` if ``doc_type`` is passed as a keyword argument. @@ -70,9 +70,9 @@ def document_from_html(self, content, **kwargs): :type kwargs: dict :param kwargs: Remaining keyword arguments to be passed along to the - :class:`Document` constructor. + :class:`.Document` constructor. - :rtype: :class:`Document` + :rtype: :class:`.Document` :returns: An HTML document bound to this client. :raises: :class:`~exceptions.TypeError` if ``doc_type`` is passed as a keyword argument. @@ -98,9 +98,9 @@ def document_from_url(self, gcs_url, :type kwargs: dict :param kwargs: Remaining keyword arguments to be passed along to the - :class:`Document` constructor. + :class:`.Document` constructor. - :rtype: :class:`Document` + :rtype: :class:`.Document` :returns: A document bound to this client. """ return Document(self, gcs_url=gcs_url, doc_type=doc_type, **kwargs) @@ -124,9 +124,9 @@ def document_from_blob(self, bucket_name, blob_name, :type kwargs: dict :param kwargs: Remaining keyword arguments to be passed along to the - :class:`Document` constructor. + :class:`.Document` constructor. - :rtype: :class:`Document` + :rtype: :class:`.Document` :returns: A document bound to this client. """ # NOTE: We assume that the bucket and blob name don't diff --git a/gcloud/language/document.py b/gcloud/language/document.py index 11b9db5e242a..0e6cecc4b7f2 100644 --- a/gcloud/language/document.py +++ b/gcloud/language/document.py @@ -20,7 +20,7 @@ from gcloud.language.entity import Entity -DEFAULT_LANGUAGE = 'en' +DEFAULT_LANGUAGE = 'en-US' """Default document language, English.""" @@ -129,12 +129,14 @@ def analyze_entities(self): in the text, entity types, salience, mentions for each entity, and other properties. - See: - https://cloud.google.com/natural-language/reference/\ - rest/v1beta1/documents/analyzeEntities + .. 
_analyzeEntities: https://cloud.google.com/natural-language/\ + reference/rest/v1beta1/documents/analyzeEntities + + See `analyzeEntities`_. :rtype: list - :returns: A list of :class:`Entity` returned from the API. + :returns: A list of :class:`~.language.entity.Entity` returned from + the API. """ data = { 'document': self._to_dict(), diff --git a/gcloud/language/entity.py b/gcloud/language/entity.py index 0b1c26f92da4..4e523d7b5395 100644 --- a/gcloud/language/entity.py +++ b/gcloud/language/entity.py @@ -57,16 +57,18 @@ class Entity(object): so this value will be removed from the passed in ``metadata`` and put in its own property. - See: - https://cloud.google.com/natural-language/reference/rest/v1beta1/Entity + .. _Entity message: https://cloud.google.com/natural-language/\ + reference/rest/v1beta1/Entity + .. _EntityType enum: https://cloud.google.com/natural-language/\ + reference/rest/v1beta1/Entity#Type + + See `Entity message`_. :type name: str :param name: The name / phrase identified as the entity. :type entity_type: str - :param entity_type: The type of the entity. See - https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/Entity#Type + :param entity_type: The type of the entity. See `EntityType enum`_. :type metadata: dict :param metadata: The metadata associated with the entity. From ad6850bde3251a1d43c244203d40c92057c6c1bb Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 23 Aug 2016 13:22:13 -0700 Subject: [PATCH 2/4] Removing project from language Client. The client does not need a project, just a set of credentials for authentication. 
--- docs/language-usage.rst | 3 +-- gcloud/language/client.py | 9 ++------- gcloud/language/document.py | 2 +- gcloud/language/test_client.py | 24 ++++++++---------------- 4 files changed, 12 insertions(+), 26 deletions(-) diff --git a/docs/language-usage.rst b/docs/language-usage.rst index c1302dcc59e3..b802fad33f44 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -23,8 +23,7 @@ Client :class:`~gcloud.language.client.Client` objects provide a means to configure your application. Each instance holds -both a ``project`` and an authenticated connection to the -Natural Language service. +an authenticated connection to the Natural Language service. For an overview of authentication in ``gcloud-python``, see :doc:`gcloud-auth`. diff --git a/gcloud/language/client.py b/gcloud/language/client.py index 6cefce66c66d..aa017b395c59 100644 --- a/gcloud/language/client.py +++ b/gcloud/language/client.py @@ -15,19 +15,14 @@ """Basic client for Google Cloud Natural Language API.""" -from gcloud.client import JSONClient +from gcloud import client as client_module from gcloud.language.connection import Connection from gcloud.language.document import Document -class Client(JSONClient): +class Client(client_module.Client): """Client to bundle configuration needed for API requests. - :type project: str - :param project: the project which the client acts on behalf of. If not - passed, falls back to the default inferred from the - environment. - :type credentials: :class:`~oauth2client.client.OAuth2Credentials` :param credentials: (Optional) The OAuth2 Credentials to use for the connection owned by this client. If not passed (and diff --git a/gcloud/language/document.py b/gcloud/language/document.py index 0e6cecc4b7f2..27531cdfa53f 100644 --- a/gcloud/language/document.py +++ b/gcloud/language/document.py @@ -48,7 +48,7 @@ class Document(object): object. 
:type client: :class:`~gcloud.language.client.Client` - :param client: A client which holds credentials and project + :param client: A client which holds credentials and other configuration. :type content: str diff --git a/gcloud/language/test_client.py b/gcloud/language/test_client.py index ca3c47ed38d3..c540fc533147 100644 --- a/gcloud/language/test_client.py +++ b/gcloud/language/test_client.py @@ -27,10 +27,9 @@ def _makeOne(self, *args, **kw): def test_ctor(self): from gcloud.language.connection import Connection - project = 'PROJECT' creds = _Credentials() http = object() - client = self._makeOne(project=project, credentials=creds, http=http) + client = self._makeOne(credentials=creds, http=http) self.assertIsInstance(client.connection, Connection) self.assertTrue(client.connection.credentials is creds) self.assertTrue(client.connection.http is http) @@ -39,8 +38,7 @@ def test_document_from_text_factory(self): from gcloud.language.document import Document creds = _Credentials() - client = self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) content = 'abc' language = 'es' @@ -55,8 +53,7 @@ def test_document_from_text_factory(self): def test_document_from_text_factory_failure(self): creds = _Credentials() - client = self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) with self.assertRaises(TypeError): client.document_from_text('abc', doc_type='foo') @@ -65,8 +62,7 @@ def test_document_from_html_factory(self): from gcloud.language.document import Document creds = _Credentials() - client = self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) content = 'abc' language = 'ja' @@ -81,8 +77,7 @@ def test_document_from_html_factory(self): def test_document_from_html_factory_failure(self): creds = _Credentials() - client = 
self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) with self.assertRaises(TypeError): client.document_from_html('abc', doc_type='foo') @@ -91,8 +86,7 @@ def test_document_from_url_factory(self): from gcloud.language.document import Document creds = _Credentials() - client = self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) gcs_url = 'gs://my-text-bucket/sentiment-me.txt' document = client.document_from_url(gcs_url) @@ -107,8 +101,7 @@ def test_document_from_url_factory_explicit(self): from gcloud.language.document import Encoding creds = _Credentials() - client = self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) encoding = Encoding.UTF32 gcs_url = 'gs://my-text-bucket/sentiment-me.txt' @@ -125,8 +118,7 @@ def test_document_from_blob_factory(self): from gcloud.language.document import Document creds = _Credentials() - client = self._makeOne(project='PROJECT', - credentials=creds, http=object()) + client = self._makeOne(credentials=creds, http=object()) bucket_name = 'my-text-bucket' blob_name = 'sentiment-me.txt' From 71c57868224e6a1f93090a7f3229ffc3e4b10e12 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Wed, 24 Aug 2016 14:21:54 -0700 Subject: [PATCH 3/4] Removing document_from_blob() helper in language. 
--- docs/language-usage.rst | 20 +++++++------------- gcloud/language/client.py | 29 ----------------------------- gcloud/language/test_client.py | 16 ---------------- 3 files changed, 7 insertions(+), 58 deletions(-) diff --git a/docs/language-usage.rst b/docs/language-usage.rst index b802fad33f44..accaa9a2bf9c 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -122,30 +122,24 @@ The document type (``doc_type``) value can be one of In addition to supplying the text / HTML content, a document can refer to content stored in `Google Cloud Storage`_. We can use the -:meth:`~gcloud.language.client.Client.document_from_blob` method: +:meth:`~gcloud.language.client.Client.document_from_url` method: .. code-block:: python - >>> document = client.document_from_blob('my-text-bucket', - ... 'sentiment-me.txt') - >>> document.gcs_url - 'gs://my-text-bucket/sentiment-me.txt' + >>> gcs_url = 'gs://my-text-bucket/sentiment-me.txt' + >>> document = client.document_from_url( + ... gcs_url, doc_type=language.Document.PLAIN_TEXT) + >>> document.gcs_url == gcs_url + True >>> document.doc_type == language.Document.PLAIN_TEXT True -and the :meth:`~gcloud.language.client.Client.document_from_url` -method. In either case, the document type can be specified with -the ``doc_type`` argument: +The document type can be specified with the ``doc_type`` argument: .. code-block:: python - >>> gcs_url = 'gs://my-text-bucket/sentiment-me.txt' >>> document = client.document_from_url( ... gcs_url, doc_type=language.Document.HTML) - >>> document.gcs_url == gcs_url - True - >>> document.doc_type == language.Document.HTML - True .. _analyzeEntities: https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeEntities .. 
_analyzeSentiment: https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/analyzeSentiment diff --git a/gcloud/language/client.py b/gcloud/language/client.py index aa017b395c59..00702e4396a7 100644 --- a/gcloud/language/client.py +++ b/gcloud/language/client.py @@ -99,32 +99,3 @@ def document_from_url(self, gcs_url, :returns: A document bound to this client. """ return Document(self, gcs_url=gcs_url, doc_type=doc_type, **kwargs) - - def document_from_blob(self, bucket_name, blob_name, - doc_type=Document.PLAIN_TEXT, **kwargs): - """Create a Cloud Storage document bound to this client. - - :type bucket_name: str - :param bucket_name: The name of the bucket that contains the - document text. - - :type blob_name: str - :param blob_name: The name of the blob (within the bucket) that - contains document text. - - :type doc_type: str - :param doc_type: (Optional) The type of text in the document. - Defaults to plain text. Can also be specified - as HTML via :attr:`~.Document.HTML`. - - :type kwargs: dict - :param kwargs: Remaining keyword arguments to be passed along to the - :class:`.Document` constructor. - - :rtype: :class:`.Document` - :returns: A document bound to this client. - """ - # NOTE: We assume that the bucket and blob name don't - # need to be URL-encoded. 
- gcs_url = 'gs://%s/%s' % (bucket_name, blob_name) - return self.document_from_url(gcs_url, doc_type=doc_type, **kwargs) diff --git a/gcloud/language/test_client.py b/gcloud/language/test_client.py index c540fc533147..abfd85b47354 100644 --- a/gcloud/language/test_client.py +++ b/gcloud/language/test_client.py @@ -114,22 +114,6 @@ def test_document_from_url_factory_explicit(self): self.assertEqual(document.doc_type, Document.HTML) self.assertEqual(document.encoding, encoding) - def test_document_from_blob_factory(self): - from gcloud.language.document import Document - - creds = _Credentials() - client = self._makeOne(credentials=creds, http=object()) - - bucket_name = 'my-text-bucket' - blob_name = 'sentiment-me.txt' - gcs_url = 'gs://%s/%s' % (bucket_name, blob_name) - document = client.document_from_blob(bucket_name, blob_name) - self.assertIsInstance(document, Document) - self.assertIs(document.client, client) - self.assertIsNone(document.content) - self.assertEqual(document.gcs_url, gcs_url) - self.assertEqual(document.doc_type, Document.PLAIN_TEXT) - class _Credentials(object): From 23451b9a283a7886f17feb9f7e56905f31ccf21c Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 23 Aug 2016 14:10:25 -0700 Subject: [PATCH 4/4] Adding GCS system test for language analyze_entities(). --- system_tests/language.py | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/system_tests/language.py b/system_tests/language.py index c23afc8711dd..f9e42eb35096 100644 --- a/system_tests/language.py +++ b/system_tests/language.py @@ -14,7 +14,18 @@ import unittest +from gcloud import _helpers +from gcloud.environment_vars import TESTS_PROJECT +from gcloud import exceptions from gcloud import language +from gcloud import storage + +from system_test_utils import unique_resource_id +from retry import RetryErrors + + +# 429 Too Many Requests in case API requests rate-limited. 
+retry_429 = RetryErrors(exceptions.TooManyRequests) class Config(object): @@ -24,25 +35,44 @@ class Config(object): global state. """ CLIENT = None + TEST_BUCKET = None def setUpModule(): + _helpers.PROJECT = TESTS_PROJECT Config.CLIENT = language.Client() + # Now create a bucket for GCS stored content. + storage_client = storage.Client() + bucket_name = 'new' + unique_resource_id() + Config.TEST_BUCKET = storage_client.bucket(bucket_name) + retry_429(Config.TEST_BUCKET.create)() + + +def tearDownModule(): + retry_429(Config.TEST_BUCKET.delete)() class TestLanguage(unittest.TestCase): - def test_analyze_entities(self): + NAME1 = 'Michelangelo Caravaggio' + NAME2 = 'Italian' + NAME3 = 'The Calling of Saint Matthew' + TEXT_CONTENT = '%s, %s painter, is known for %r.' % (NAME1, NAME2, NAME3) + + def setUp(self): + self.to_delete_by_case = [] + + def tearDown(self): + for value in self.to_delete_by_case: + value.delete() + + def _check_analyze_entities_result(self, entities): from gcloud.language.entity import EntityType - text_content = ("Michelangelo Caravaggio, Italian painter, is " - "known for 'The Calling of Saint Matthew'.") - document = Config.CLIENT.document_from_text(text_content) - entities = document.analyze_entities() self.assertEqual(len(entities), 3) entity1, entity2, entity3 = entities # Verify entity 1. - self.assertEqual(entity1.name, 'Michelangelo Caravaggio') + self.assertEqual(entity1.name, self.NAME1) self.assertEqual(entity1.entity_type, EntityType.PERSON) self.assertTrue(0.7 < entity1.salience < 0.8) self.assertEqual(entity1.mentions, [entity1.name]) @@ -50,7 +80,7 @@ def test_analyze_entities(self): 'http://en.wikipedia.org/wiki/Caravaggio') self.assertEqual(entity1.metadata, {}) # Verify entity 2. 
- self.assertEqual(entity2.name, 'Italian') + self.assertEqual(entity2.name, self.NAME2) self.assertEqual(entity2.entity_type, EntityType.LOCATION) self.assertTrue(0.15 < entity2.salience < 0.25) self.assertEqual(entity2.mentions, [entity2.name]) @@ -58,7 +88,7 @@ def test_analyze_entities(self): 'http://en.wikipedia.org/wiki/Italy') self.assertEqual(entity2.metadata, {}) # Verify entity 3. - self.assertEqual(entity3.name, 'The Calling of Saint Matthew') + self.assertEqual(entity3.name, self.NAME3) self.assertEqual(entity3.entity_type, EntityType.EVENT) self.assertTrue(0 < entity3.salience < 0.1) self.assertEqual(entity3.mentions, [entity3.name]) @@ -66,3 +96,22 @@ def test_analyze_entities(self): 'The_Calling_of_St_Matthew_(Caravaggio)') self.assertEqual(entity3.wikipedia_url, wiki_url) self.assertEqual(entity3.metadata, {}) + + def test_analyze_entities(self): + document = Config.CLIENT.document_from_text(self.TEXT_CONTENT) + entities = document.analyze_entities() + self._check_analyze_entities_result(entities) + + def test_analyze_entities_from_blob(self): + # Upload the text to a blob. + bucket_name = Config.TEST_BUCKET.name + blob_name = 'document.txt' + blob = Config.TEST_BUCKET.blob(blob_name) + self.to_delete_by_case.append(blob) # Clean-up. + blob.upload_from_string(self.TEXT_CONTENT) + + # Create a document referencing that blob. + gcs_url = 'gs://%s/%s' % (bucket_name, blob_name) + document = Config.CLIENT.document_from_url(gcs_url) + entities = document.analyze_entities() + self._check_analyze_entities_result(entities)