diff --git a/docs/language-usage.rst b/docs/language-usage.rst index 83b965bad1b4..119f7f41e966 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -134,14 +134,14 @@ to content stored in `Google Cloud Storage`_. We can use the >>> document.doc_type == language.Document.PLAIN_TEXT True -and the :meth:`~gcloud.language.client.Client.document_from_uri` +and the :meth:`~gcloud.language.client.Client.document_from_url` method. In either case, the document type can be specified with the ``doc_type`` argument: .. code-block:: python >>> gcs_url = 'gs://my-text-bucket/sentiment-me.txt' - >>> document = client.document_from_uri( + >>> document = client.document_from_url( ... gcs_url, doc_type=language.Document.HTML) >>> document.gcs_url == gcs_url True diff --git a/gcloud/language/client.py b/gcloud/language/client.py index 8b4bc0835478..8552f011ed9d 100644 --- a/gcloud/language/client.py +++ b/gcloud/language/client.py @@ -54,7 +54,7 @@ def document_from_text(self, content, **kwargs): :rtype: :class:`Document` :returns: A plain-text document bound to this client. - :raises: :class:`TypeError` if ``doc_type`` is passed as a + :raises: :class:`~exceptions.TypeError` if ``doc_type`` is passed as a keyword argument. """ if 'doc_type' in kwargs: @@ -74,10 +74,33 @@ def document_from_html(self, content, **kwargs): :rtype: :class:`Document` :returns: An HTML document bound to this client. - :raises: :class:`TypeError` if ``doc_type`` is passed as a + :raises: :class:`~exceptions.TypeError` if ``doc_type`` is passed as a keyword argument. """ if 'doc_type' in kwargs: raise TypeError('Cannot pass doc_type') return Document(self, content=content, doc_type=Document.HTML, **kwargs) + + def document_from_url(self, gcs_url, + doc_type=Document.PLAIN_TEXT, **kwargs): + """Create a Cloud Storage document bound to this client. + + :type gcs_url: str + :param gcs_url: The URL of the Google Cloud Storage object + holding the content. 
Of the form + ``gs://{bucket}/{blob-name}``. + + :type doc_type: str + :param doc_type: (Optional) The type of text in the document. + Defaults to plain text. Can also be specified + as HTML via :attr:`~.Document.HTML`. + + :type kwargs: dict + :param kwargs: Remaining keyword arguments to be passed along to the + :class:`Document` constructor. + + :rtype: :class:`Document` + :returns: A document bound to this client. + """ + return Document(self, gcs_url=gcs_url, doc_type=doc_type, **kwargs) diff --git a/gcloud/language/document.py b/gcloud/language/document.py index dff524964afd..f25c85a4f4c7 100644 --- a/gcloud/language/document.py +++ b/gcloud/language/document.py @@ -55,10 +55,11 @@ class Document(object): :type gcs_url: str :param gcs_url: (Optional) The URL of the Google Cloud Storage object - holding the content. + holding the content. Of the form + ``gs://{bucket}/{blob-name}``. :type doc_type: str - :param doc_type: (Optional) The encoding of the document text. + :param doc_type: (Optional) The type of text in the document. Defaults to plain text. Can be one of :attr:`~.Document.PLAIN_TEXT` or or :attr:`~.Document.HTML`. 
diff --git a/gcloud/language/test_client.py b/gcloud/language/test_client.py index 8d3b20340a71..4e96d1f9ea9f 100644 --- a/gcloud/language/test_client.py +++ b/gcloud/language/test_client.py @@ -87,6 +87,40 @@ def test_document_from_html_factory_failure(self): with self.assertRaises(TypeError): client.document_from_html('abc', doc_type='foo') + def test_document_from_url_factory(self): + from gcloud.language.document import Document + + creds = _Credentials() + client = self._makeOne(project='PROJECT', + credentials=creds, http=object()) + + gcs_url = 'gs://my-text-bucket/sentiment-me.txt' + document = client.document_from_url(gcs_url) + self.assertIsInstance(document, Document) + self.assertIs(document.client, client) + self.assertIsNone(document.content) + self.assertEqual(document.gcs_url, gcs_url) + self.assertEqual(document.doc_type, Document.PLAIN_TEXT) + + def test_document_from_url_factory_explicit(self): + from gcloud.language.document import Document + from gcloud.language.document import Encoding + + creds = _Credentials() + client = self._makeOne(project='PROJECT', + credentials=creds, http=object()) + + encoding = Encoding.UTF32 + gcs_url = 'gs://my-text-bucket/sentiment-me.txt' + document = client.document_from_url(gcs_url, doc_type=Document.HTML, + encoding=encoding) + self.assertIsInstance(document, Document) + self.assertIs(document.client, client) + self.assertIsNone(document.content) + self.assertEqual(document.gcs_url, gcs_url) + self.assertEqual(document.doc_type, Document.HTML) + self.assertEqual(document.encoding, encoding) + class _Credentials(object):