Skip to content

Commit

Permalink
Adding document_from_url() factory to language client.
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Aug 23, 2016
1 parent 837358c commit bbd2126
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 6 deletions.
4 changes: 2 additions & 2 deletions docs/language-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,14 @@ to content stored in `Google Cloud Storage`_. We can use the
>>> document.doc_type == language.Document.PLAIN_TEXT
True
and the :meth:`~gcloud.language.client.Client.document_from_uri`
and the :meth:`~gcloud.language.client.Client.document_from_url`
method. In either case, the document type can be specified with
the ``doc_type`` argument:

.. code-block:: python
>>> gcs_url = 'gs://my-text-bucket/sentiment-me.txt'
>>> document = client.document_from_uri(
>>> document = client.document_from_url(
... gcs_url, doc_type=language.Document.HTML)
>>> document.gcs_url == gcs_url
True
Expand Down
27 changes: 25 additions & 2 deletions gcloud/language/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def document_from_text(self, content, **kwargs):
:rtype: :class:`Document`
:returns: A plain-text document bound to this client.
:raises: :class:`TypeError` if ``doc_type`` is passed as a
:raises: :class:`~exceptions.TypeError` if ``doc_type`` is passed as a
keyword argument.
"""
if 'doc_type' in kwargs:
Expand All @@ -74,10 +74,33 @@ def document_from_html(self, content, **kwargs):
:rtype: :class:`Document`
:returns: An HTML document bound to this client.
:raises: :class:`TypeError` if ``doc_type`` is passed as a
:raises: :class:`~exceptions.TypeError` if ``doc_type`` is passed as a
keyword argument.
"""
if 'doc_type' in kwargs:
raise TypeError('Cannot pass doc_type')
return Document(self, content=content,
doc_type=Document.HTML, **kwargs)

def document_from_url(self, gcs_url,
                      doc_type=Document.PLAIN_TEXT, **kwargs):
    """Create a Cloud Storage document bound to this client.

    :type gcs_url: str
    :param gcs_url: The URL of the Google Cloud Storage object
                    holding the content. Of the form
                    ``gs://{bucket}/{blob-name}``.

    :type doc_type: str
    :param doc_type: (Optional) The type of text in the document.
                     Defaults to plain text. Can also be specified
                     as HTML via :attr:`~.Document.HTML`.

    :type kwargs: dict
    :param kwargs: Remaining keyword arguments to be passed along to the
                   :class:`Document` constructor.

    :rtype: :class:`Document`
    :returns: A document bound to this client, with the type given
              by ``doc_type``.
    """
    # ``doc_type`` is an explicit argument of this factory, so it is
    # forwarded to the constructor as-is along with any extra keywords.
    return Document(self, gcs_url=gcs_url, doc_type=doc_type, **kwargs)
5 changes: 3 additions & 2 deletions gcloud/language/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,11 @@ class Document(object):
:type gcs_url: str
:param gcs_url: (Optional) The URL of the Google Cloud Storage object
holding the content.
holding the content. Of the form
``gs://{bucket}/{blob-name}``.
:type doc_type: str
:param doc_type: (Optional) The encoding of the document text.
:param doc_type: (Optional) The type of text in the document.
Defaults to plain text. Can be one of
:attr:`~.Document.PLAIN_TEXT` or
:attr:`~.Document.HTML`.
Expand Down
34 changes: 34 additions & 0 deletions gcloud/language/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,40 @@ def test_document_from_html_factory_failure(self):
with self.assertRaises(TypeError):
client.document_from_html('abc', doc_type='foo')

def test_document_from_url_factory(self):
    from gcloud.language.document import Document

    # Only the URL is supplied, so the factory's default doc_type applies.
    client = self._makeOne(project='PROJECT',
                           credentials=_Credentials(), http=object())
    expected_url = 'gs://my-text-bucket/sentiment-me.txt'

    result = client.document_from_url(expected_url)

    self.assertIsInstance(result, Document)
    self.assertIs(result.client, client)
    # A URL-backed document carries no inline content.
    self.assertIsNone(result.content)
    self.assertEqual(result.gcs_url, expected_url)
    self.assertEqual(result.doc_type, Document.PLAIN_TEXT)

def test_document_from_url_factory_explicit(self):
    from gcloud.language.document import Document
    from gcloud.language.document import Encoding

    # Both doc_type and an extra keyword (encoding) are passed explicitly
    # and are expected to show up on the resulting document.
    client = self._makeOne(project='PROJECT',
                           credentials=_Credentials(), http=object())
    expected_url = 'gs://my-text-bucket/sentiment-me.txt'
    expected_encoding = Encoding.UTF32

    result = client.document_from_url(
        expected_url, doc_type=Document.HTML, encoding=expected_encoding)

    self.assertIsInstance(result, Document)
    self.assertIs(result.client, client)
    # A URL-backed document carries no inline content.
    self.assertIsNone(result.content)
    self.assertEqual(result.gcs_url, expected_url)
    self.assertEqual(result.doc_type, Document.HTML)
    self.assertEqual(result.encoding, expected_encoding)


class _Credentials(object):

Expand Down

0 comments on commit bbd2126

Please sign in to comment.