feat: add connection timeout + misc improvements (#440)

* Add optional connection timeout to gTTS constructor
* Fix bug where filtered tokens were unused
* Fix typos
* Remove _len function; it is not needed, as Python 2 is no longer supported
pndurette · Dec 19, 2023 · bcdb79d · bcdb79d
1 parent dc4ce71
commit bcdb79d
Show file tree

Hide file tree

Showing 9 changed files with 50 additions and 40 deletions.
diff --git a/gtts/__init__.py b/gtts/__init__.py
@@ -2,4 +2,4 @@
 from .version import __version__  # noqa: F401
 from .tts import gTTS, gTTSError
 
-__all__ = ["gTTS", "gTTSError"]
+__all__ = ["__version__", "gTTS", "gTTSError"]
diff --git a/gtts/lang.py b/gtts/lang.py
@@ -38,7 +38,7 @@ def _extra_langs():
     """Define extra languages.
 
     Returns:
-        dict: A dictionnary of extra languages manually defined.
+        dict: A dictionary of extra languages manually defined.
 
             Variations of the ones generated in `_main_langs`,
             observed to provide different dialects or accents or
@@ -64,7 +64,7 @@ def _fallback_deprecated_lang(lang):
 
     Returns:
         string: The language tag, as-is if not deprecated,
-            or a fallack if it exits.
+            or a fallback if it exits.
 
     Example:
         ``en-GB`` returns ``en``.

diff --git a/gtts/tests/test_tts.py b/gtts/tests/test_tts.py
@@ -10,7 +10,7 @@
 # Testing all languages takes some time.
 # Set TEST_LANGS envvar to choose languages to test.
 #  * 'main': Languages extracted from the Web
-#  * 'extra': Languagee set in Languages.EXTRA_LANGS
+#  * 'extra': Language set in Languages.EXTRA_LANGS
 #  * 'all': All of the above
 #  * <csv>: Languages tags list to test
 # Unset TEST_LANGS to test everything ('all')
@@ -125,7 +125,7 @@ def test_msg():
 
 
 def test_infer_msg():
-    """Infer message sucessfully based on context"""
+    """Infer message successfully based on context"""
 
     # Without response:
 
@@ -163,7 +163,7 @@ def test_infer_msg():
     error500 = gTTSError(tts=tts500, response=response500)
     assert (
         error500.msg
-        == "500 (ccc) from TTS API. Probable cause: Uptream API error. Try again later."
+        == "500 (ccc) from TTS API. Probable cause: Upstream API error. Try again later."
     )
 
     # Unknown (ex. 100)
@@ -190,5 +190,23 @@ def test_WebRequest(tmp_path):
         tts.save(filename)
 
 
+@pytest.mark.net
+def test_timeout(tmp_path):
+    # Check default timeout
+    tts = gTTS(text="test")
+    assert tts.timeout is None
+
+    # Check passed in timeout
+    timeout = 1.2
+    tts = gTTS(text="test", timeout=timeout)
+    assert tts.timeout == timeout
+
+    # Make sure an exception is raised when a timeout occurs
+    tts = gTTS(text="test", timeout=0.000001)
+    filename = tmp_path / "save.mp3"
+    with pytest.raises(gTTSError):
+        tts.save(filename)
+
+
 if __name__ == "__main__":
     pytest.main(["-x", __file__])
diff --git a/gtts/tests/test_utils.py b/gtts/tests/test_utils.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import pytest
-from gtts.utils import _minimize, _len, _clean_tokens, _translate_url
+from gtts.utils import _minimize, _clean_tokens, _translate_url
 
 delim = " "
 Lmax = 10
@@ -32,12 +32,12 @@ def test_startwith_delim():
 
 def test_len_ascii():
     text = "Bacon ipsum dolor sit amet flank corned beef."
-    assert _len(text) == 45
+    assert len(text) == 45
 
 
 def test_len_unicode():
     text = u"但在一个重要的任务上"
-    assert _len(text) == 10
+    assert len(text) == 10
 
 
 def test_only_space_and_punc():

diff --git a/gtts/tokenizer/core.py b/gtts/tokenizer/core.py
@@ -229,7 +229,7 @@ class Tokenizer:
 
     Args:
         regex_funcs (list): List of compiled ``regex`` objects. Each
-            functions's pattern will be joined into a single pattern and
+            function's pattern will be joined into a single pattern and
             compiled.
         flags: ``re`` flag(s) to compile with the final regex. Defaults to
             ``re.IGNORECASE``

diff --git a/gtts/tokenizer/pre_processors.py b/gtts/tokenizer/pre_processors.py
@@ -6,7 +6,7 @@
 def tone_marks(text):
     """Add a space after tone-modifying punctuation.
 
-    Because the `tone_marks` tokenizer case will split after a tone-modidfying
+    Because the `tone_marks` tokenizer case will split after a tone-modifying
     punctuation mark, make sure there's whitespace after.
 
     """
@@ -30,7 +30,7 @@ def end_of_line(text):
 
 def abbreviations(text):
     """Remove periods after an abbreviation from a list of known
-    abbrevations that can be spoken the same without that period. This
+    abbreviations that can be spoken the same without that period. This
     prevents having to handle tokenization of that period.
 
     Note:

diff --git a/gtts/tokenizer/tokenizer_cases.py b/gtts/tokenizer/tokenizer_cases.py
@@ -35,7 +35,7 @@ def period_comma():
 def colon():
     """Colon case.
 
-    Match a colon ":" only if not preceeded by a digit.
+    Match a colon ":" only if not preceded by a digit.
     Mainly to prevent a cut in the middle of time notations e.g. 10:01
 
     """

diff --git a/gtts/tts.py b/gtts/tts.py
@@ -9,7 +9,7 @@
 
 from gtts.lang import _fallback_deprecated_lang, tts_langs
 from gtts.tokenizer import Tokenizer, pre_processors, tokenizer_cases
-from gtts.utils import _clean_tokens, _len, _minimize, _translate_url
+from gtts.utils import _clean_tokens, _minimize, _translate_url
 
 __all__ = ["gTTS", "gTTSError"]
 
@@ -50,7 +50,7 @@ class gTTS:
             to catch a language error early. If set to ``True``,
             a ``ValueError`` is raised if ``lang`` doesn't exist.
             Setting ``lang_check`` to ``False`` skips Web requests
-            (to validate language) and therefore speeds up instanciation.
+            (to validate language) and therefore speeds up instantiation.
             Default is ``True``.
         pre_processor_funcs (list): A list of zero or more functions that are
             called to transform (pre-process) text before tokenizing. Those
@@ -73,6 +73,10 @@ class gTTS:
                     tokenizer_cases.other_punctuation
                 ]).run
 
+        timeout (float or tuple, optional): Seconds to wait for the server to
+            send data before giving up, as a float, or a ``(connect timeout,
+            read timeout)`` tuple. ``None`` will wait forever (default).
+
     See Also:
         :doc:`Pre-processing and tokenizing <tokenizer>`
 
@@ -116,6 +120,7 @@ def __init__(
                 tokenizer_cases.other_punctuation,
             ]
         ).run,
+        timeout=None,
     ):
 
         # Debug
@@ -157,6 +162,8 @@ def __init__(
         self.pre_processor_funcs = pre_processor_funcs
         self.tokenizer_func = tokenizer_func
 
+        self.timeout = timeout
+
     def _tokenize(self, text):
         # Pre-clean
         text = text.strip()
@@ -166,7 +173,7 @@ def _tokenize(self, text):
             log.debug("pre-processing: %s", pp)
             text = pp(text)
 
-        if _len(text) <= self.GOOGLE_TTS_MAX_CHARS:
+        if len(text) <= self.GOOGLE_TTS_MAX_CHARS:
             return _clean_tokens([text])
 
         # Tokenize
@@ -184,7 +191,7 @@ def _tokenize(self, text):
         # Filter empty tokens, post-minimize
         tokens = [t for t in min_tokens if t]
 
-        return min_tokens
+        return tokens
 
     def _prepare_requests(self):
         """Created the TTS API the request(s) without sending them.
@@ -233,7 +240,7 @@ def get_bodies(self):
         """Get TTS API request bodies(s) that would be sent to the TTS API.
 
         Returns:
-            list: A list of TTS API request bodiess to make.
+            list: A list of TTS API request bodies to make.
         """
         return [pr.body for pr in self._prepare_requests()]
 
@@ -259,7 +266,10 @@ def stream(self):
                 with requests.Session() as s:
                     # Send request
                     r = s.send(
-                        request=pr, proxies=urllib.request.getproxies(), verify=False
+                        request=pr,
+                        verify=False,
+                        proxies=urllib.request.getproxies(),
+                        timeout=self.timeout,
                     )
 
                 log.debug("headers-%i: %s", idx, r.request.headers)
@@ -372,6 +382,6 @@ def infer_msg(self, tts, rsp=None):
                     % self.tts.lang
                 )
             elif status >= 500:
-                cause = "Uptream API error. Try again later."
+                cause = "Upstream API error. Try again later."
 
         return "{}. Probable cause: {}".format(premise, cause)
diff --git a/gtts/utils.py b/gtts/utils.py
@@ -36,9 +36,9 @@ def _minimize(the_string, delim, max_size):
     # i.e. prevent a recursive infinite loop on `the_string[0:0]`
     # if `the_string` starts with `delim` and is larger than `max_size`
     if the_string.startswith(delim):
-        the_string = the_string[_len(delim) :]
+        the_string = the_string[len(delim):]
 
-    if _len(the_string) > max_size:
+    if len(the_string) > max_size:
         try:
             # Find the highest index of `delim` in `the_string[0:max_size]`
             # i.e. `the_string` will be cut in half on `delim` index
@@ -53,24 +53,6 @@ def _minimize(the_string, delim, max_size):
         return [the_string]
 
 
-def _len(text):
-    """Same as ``len(text)`` for a string but that decodes
-    ``text`` first in Python 2.x
-
-    Args:
-        text (string): String to get the size of.
-
-    Returns:
-        int: The size of the string.
-    """
-    try:
-        # Python 2
-        return len(unicode(text))
-    except NameError:  # pragma: no cover
-        # Python 3
-        return len(text)
-
-
 def _clean_tokens(tokens):
     """Clean a list of strings