Skip to content

Commit

Permalink
feat: add connection timeout + misc improvements (#440)
Browse files Browse the repository at this point in the history
* Add optional connection timeout to gTTS constructor

* Fix bug where filtered tokens were unused

* Fix typos

* Remove _len function; it is not needed, as Python 2 is no longer supported
  • Loading branch information
austin-bowen authored Dec 19, 2023
1 parent dc4ce71 commit bcdb79d
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 40 deletions.
2 changes: 1 addition & 1 deletion gtts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .version import __version__ # noqa: F401
from .tts import gTTS, gTTSError

__all__ = ["gTTS", "gTTSError"]
__all__ = ["__version__", "gTTS", "gTTSError"]
4 changes: 2 additions & 2 deletions gtts/lang.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def _extra_langs():
"""Define extra languages.
Returns:
dict: A dictionnary of extra languages manually defined.
dict: A dictionary of extra languages manually defined.
Variations of the ones generated in `_main_langs`,
observed to provide different dialects or accents or
Expand All @@ -64,7 +64,7 @@ def _fallback_deprecated_lang(lang):
Returns:
string: The language tag, as-is if not deprecated,
or a fallack if it exits.
or a fallback if it exists.
Example:
``en-GB`` returns ``en``.
Expand Down
24 changes: 21 additions & 3 deletions gtts/tests/test_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Testing all languages takes some time.
# Set TEST_LANGS envvar to choose languages to test.
# * 'main': Languages extracted from the Web
# * 'extra': Languagee set in Languages.EXTRA_LANGS
# * 'extra': Language set in Languages.EXTRA_LANGS
# * 'all': All of the above
# * <csv>: Languages tags list to test
# Unset TEST_LANGS to test everything ('all')
Expand Down Expand Up @@ -125,7 +125,7 @@ def test_msg():


def test_infer_msg():
"""Infer message sucessfully based on context"""
"""Infer message successfully based on context"""

# Without response:

Expand Down Expand Up @@ -163,7 +163,7 @@ def test_infer_msg():
error500 = gTTSError(tts=tts500, response=response500)
assert (
error500.msg
== "500 (ccc) from TTS API. Probable cause: Uptream API error. Try again later."
== "500 (ccc) from TTS API. Probable cause: Upstream API error. Try again later."
)

# Unknown (ex. 100)
Expand All @@ -190,5 +190,23 @@ def test_WebRequest(tmp_path):
tts.save(filename)


@pytest.mark.net
def test_timeout(tmp_path):
# Check default timeout
tts = gTTS(text="test")
assert tts.timeout is None

# Check passed in timeout
timeout = 1.2
tts = gTTS(text="test", timeout=timeout)
assert tts.timeout == timeout

# Make sure an exception is raised when a timeout occurs
tts = gTTS(text="test", timeout=0.000001)
filename = tmp_path / "save.mp3"
with pytest.raises(gTTSError):
tts.save(filename)


if __name__ == "__main__":
pytest.main(["-x", __file__])
6 changes: 3 additions & 3 deletions gtts/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
import pytest
from gtts.utils import _minimize, _len, _clean_tokens, _translate_url
from gtts.utils import _minimize, _clean_tokens, _translate_url

delim = " "
Lmax = 10
Expand Down Expand Up @@ -32,12 +32,12 @@ def test_startwith_delim():

def test_len_ascii():
text = "Bacon ipsum dolor sit amet flank corned beef."
assert _len(text) == 45
assert len(text) == 45


def test_len_unicode():
text = u"但在一个重要的任务上"
assert _len(text) == 10
assert len(text) == 10


def test_only_space_and_punc():
Expand Down
2 changes: 1 addition & 1 deletion gtts/tokenizer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ class Tokenizer:
Args:
regex_funcs (list): List of compiled ``regex`` objects. Each
functions's pattern will be joined into a single pattern and
function's pattern will be joined into a single pattern and
compiled.
flags: ``re`` flag(s) to compile with the final regex. Defaults to
``re.IGNORECASE``
Expand Down
4 changes: 2 additions & 2 deletions gtts/tokenizer/pre_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def tone_marks(text):
"""Add a space after tone-modifying punctuation.
Because the `tone_marks` tokenizer case will split after a tone-modidfying
Because the `tone_marks` tokenizer case will split after a tone-modifying
punctuation mark, make sure there's whitespace after.
"""
Expand All @@ -30,7 +30,7 @@ def end_of_line(text):

def abbreviations(text):
"""Remove periods after an abbreviation from a list of known
abbrevations that can be spoken the same without that period. This
abbreviations that can be spoken the same without that period. This
prevents having to handle tokenization of that period.
Note:
Expand Down
2 changes: 1 addition & 1 deletion gtts/tokenizer/tokenizer_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def period_comma():
def colon():
"""Colon case.
Match a colon ":" only if not preceeded by a digit.
Match a colon ":" only if not preceded by a digit.
Mainly to prevent a cut in the middle of time notations e.g. 10:01
"""
Expand Down
24 changes: 17 additions & 7 deletions gtts/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from gtts.lang import _fallback_deprecated_lang, tts_langs
from gtts.tokenizer import Tokenizer, pre_processors, tokenizer_cases
from gtts.utils import _clean_tokens, _len, _minimize, _translate_url
from gtts.utils import _clean_tokens, _minimize, _translate_url

__all__ = ["gTTS", "gTTSError"]

Expand Down Expand Up @@ -50,7 +50,7 @@ class gTTS:
to catch a language error early. If set to ``True``,
a ``ValueError`` is raised if ``lang`` doesn't exist.
Setting ``lang_check`` to ``False`` skips Web requests
(to validate language) and therefore speeds up instanciation.
(to validate language) and therefore speeds up instantiation.
Default is ``True``.
pre_processor_funcs (list): A list of zero or more functions that are
called to transform (pre-process) text before tokenizing. Those
Expand All @@ -73,6 +73,10 @@ class gTTS:
tokenizer_cases.other_punctuation
]).run
timeout (float or tuple, optional): Seconds to wait for the server to
send data before giving up, as a float, or a ``(connect timeout,
read timeout)`` tuple. ``None`` will wait forever (default).
See Also:
:doc:`Pre-processing and tokenizing <tokenizer>`
Expand Down Expand Up @@ -116,6 +120,7 @@ def __init__(
tokenizer_cases.other_punctuation,
]
).run,
timeout=None,
):

# Debug
Expand Down Expand Up @@ -157,6 +162,8 @@ def __init__(
self.pre_processor_funcs = pre_processor_funcs
self.tokenizer_func = tokenizer_func

self.timeout = timeout

def _tokenize(self, text):
# Pre-clean
text = text.strip()
Expand All @@ -166,7 +173,7 @@ def _tokenize(self, text):
log.debug("pre-processing: %s", pp)
text = pp(text)

if _len(text) <= self.GOOGLE_TTS_MAX_CHARS:
if len(text) <= self.GOOGLE_TTS_MAX_CHARS:
return _clean_tokens([text])

# Tokenize
Expand All @@ -184,7 +191,7 @@ def _tokenize(self, text):
# Filter empty tokens, post-minimize
tokens = [t for t in min_tokens if t]

return min_tokens
return tokens

def _prepare_requests(self):
"""Create the TTS API request(s) without sending them.
Expand Down Expand Up @@ -233,7 +240,7 @@ def get_bodies(self):
"""Get the TTS API request body(ies) that would be sent to the TTS API.
Returns:
list: A list of TTS API request bodiess to make.
list: A list of TTS API request bodies to make.
"""
return [pr.body for pr in self._prepare_requests()]

Expand All @@ -259,7 +266,10 @@ def stream(self):
with requests.Session() as s:
# Send request
r = s.send(
request=pr, proxies=urllib.request.getproxies(), verify=False
request=pr,
verify=False,
proxies=urllib.request.getproxies(),
timeout=self.timeout,
)

log.debug("headers-%i: %s", idx, r.request.headers)
Expand Down Expand Up @@ -372,6 +382,6 @@ def infer_msg(self, tts, rsp=None):
% self.tts.lang
)
elif status >= 500:
cause = "Uptream API error. Try again later."
cause = "Upstream API error. Try again later."

return "{}. Probable cause: {}".format(premise, cause)
22 changes: 2 additions & 20 deletions gtts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ def _minimize(the_string, delim, max_size):
# i.e. prevent a recursive infinite loop on `the_string[0:0]`
# if `the_string` starts with `delim` and is larger than `max_size`
if the_string.startswith(delim):
the_string = the_string[_len(delim) :]
the_string = the_string[len(delim):]

if _len(the_string) > max_size:
if len(the_string) > max_size:
try:
# Find the highest index of `delim` in `the_string[0:max_size]`
# i.e. `the_string` will be cut in half on `delim` index
Expand All @@ -53,24 +53,6 @@ def _minimize(the_string, delim, max_size):
return [the_string]


def _len(text):
"""Same as ``len(text)`` for a string but that decodes
``text`` first in Python 2.x
Args:
text (string): String to get the size of.
Returns:
int: The size of the string.
"""
try:
# Python 2
return len(unicode(text))
except NameError: # pragma: no cover
# Python 3
return len(text)


def _clean_tokens(tokens):
"""Clean a list of strings
Expand Down

0 comments on commit bcdb79d

Please sign in to comment.