Skip to content

Commit

Permalink
Create NON_PRINTABLE regex at runtime
Browse files Browse the repository at this point in the history
  • Loading branch information
therve committed Mar 3, 2021
1 parent ee37f46 commit f84086e
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions lib/yaml/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def __init__(self, stream):
self.index = 0
self.line = 0
self.column = 0
self._non_printable = None
if isinstance(stream, unicode):
self.name = "<unicode string>"
self.check_printable(stream)
Expand Down Expand Up @@ -136,15 +137,19 @@ def determine_encoding(self):
self.encoding = 'utf-8'
self.update(1)

# Choose the pattern that matches characters outside the accepted printable
# ranges (tab, LF, CR, U+0020-U+007E, U+0085, U+00A0 and up, excluding
# U+FFFE/U+FFFF), then compile it. Which form is used depends on the
# interpreter.
# NOTE(review): has_ucs4 is defined outside this view; presumably it is true
# when the interpreter handles code points above U+FFFF natively -- confirm.
if has_ucs4:
# Surrogate code points (U+D800-U+DFFF) are excluded from the accepted set;
# U+10000-U+10FFFF are accepted directly.
NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]'
elif sys.platform.startswith('java'):
# Jython doesn't support lone surrogates https://bugs.jython.org/issue2048
NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]'
else:
# Need to use eval here due to the above Jython issue
# The literal is kept inside a raw string so the surrogate escapes are only
# evaluated on this branch. Besides the negated character class, the
# alternatives match a low surrogate not preceded by a high surrogate and a
# high surrogate not followed by a low surrogate (i.e. unpaired surrogates).
NON_PRINTABLE = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'")
# Rebind the name from the pattern string to the compiled pattern object.
NON_PRINTABLE = re.compile(NON_PRINTABLE)
@property
def NON_PRINTABLE(self):
    """Return the compiled regex matching non-printable characters.

    The pattern is built on first access and cached on the instance in
    ``self._non_printable``; later accesses return the cached object.
    Which pattern is used depends on the interpreter (see the branches
    below).

    Returns:
        A compiled regular expression object whose ``search`` finds the
        first character outside the accepted printable ranges.
    """
    if self._non_printable is None:
        if has_ucs4:
            # Surrogates (U+D800-U+DFFF) are rejected; code points
            # U+10000-U+10FFFF are accepted directly.
            non_printable = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]'
        elif sys.platform.startswith('java'):
            # Jython doesn't support lone surrogates https://bugs.jython.org/issue2048
            non_printable = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]'
        else:
            # Need to use eval here due to the above Jython issue: the
            # literal lives in a raw string so the surrogate escapes are
            # only evaluated on this branch. The extra alternatives match
            # unpaired surrogates (a low one without a preceding high one,
            # or a high one without a following low one).
            non_printable = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'")
        self._non_printable = re.compile(non_printable)
    # Without this return the property evaluates to None and callers such
    # as ``self.NON_PRINTABLE.search(data)`` fail with AttributeError.
    return self._non_printable

def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
Expand Down

0 comments on commit f84086e

Please sign in to comment.