From 8f941923f8b6d1e9995742e298e5e1b309321e81 Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: Tue, 17 Jan 2023 23:28:36 -0800 Subject: [PATCH] print '?' if a letter can't be encoded using the system default encoding (#859) --- whisper/transcribe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/whisper/transcribe.py b/whisper/transcribe.py index a0542943a..6ff909f3b 100644 --- a/whisper/transcribe.py +++ b/whisper/transcribe.py @@ -1,5 +1,6 @@ import argparse import os +import sys import warnings from typing import List, Optional, Tuple, Union, TYPE_CHECKING @@ -167,7 +168,10 @@ def add_segment( ) if verbose: - print(f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}") + line = f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}\n" + # compared to just `print(line)`, this replaces any character not representable using + # the system default encoding with a '?', avoiding UnicodeEncodeError. + sys.stdout.buffer.write(line.encode(sys.getdefaultencoding(), errors="replace")) # show the progress bar when verbose is False (otherwise the transcribed text will be printed) num_frames = mel.shape[-1]