Skip to content

Commit

Permalink
feat: add vad filter flag
Browse files: browse the repository at this point in the history
See #3
  • Loading branch information
geekodour committed Nov 9, 2023
1 parent b028255 commit 205bb19
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
7 changes: 6 additions & 1 deletion src/wscribe/backends/fasterwhisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ def load(self) -> None:
)

def transcribe(
self, input: np.ndarray, language: Optional[str] = None, silent: bool = False
self,
input: np.ndarray,
language: Optional[str] = None,
silent: bool = False,
vad: bool = False,
) -> list[TranscribedData]:
"""
Return word level transcription data.
Expand All @@ -50,6 +54,7 @@ def transcribe(
beam_size=DEFAULT_BEAM,
word_timestamps=True,
language=language,
vad_filter=vad,
)
# ps = playback seconds
with tqdm(
Expand Down
13 changes: 11 additions & 2 deletions src/wscribe/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,16 @@ def cli():
@click.option("-d", "--debug", help="show debug logs", default=False, is_flag=True)
@click.option("-s", "--stats", help="print stats", default=False, is_flag=True)
@click.option("-q", "--quiet", help="no progress bar", default=False, is_flag=True)
def transcribe(source, destination, format, model, gpu, language, debug, stats, quiet):
@click.option(
"-v",
"--vad",
help="use vad filter(better results, slower)",
default=False,
is_flag=True,
)
def transcribe(
source, destination, format, model, gpu, language, debug, stats, quiet, vad
):
"""
Transcribes SOURCE to DESTINATION. Where SOURCE can be local path to an audio/video file and
DESTINATION needs to be a local path to a non-existing file.
Expand All @@ -76,7 +85,7 @@ def transcribe(source, destination, format, model, gpu, language, debug, stats,
audio_end_time = time.perf_counter()

ts_start_time = time.perf_counter()
result = m.transcribe(input=audio, language=language, silent=quiet)
result = m.transcribe(input=audio, language=language, silent=quiet, vad=vad)
ts_end_time = time.perf_counter()

writer = WRITERS[format](result=result, destination=destination)
Expand Down

0 comments on commit 205bb19

Please sign in to comment.