Skip to content

Commit

Permalink
Merge pull request #1 from FredBill1/fix-timing-overlap
Browse files Browse the repository at this point in the history
fix timing overlap issue (m-bain#816)
  • Loading branch information
tylerjthomas9 authored Jul 2, 2024
2 parents f2da2f8 + faff50a commit a832332
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions whisperx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,19 @@ def iterate_subtitles():
 yield subtitle, times

 if "words" in result["segments"][0]:
-for subtitle, _ in iterate_subtitles():
-sstart, ssend, speaker = _[0]
+for subtitle, times in iterate_subtitles():
+# TODO: handle multiple segments with different start/end times and speakers
+sstart, send, speaker = times[0]
+has_timing = any(["start" in timing for timing in subtitle])
+if has_timing:
+sstart = next(timing["start"] for timing in subtitle if "start" in timing)
+send = next(timing["end"] for timing in reversed(subtitle) if "end" in timing)
 subtitle_start = self.format_timestamp(sstart)
-subtitle_end = self.format_timestamp(ssend)
+subtitle_end = self.format_timestamp(send)
 if result["language"] in LANGUAGES_WITHOUT_SPACES:
 subtitle_text = "".join([word["word"] for word in subtitle])
 else:
 subtitle_text = " ".join([word["word"] for word in subtitle])
-has_timing = any(["start" in word for word in subtitle])

 # add [$SPEAKER_ID]: to each subtitle if speaker is available
 prefix = ""
Expand Down

0 comments on commit a832332

Please sign in to comment.