Skip to content

Commit

Permalink
Make DASH WebVTT timestamp fix forced
Browse files Browse the repository at this point in the history
  • Loading branch information
Shivelight authored and rlaphoenix committed May 6, 2024
1 parent 4b2fa8d commit 9dd6750
Showing 1 changed file with 14 additions and 34 deletions.
48 changes: 14 additions & 34 deletions devine/core/tracks/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,18 @@ def download(
self.convert(Subtitle.Codec.TimedTextMarkupLang)
elif self.codec == Subtitle.Codec.fVTT:
self.convert(Subtitle.Codec.WebVTT)
elif self.codec == Subtitle.Codec.WebVTT:
text = self.path.read_text("utf8")
if self.descriptor == Track.Descriptor.DASH:
text = fix_webvtt_timestamp(
text,
segment_duration=self.data["dash"]["segment_durations"],
timescale=self.data["dash"]["timescale"]
)
caption_set = pycaption.WebVTTReader().read(text)
Subtitle.merge_same_cues(caption_set)
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
self.path.write_text(subtitle_text, encoding="utf8")

def convert(self, codec: Subtitle.Codec) -> Path:
"""
Expand Down Expand Up @@ -266,6 +278,7 @@ def convert(self, codec: Subtitle.Codec) -> Path:

caption_set = self.parse(self.path.read_bytes(), self.codec)
Subtitle.merge_same_cues(caption_set)

subtitle_text = writer().write(caption_set)

output_path.write_text(subtitle_text, encoding="utf8")
Expand All @@ -280,6 +293,7 @@ def convert(self, codec: Subtitle.Codec) -> Path:

@staticmethod
def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
# TODO: Use an "enum" for subtitle codecs
if not isinstance(data, bytes):
raise ValueError(f"Subtitle data must be parsed as bytes data, not {type(data).__name__}")

Expand Down Expand Up @@ -575,39 +589,5 @@ def reverse_rtl(self) -> None:
stdout=subprocess.DEVNULL
)

def fix_webvtt_timestamp(self) -> None:
    # TODO PR#67 rlaphoenix: This func name clashes with the import from newly added utils.webvtt
    """
    Rewrite this track's segmented WebVTT file so its cue timestamps are absolute.

    Segmented WebVTT may restart every cue at 0 relative to its own segment; the
    file at ``self.path`` is read, passed through the ``fix_webvtt_timestamp``
    helper, and written back in place as UTF-8.

    Nothing calls this automatically. Service code opts in explicitly, for
    example through a callback::

        if isinstance(track, Subtitle):
            track.OnDownloaded = lambda track: track.fix_webvtt_timestamp()

    Raises:
        ValueError: If the track has no existing file at ``self.path``, or if
            its codec is not WebVTT.
    """
    # Guard: there must be a file on disk to rewrite.
    if not (self.path and self.path.exists()):
        raise ValueError("You must download the subtitle track first.")

    # Guard: only WebVTT tracks are handled.
    if self.codec is not Subtitle.Codec.WebVTT:
        raise ValueError(f"Expected subtitle codec to be a {Subtitle.Codec.WebVTT}, not {self.codec}.")

    # Pick the timing arguments for the helper based on the manifest type.
    # NOTE(review): other code in this file checks Track.Descriptor.DASH and reads
    # the "segment_durations"/"timescale" keys - confirm the names used here are current.
    if self.descriptor is Subtitle.Descriptor.MPD:
        dash_info = self.data["dash"]
        timing = {
            "segment_duration": dash_info["_segment_duration"],
            "timescale": dash_info["_timescale"],
        }
    elif self.descriptor is Subtitle.Descriptor.M3U:
        timing = {"segment_duration": None, "timescale": 1}
    else:
        # Any other descriptor is left untouched.
        return

    raw_text = self.path.read_text("utf8")
    spaced_text = Subtitle.space_webvtt_headers(raw_text)
    # Inside a function body this name resolves to the module-level import,
    # not to this method.
    corrected = fix_webvtt_timestamp(spaced_text, **timing)

    self.path.write_text(corrected, "utf8")

# Names exported by a star-import of this module; only "Subtitle" is listed.
__all__ = ("Subtitle",)

0 comments on commit 9dd6750

Please sign in to comment.