Always apply the WebVTT timestamp fix to DASH subtitle tracks

devine-dl · May 6, 2024 · 9dd6750
1 parent 4b2fa8d
commit 9dd6750
Showing 1 changed file with 14 additions and 34 deletions.
diff --git a/devine/core/tracks/subtitle.py b/devine/core/tracks/subtitle.py
@@ -203,6 +203,18 @@ def download(
             self.convert(Subtitle.Codec.TimedTextMarkupLang)
         elif self.codec == Subtitle.Codec.fVTT:
             self.convert(Subtitle.Codec.WebVTT)
+        elif self.codec == Subtitle.Codec.WebVTT:
+            text = self.path.read_text("utf8")
+            if self.descriptor == Track.Descriptor.DASH:
+                text = fix_webvtt_timestamp(
+                    text,
+                    segment_duration=self.data["dash"]["segment_durations"],
+                    timescale=self.data["dash"]["timescale"]
+                )
+            caption_set = pycaption.WebVTTReader().read(text)
+            Subtitle.merge_same_cues(caption_set)
+            subtitle_text = pycaption.WebVTTWriter().write(caption_set)
+            self.path.write_text(subtitle_text, encoding="utf8")
 
     def convert(self, codec: Subtitle.Codec) -> Path:
         """
@@ -266,6 +278,7 @@ def convert(self, codec: Subtitle.Codec) -> Path:
 
             caption_set = self.parse(self.path.read_bytes(), self.codec)
             Subtitle.merge_same_cues(caption_set)
+
             subtitle_text = writer().write(caption_set)
 
             output_path.write_text(subtitle_text, encoding="utf8")
@@ -280,6 +293,7 @@ def convert(self, codec: Subtitle.Codec) -> Path:
 
     @staticmethod
     def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
+        # TODO: Use an "enum" for subtitle codecs
         if not isinstance(data, bytes):
             raise ValueError(f"Subtitle data must be parsed as bytes data, not {type(data).__name__}")
 
@@ -575,39 +589,5 @@ def reverse_rtl(self) -> None:
             stdout=subprocess.DEVNULL
         )
 
-    def fix_webvtt_timestamp(self) -> None:
-        # TODO PR#67 rlaphoenix: This func name clashes with the import from newly added utils.webvtt
-        """
-        Convert segmented WebVTT timestamps where each cue starts at 0 (relative to the segment)
-        to absolute timestamps.
-
-        This function is not called by default; instead, service code should explicitly call
-        this function when needed. Example using a callback::
-
-            if isinstance(track, Subtitle):
-                track.OnDownloaded = lambda track: track.fix_webvtt_timestamp()
-
-        """
-        if not self.path or not self.path.exists():
-            raise ValueError("You must download the subtitle track first.")
-
-        if self.codec is not Subtitle.Codec.WebVTT:
-            raise ValueError(f"Expected subtitle codec to be a {Subtitle.Codec.WebVTT}, not {self.codec}.")
-
-        if self.descriptor is Subtitle.Descriptor.MPD:
-            segment_duration = self.data["dash"]["_segment_duration"]
-            timescale = self.data["dash"]["_timescale"]
-        elif self.descriptor is Subtitle.Descriptor.M3U:
-            segment_duration = None
-            timescale = 1
-        else:
-            return
-
-        text = Subtitle.space_webvtt_headers(self.path.read_text("utf8"))
-        fixed = fix_webvtt_timestamp(
-            text, segment_duration=segment_duration, timescale=timescale
-        )
-
-        self.path.write_text(fixed, "utf8")
 
 __all__ = ("Subtitle",)