Skip to content

Commit

Permalink
Add boilerplate support for VobSub tracks in MKV
Browse files Browse the repository at this point in the history
Signed-off-by: Ethan Dye <mrtops03@gmail.com>
  • Loading branch information
ecdye committed Oct 19, 2024
1 parent 7e8d34e commit 960d6c0
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 21 deletions.
1 change: 1 addition & 0 deletions Sources/macSubtitleOCR/MKV/MKVTrack.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ struct MKVTrack {
var trackNumber: Int
var codecId: String
var trackData: Data
// Optional text that is forwarded to `VobSub(_:_:)` for "S_VOBSUB" tracks.
// Optional `var`s are nil by default, so no explicit `= nil` initializer is needed.
var idxData: String?

Check warning on line 15 in Sources/macSubtitleOCR/MKV/MKVTrack.swift

View workflow job for this annotation

GitHub Actions / Lint

Remove/insert redundant nil default value (Optional vars are nil by default). (redundantNilInit)

Check warning on line 15 in Sources/macSubtitleOCR/MKV/MKVTrack.swift

View workflow job for this annotation

GitHub Actions / Lint

Remove/insert redundant nil default value (Optional vars are nil by default). (redundantNilInit)
}
33 changes: 22 additions & 11 deletions Sources/macSubtitleOCR/MKV/MKVTrackParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class MKVTrackParser: MKVFileHandler {

// MARK: - Functions

func parseTracks(codec: String) throws {
func parseTracks(codec: [String]) throws {
guard findElement(withID: EBML.segmentID) as? (UInt64, UInt32) != nil else {
fatalError("Segment element not found in file: \(filePath)")
}
Expand All @@ -27,13 +27,18 @@ class MKVTrackParser: MKVFileHandler {

let endOfTracksOffset = fileHandle.offsetInFile + tracksSize

var trackNumbers = [Int]()
var trackNumbersPGS = [Int]()
var trackNumbersVobSub = [Int]()
while fileHandle.offsetInFile < endOfTracksOffset {
if let (elementID, elementSize) = tryParseElement() {
if elementID == EBML.trackEntryID {
logger.debug("Found TrackEntry element")
if let track = parseTrackEntry(codec: codec) {
trackNumbers.append(track)
if track.1 == "S_HDMV/PGS" {
trackNumbersPGS.append(track.0)
} else {
trackNumbersVobSub.append(track.0)
}
}
} else if elementID == EBML.chapters {
break
Expand All @@ -43,13 +48,19 @@ class MKVTrackParser: MKVFileHandler {
}
}

let trackData = extractTrackData(trackNumber: trackNumbers)
let trackData = extractTrackDataPGS(trackNumber: trackNumbersPGS)
trackData?.enumerated().forEach { index, data in
tracks.append(MKVTrack(trackNumber: index, codecId: codec, trackData: data))
tracks.append(MKVTrack(trackNumber: index, codecId: codec[0], trackData: data))
}
/*
let trackDataVobSub = extractTrackDataVobSub(trackNumber: trackNumbersVobSub)

Check warning on line 56 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)

Check warning on line 56 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)
trackDataVobSub?.enumerated().forEach { index, data in

Check warning on line 57 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)

Check warning on line 57 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)
tracks.append(MKVTrack(trackNumber: index, codecId: codec[1], trackData: data))

Check warning on line 58 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)

Check warning on line 58 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)
}

Check warning on line 59 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)

Check warning on line 59 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)
*/

Check warning on line 60 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)

Check warning on line 60 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Indent code in accordance with the scope level. (indent)
}

func extractTrackData(trackNumber: [Int]) -> [Data]? {
func extractTrackDataPGS(trackNumber: [Int]) -> [Data]? {
fileHandle.seek(toFileOffset: 0)

// Step 1: Locate the Segment element
Expand All @@ -72,7 +83,7 @@ class MKVTrackParser: MKVFileHandler {
}

// Step 4: Parse Blocks (SimpleBlock or Block) within each Cluster
parseBlocks(
parseBlocksPGS(
within: clusterEndOffset,
trackNumber: trackNumber,
clusterTimestamp: clusterTimestamp,
Expand All @@ -84,7 +95,7 @@ class MKVTrackParser: MKVFileHandler {

// MARK: - Methods

private func parseTrackEntry(codec: String) -> Int? {
private func parseTrackEntry(codec: [String]) -> (Int, String)? {
var trackNumber: Int?
var trackType: UInt8?
var codecId: String?
Expand All @@ -109,8 +120,8 @@ class MKVTrackParser: MKVFileHandler {
}

if let trackNumber, let codecId {
if codecId == codec {
return trackNumber
if codec.contains(codecId) {
return (trackNumber, codecId)
}
}
return nil
Expand All @@ -123,7 +134,7 @@ class MKVTrackParser: MKVFileHandler {
return nil
}

private func parseBlocks(within clusterEndOffset: UInt64, trackNumber: [Int], clusterTimestamp: Int64,
private func parseBlocksPGS(within clusterEndOffset: UInt64, trackNumber: [Int], clusterTimestamp: Int64,
trackData: inout [Data]) {

Check warning on line 138 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Wrap lines that exceed the specified maximum width. (wrap)

Check warning on line 138 in Sources/macSubtitleOCR/MKV/MKVTrackParser.swift

View workflow job for this annotation

GitHub Actions / Lint

Wrap lines that exceed the specified maximum width. (wrap)
while fileHandle.offsetInFile < clusterEndOffset {
// swiftformat:disable:next redundantSelf
Expand Down
9 changes: 7 additions & 2 deletions Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@ struct VobSub {
let subData = try subFile.readToEnd()!
subFile.closeFile()
let idx = VobSubIDX(URL(filePath: idx))
subData.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in
extractSubtitleImages(buffer: pointer, idx: idx)
subData.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
extractSubtitleImages(buffer: buffer, idx: idx)
}
}

/// Creates an instance from subtitle bytes and index text that are already in memory.
///
/// - Parameters:
///   - buffer: Raw bytes handed to `extractSubtitleImages(buffer:idx:)`.
///   - idxData: Text used to build the `VobSubIDX` via `VobSubIDX(_:)`.
///
/// NOTE(review): declared `throws`, although nothing in this body is marked `try`.
init(_ buffer: UnsafeRawBufferPointer, _ idxData: String) throws {
    // Build the index first, then extract images from the buffer using it.
    extractSubtitleImages(buffer: buffer, idx: VobSubIDX(idxData))
}

// MARK: - Methods

private mutating func extractSubtitleImages(buffer: UnsafeRawBufferPointer, idx: VobSubIDX) {
Expand Down
8 changes: 8 additions & 0 deletions Sources/macSubtitleOCR/Subtitles/VobSub/VobSubIDX.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ struct VobSubIDX {
}
}

/// Creates an index by parsing `idxData` with `parseIdxFile(idxData:)`.
///
/// - Parameter idxData: Text passed unchanged to `parseIdxFile(idxData:)`.
///
/// This initializer does not throw: any error raised while parsing terminates
/// the process via `fatalError`.
init(_ idxData: String) {
do {
try parseIdxFile(idxData: idxData)
} catch {
// A parse failure is treated as unrecoverable rather than propagated to the caller.
fatalError("Failed to parse IDX file: \(error)")
}
}

// MARK: - Methods

private mutating func parseIdxFile(idxData: String) throws {
Expand Down
25 changes: 17 additions & 8 deletions Sources/macSubtitleOCR/macSubtitleOCR.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct macSubtitleOCR: AsyncParsableCommand {
results.append(result)
} else if input.hasSuffix(".mkv") {
let mkvStream = MKVSubtitleExtractor(filePath: input)
try mkvStream.parseTracks(codec: "S_HDMV/PGS")
try mkvStream.parseTracks(codec: ["S_HDMV/PGS", "S_VOBSUB"])
for track in mkvStream.tracks {
logger.debug("Found subtitle track: \(track.trackNumber), Codec: \(track.codecId)")
if experimentalOptions.saveSubtitleFile {
Expand All @@ -104,13 +104,22 @@ struct macSubtitleOCR: AsyncParsableCommand {
outputDirectory: URL(fileURLWithPath: outputDirectory))
}

// Open the PGS data stream
let pgs: PGS = try mkvStream.tracks[track.trackNumber].trackData
.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
try PGS(buffer)
}
let result = try await processSubtitle(pgs.subtitles, trackNumber: track.trackNumber)
results.append(result)

Check warning on line 107 in Sources/macSubtitleOCR/macSubtitleOCR.swift

View workflow job for this annotation

GitHub Actions / Lint

Replace consecutive blank lines with a single blank line. (consecutiveBlankLines)

Check warning on line 107 in Sources/macSubtitleOCR/macSubtitleOCR.swift

View workflow job for this annotation

GitHub Actions / Lint

Replace consecutive blank lines with a single blank line. (consecutiveBlankLines)
if track.codecId == "S_HDMV/PGS" {
let pgs: PGS = try track.trackData
.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
try PGS(buffer)
}
let result = try await processSubtitle(pgs.subtitles, trackNumber: track.trackNumber)
results.append(result)
} else if track.codecId == "S_VOBSUB" {
let vobSub: VobSub = try track.trackData
.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
try VobSub(buffer, track.idxData ?? "")
}
let result = try await processSubtitle(vobSub.subtitles, trackNumber: track.trackNumber)
results.append(result)
}
}
} else if input.hasSuffix(".sup") {
// Open the PGS data stream
Expand Down

0 comments on commit 960d6c0

Please sign in to comment.