Skip to content

Commit

Permalink
Add pointer usage to PGS for performance
Browse files Browse the repository at this point in the history
Signed-off-by: Ethan Dye <mrtops03@gmail.com>
  • Loading branch information
ecdye committed Oct 18, 2024
1 parent 5513f53 commit fd44f71
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 60 deletions.
73 changes: 35 additions & 38 deletions Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
// Copyright © 2024 Ethan Dye. All rights reserved.
//

import CoreGraphics
import Foundation
import ImageIO
import os

struct PGS {
Expand All @@ -29,96 +27,95 @@ struct PGS {
fatalError("Failed to read file data from: \(url.path)")
}
fileHandle.closeFile()
try data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in
try parseData(pointer)
}

try parseData()
// try parseData()
}

init(_ data: Data) throws {
self.data = data
try parseData()
init(_ pointer: UnsafeRawBufferPointer) throws {
self.data = Data()
try parseData(pointer)
}

// MARK: - Methods

private mutating func parseData() throws {
var headerData = data.extractBytes(pgsHeaderLength)
while data.count > 0 {
guard let subtitle = try parseNextSubtitle(headerData: &headerData)
private mutating func parseData(_ pointer: UnsafeRawBufferPointer) throws {
var offset = 0
while offset + pgsHeaderLength < pointer.count {
guard let subtitle = try parseNextSubtitle(pointer, &offset)
else {
if data.count < pgsHeaderLength { break }
headerData = data.extractBytes(pgsHeaderLength)
if offset + pgsHeaderLength > pointer.count { break }
continue
}

// Find the next timestamp to use as our end timestamp
while subtitle.endTimestamp == nil {
headerData = data.extractBytes(pgsHeaderLength)
subtitle.endTimestamp = parseTimestamp(headerData)
}
subtitle.endTimestamp = parseTimestamp(pointer.loadUnaligned(fromByteOffset: offset + 2, as: UInt32.self).bigEndian)

subtitles.append(subtitle)
}
}

private func parseTimestamp(_ data: Data) -> TimeInterval {
let pts = data.value(ofType: UInt32.self, at: 2)!
return TimeInterval(pts) / 90000.0 // 90 kHz clock
private func parseTimestamp(_ timestamp: UInt32) -> TimeInterval {
return TimeInterval(timestamp) / 90000 // 90 kHz clock
}

private mutating func parseNextSubtitle(headerData: inout Data) throws -> Subtitle? {
private mutating func parseNextSubtitle(_ pointer: UnsafeRawBufferPointer, _ offset: inout Int) throws -> Subtitle? {
var multipleODS = false
var ods: ODS?
var pds: PDS?

while true {
guard headerData.count == pgsHeaderLength else {
fatalError("Failed to read PGS header correctly, got header length: \(headerData.count)/\(pgsHeaderLength)")
guard offset + pgsHeaderLength < pointer.count else {
return nil // End of data
}

let segmentType = headerData[10]
let segmentLength = Int(headerData.value(ofType: UInt16.self, at: 11)!)
let segmentType = pointer[offset + 10]
let segmentLength = Int(pointer.loadUnaligned(fromByteOffset: offset + 11, as: UInt16.self).bigEndian)
let startTimestamp = parseTimestamp(pointer.loadUnaligned(fromByteOffset: offset + 2, as: UInt32.self).bigEndian)
offset += pgsHeaderLength

// Stop at an END segment (type 0x80) or at any zero-length segment
guard segmentType != 0x80, segmentLength != 0 else { return nil }

// Read the rest of the segment
let segmentData = data.extractBytes(segmentLength)
guard segmentData.count == segmentLength else {
fatalError("Failed to read the full segment data, got: \(segmentData.count)/\(segmentLength)")
}

// Parse the segment based on the type (0x14 for PDS, 0x15 for ODS, 0x16 for PCS, 0x17 for WDS)
switch segmentType {
case 0x14: // PDS (Palette Definition Segment)
do {
pds = try PDS(segmentData)
pds = try PDS(pointer, offset, segmentLength)
offset += segmentLength
} catch let macSubtitleOCRError.invalidPDSDataLength(length) {
fatalError("Invalid Palette Data Segment length: \(length)")
}
case 0x15: // ODS (Object Definition Segment)
do {
if segmentData[3] == 0x80 {
ods = try ODS(segmentData)
if pointer[offset + 3] == 0x80 {
ods = try ODS(pointer, offset, segmentLength)
offset += segmentLength
multipleODS = true
break
} else if multipleODS {
try ods?.appendSegment(segmentData)
if segmentData[3] != 0x40 { break }
try ods?.appendSegment(pointer, offset, segmentLength)
if pointer[offset + 3] != 0x40 { break }
offset += segmentLength
} else {
ods = try ODS(segmentData)
ods = try ODS(pointer, offset, segmentLength)
offset += segmentLength
}
} catch let macSubtitleOCRError.invalidODSDataLength(length) {
fatalError("Invalid Object Data Segment length: \(length)")
}
case 0x16, 0x17: // PCS (Presentation Composition Segment), WDS (Window Definition Segment)
offset += segmentLength
break // PCS and WDS parsing not required for basic rendering
default:
logger.warning("Unknown segment type: \(segmentType, format: .hex), skipping...")
offset += segmentLength
return nil
}
headerData = data.extractBytes(pgsHeaderLength)
guard let pds, let ods else { continue }
let startTimestamp = parseTimestamp(headerData)
offset += pgsHeaderLength // Skip the end segment
return Subtitle(
index: subtitles.count + 1,
startTimestamp: startTimestamp,
Expand Down
26 changes: 13 additions & 13 deletions Sources/macSubtitleOCR/Subtitles/PGS/Parsers/ODS.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ struct ODS {

// MARK: - Lifecycle

init(_ data: Data) throws {
try parseODS(data)
init(_ data: UnsafeRawBufferPointer, _ offset: Int, _ segmentLength: Int) throws {
try parseODS(data, offset, segmentLength)
}

mutating func appendSegment(_ data: Data) throws {
try parseODS(data)
mutating func appendSegment(_ data: UnsafeRawBufferPointer, _ offset: Int, _ segmentLength: Int) throws {
try parseODS(data, offset, segmentLength)
}

// MARK: - Methods
Expand All @@ -39,26 +39,26 @@ struct ODS {
// 2 bytes: Object width
// 2 bytes: Object height
// Rest: Image data (run-length encoded, RLE)
private mutating func parseODS(_ data: Data) throws {
let sequenceFlag = data[3]
private mutating func parseODS(_ data: UnsafeRawBufferPointer, _ offset: Int, _ segmentLength: Int) throws {
let sequenceFlag = data[offset + 3]
if sequenceFlag != 0x40 {
objectWidth = Int(data.value(ofType: UInt16.self, at: 7) ?? 0)
objectHeight = Int(data.value(ofType: UInt16.self, at: 9) ?? 0)
objectWidth = Int(data.loadUnaligned(fromByteOffset: offset + 7, as: UInt16.self).bigEndian)
objectHeight = Int(data.loadUnaligned(fromByteOffset: offset + 9, as: UInt16.self).bigEndian)
}

// PGS includes the width and height as part of the image data length calculations
guard data.count > 7 else {
throw macSubtitleOCRError.invalidODSDataLength(length: data.count)
guard data.count - offset > 7 else {
throw macSubtitleOCRError.invalidODSDataLength(length: data.count - offset)
}

switch sequenceFlag {
case 0x40:
rawImageData.append(data[4...])
rawImageData.append(contentsOf: data[(offset + 4)..<(offset + segmentLength)])
imageData = decodeRLEData()
case 0x80:
rawImageData.append(data[11...])
rawImageData.append(contentsOf: data[(offset + 11)..<(offset + segmentLength)])
default:
rawImageData.append(data[11...])
rawImageData.append(contentsOf: data[(offset + 11)..<(offset + segmentLength)])
imageData = decodeRLEData()
}
}
Expand Down
15 changes: 8 additions & 7 deletions Sources/macSubtitleOCR/Subtitles/PGS/Parsers/PDS.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ struct PDS {

// MARK: - Lifecycle

init(_ data: Data) throws {
guard data.count >= 7, (data.count - 2) % 5 == 0 else {
throw macSubtitleOCRError.invalidPDSDataLength(length: data.count)
init(_ data: UnsafeRawBufferPointer, _ offset: Int, _ segmentLength: Int) throws {
let count = data.count - offset
guard count >= 7, (segmentLength - 2) % 5 == 0 else {
throw macSubtitleOCRError.invalidPDSDataLength(length: count)
}
parsePDS(data.advanced(by: 2))
parsePDS(data, offset, segmentLength)
}

// MARK: - Methods
Expand All @@ -32,10 +33,10 @@ struct PDS {
// 1 byte: Palette Version (unused by us)
// Followed by a series of palette entries:
// Each entry is 5 bytes: (Index, Y, Cr, Cb, Alpha)
private mutating func parsePDS(_ data: Data) {
private mutating func parsePDS(_ data: UnsafeRawBufferPointer, _ offset: Int, _ segmentLength: Int) {
// Start reading after the first 2 bytes (Palette ID and Version)
var i = 0
while i + 4 <= data.count {
var i = offset + 2
while i + 4 <= (offset + segmentLength) {
let index = data[i]
let y = data[i + 1]
let cr = data[i + 2]
Expand Down
6 changes: 4 additions & 2 deletions Sources/macSubtitleOCR/macSubtitleOCR.swift
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ struct macSubtitleOCR: AsyncParsableCommand {
}

// Open the PGS data stream
let PGS = try PGS(mkvStream.tracks[track.trackNumber].trackData)
let result = try await processSubtitle(PGS.subtitles, trackNumber: track.trackNumber)
let pgs: PGS = try mkvStream.tracks[track.trackNumber].trackData.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in
try PGS(pointer)
}
let result = try await processSubtitle(pgs.subtitles, trackNumber: track.trackNumber)
results.append(result)
}
} else if input.hasSuffix(".sup") {
Expand Down

0 comments on commit fd44f71

Please sign in to comment.