Skip to content

Commit

Permalink
subber: further refine cleaner & srt parser
Browse files Browse the repository at this point in the history
  • Loading branch information
martinlindhe committed Oct 11, 2024
1 parent 57da01c commit 3d9988f
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 11 deletions.
15 changes: 8 additions & 7 deletions cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ var (
"encoded by",
"downloaded from",
"web-dl",
"subscene",
"subscene", "podnapisi", "bokutox", "team nanban",
"fury_don@hotmail.com",
"broadcasttext",
"seriessub", "subtitlesource",
"addic7ed", "addicted.com", "vaioholics",
Expand All @@ -51,18 +52,18 @@ var (
"cssubs", "tvsub", "uksubtitles",
"ragbear.com", "ydy.com", "yyets.net", "indivx.net", "sub-way.fr",
"forom.com", "forom. com", "facebook.com", "hdvietnam.com", "sapo.pt", "softhome.net",
"americascardroom.com",
"@gmail.com", "@hotmail.com", "@hotmail.fr",
"napisy.org", "1000fr.com", "yts.mx",
"americascardroom.com", "subti.com", "tugazx", "pirata-tuga",
"napisy.org", "1000fr.com", "yts.mx", "yts.am", "yts.ag", "yts.lt",
"opensubtitles", "open subtitles", "s u b t i t l e",
"sous-titres.eu", "300mbfilms.com", "put.io", "subtitulos.es", "osdb.link", "300mbunited",
"simail.si", "sf-film.dk", "sf.net", "vitac.com", "rapidpremium", "psarips",
"yify-torrents", "yify torrents", "yify movies",
"thepiratebay", "anoxmous", "verdikt", "la fisher team", "red bee media",
"mkv player", "best watched using", "advertise your product", "remove all ads",
"mkv player", "mkv-potplayer-vlc", "best watched using", "best play with",
"advertise your product", "remove all ads",
"memoryonsmells", "1st-booking",
":[gwc]:", "ripped by", "ripped with subrip", "titra film",
"hiqve",
":[gwc]:", "ripped with subrip", "titra film",
"hiqve", "kentir.bb", "w-bb.org", "sub download",
"trimark home video",

// swedish:
Expand Down
3 changes: 0 additions & 3 deletions cmd/subber/subber.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ func parseAndWriteSubFile(inFileName, outFileName string, filterName string, kee
if err != nil {
return err
}
if len(data) == 0 {
return fmt.Errorf("input file is empty")
}

out, err := cleanupSub(data, filterName, keepAds, sync, inFileName)
if err != nil {
Expand Down
6 changes: 6 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import (

// Parse tries to parse a subtitle
func Parse(b []byte) (Subtitle, error) {
if len(b) <= 10 {
return Subtitle{}, fmt.Errorf("parse: empty input")
}
s := ConvertToUTF8(b)
if looksLikeCCDBCapture(s) {
return NewFromCCDBCapture(s)
Expand All @@ -29,6 +32,9 @@ func LooksLikeTextSubtitle(filename string) bool {
if err != nil {
log.Fatal(err)
}
if len(data) <= 10 {
log.Fatal(fmt.Errorf("parse: empty input in '%s'", filename))
}
s := ConvertToUTF8(data)
return looksLikeCCDBCapture(s) || looksLikeSSA(s) || looksLikeDCSub(s) || looksLikeSRT(s) || looksLikeVTT(s)
}
2 changes: 1 addition & 1 deletion srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func init() {

// looksLikeSRT reports whether s looks like the start of a SubRip (.srt) file.
//
// After trimming surrounding whitespace, s must begin with a single cue index
// digit in the range 0 through 4 that is immediately followed by a line break
// ("\n" or "\r\n"). Files whose first cue index is larger, or has more than
// one digit, are not recognized.
func looksLikeSRT(s string) bool {
	s = strings.TrimSpace(s)
	// The first byte must be one of the accepted cue indexes '0'..'4'.
	if s == "" || s[0] < '0' || s[0] > '4' {
		return false
	}
	// The index must sit alone on its line, with either Unix or Windows endings.
	rest := s[1:]
	return strings.HasPrefix(rest, "\n") || strings.HasPrefix(rest, "\r\n")
}

// NewFromSRT parses a .srt text into Subtitle, assumes s is a clean utf8 string
Expand Down

0 comments on commit 3d9988f

Please sign in to comment.