Skip to content

Commit

Permalink
version 1.5.0 ( updated 02.06.2024 )
Browse files Browse the repository at this point in the history
+ Added
	- Game groups: HHT, PLAYMAGiC
	- Consoles: POCKET, JAG
	- Test cases
	- Release Country parsing (TV Shows)
~ Improved:
	- Release title parsing: Added '&' to regex
	- Release type parsing: check for season and episode if release is from streaming service
	- Group name parsing with brackets
- Fixed:
	- Incorrect season parsing for some release titles
  • Loading branch information
pr0pz committed Jun 2, 2024
1 parent 6b7f385 commit 1cef243
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 11 deletions.
40 changes: 39 additions & 1 deletion ReleaseParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*
* @package ReleaseParser
* @author Wellington Estevo
* @version 1.4.4
* @version 1.5.0
*/

class ReleaseParser extends ReleasePatterns
Expand All @@ -36,6 +36,7 @@ class ReleaseParser extends ReleasePatterns
'os' => \null, // For Software/Game rls
'version' => \null, // For Software/Game rls
'language' => \null, // Array with language code as key and name as value (in english)
'country' => \null, // Release country
'type' => \null,
];

Expand Down Expand Up @@ -78,6 +79,7 @@ public function __construct( string $release_name, string $section = '' )

$this->parseType( $section );
$this->parseTitle(); // Title and extra title
$this->parseCountry(); // Parses Release country
$this->cleanupAttributes(); // Clean up unneeded and falsely parsed attributes
}

Expand Down Expand Up @@ -746,6 +748,34 @@ private function parseEpisode()
}
}


/**
 * Parses the release country and strips it from the title.
 *
 * Only applies to TV releases. The last word of the already parsed title is
 * treated as a country code when it matches one of the known codes
 * (US, UK, NZ, AU, CA, BE) and the word before it is not a connecting word
 * such as "the", "of" or "with" (in which case the code-like word is
 * considered part of the actual title, e.g. a title ending in "of Us").
 * On a match the 'country' attribute is set and the word is removed
 * from the 'title' attribute.
 *
 * @return void
 */
private function parseCountry(): void
{
	// Country codes are only parsed for TV shows.
	// The comparison has to be `!==`: `!\strtolower( ... ) === 'tv'` negates the
	// string first and compares a bool with 'tv', which is never true.
	if ( \strtolower( $this->get( 'type' ) ?? '' ) !== 'tv' ) return;

	$title_words = \explode( ' ', $this->get( 'title' ) ?? '' );
	$last_element = \array_key_last( $title_words );

	// A single word title has no preceding word and can't carry a country suffix.
	if ( $last_element === 0 ) return;

	$countries = '/^(US|UK|NZ|AU|CA|BE)$/i';
	$invalid_words_before = '/^(the|of|with|and|between|to)$/i';

	if (
		\preg_match( $countries, $title_words[ $last_element ] ) &&
		!\preg_match( $invalid_words_before, $title_words[ $last_element - 1 ] )
	)
	{
		$this->set( 'country', $title_words[ $last_element ] );

		// Remove the country code from the title.
		unset( $title_words[ $last_element ] );
		$this->set( 'title', \implode( ' ', $title_words ) );
	}
}


/**
* Parse Bookware type.
Expand Down Expand Up @@ -969,6 +999,14 @@ private function guessTypeByParsedAttributes(): string
{
$type = 'MusicVideo';
}
// Probably a movie if neither episode nor season is given
else if (
empty( $this->get( 'episode') ) &&
empty( $this->get( 'season') )
)
{
$type = 'Movie';
}
}
// Description with date inside brackets is nearly always music or musicvideo
else if ( \preg_match( self::REGEX_DATE_MUSIC, $this->get( 'release' ) ) )
Expand Down
21 changes: 18 additions & 3 deletions ReleaseParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*
* @package ReleaseParser
* @author Wellington Estevo
* @version 1.4.4
* @version 1.5.0
*/

/**
Expand Down Expand Up @@ -192,6 +192,21 @@ function release_parser_test()
new ReleaseParser( 'Der.Herr.Der.Ringe.Die.Gefaehrten.SPECIAL.EXTENDED.EDITION.2001.German.DL.1080p.BluRay.AVC.READ.NFO.PROPER-AVCBD', 'BLURAY-AVC' ),
'Title: Der Herr Der Ringe Die Gefaehrten / Group: AVCBD / Year: 2001 / Flags: Extended, Proper, READNFO, Special Edition / Source: Bluray / Format: AVC / Resolution: 1080p / Language: German, Multilingual / Type: Movie'
],
[
'Movies #13 - Special char in title',
new ReleaseParser( 'Hansel.&.Gretel.Witch.Hunters..2013.DiRFiX.NTSC.MULTi.DVDR-FUTiL', 'PRE' ),
'Title: Hansel & Gretel Witch Hunters / Group: FUTiL / Year: 2013 / Flags: DIRFiX / Format: DVDR / Resolution: NTSC / Language: Multilingual / Type: Movie'
],
[
'Movies #14 - Don\'t falsely parse season',
new ReleaseParser( 'V.H.S.94.2021.BluRay.1080p.DTS-HD.MA.5.1.AVC-GROUPNAME', 'PRE' ),
'Title: V H S 94 / Group: GROUPNAME / Year: 2021 / Source: Bluray / Format: AVC / Resolution: 1080p / Audio: DTS-HD MA, 5.1 / Type: Movie'
],
[
'Movies #15 - Don\'t falsely parse movie as tv show + special group chars',
new ReleaseParser( 'Pay.the.Ghost.2015.1080p.HULU.WEB-DL.DDP.5.1.H.264-PiRaTeS[TGx]', 'PRE' ),
'Title: Pay the Ghost / Group: PiRaTeS[TGx] / Year: 2015 / Source: Hulu / Resolution: 1080p / Audio: Dolby Digital Plus, 5.1 / Type: Movie'
],

// TV
[
Expand Down Expand Up @@ -227,7 +242,7 @@ function release_parser_test()
[
'TV #7 - Whole season without episode',
new ReleaseParser( 'Riverdale.US.S05.PROPER.FRENCH.WEB.x264-STRINGERBELL', 'tv' ),
'Title: Riverdale US / Group: STRINGERBELL / Season: 5 / Flags: Proper / Source: WEB / Format: x264 / Language: French / Type: TV'
'Title: Riverdale / Group: STRINGERBELL / Season: 5 / Flags: Proper / Source: WEB / Format: x264 / Language: French / Country: US / Type: TV'
],
[
'TV #8 - Episode is 0 (needs dirfix but works)',
Expand Down Expand Up @@ -564,7 +579,7 @@ function release_parser_test()
function release_parser_test_single()
{
echo \PHP_EOL . 'Starting ReleaseParser Single test ...' . \PHP_EOL . \PHP_EOL;
$release_name = 'Diephuis--Mirrors_EP-(DHR19)-WEB-2023-dh';
$release_name = 'Pay.the.Ghost.2015.1080p.HULU.WEB-DL.DDP.5.1.H.264-PiRaTeS[TGx]';
$release_section = '';
$release = new ReleaseParser( $release_name, $release_section );

Expand Down
14 changes: 9 additions & 5 deletions ReleasePatterns.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*
* @package ReleaseParser
* @author Wellington Estevo
* @version 1.4.4
* @version 1.5.0
*/

class ReleasePatterns {
Expand All @@ -27,7 +27,7 @@ class ReleasePatterns {
const REGEX_YEAR_SIMPLE = '(19\d[\dx]|20\d[\dx])';
const REGEX_YEAR = '/(?=[(._-]' . self::REGEX_YEAR_SIMPLE . '[)._-])/i';
// Extract group
const REGEX_GROUP = '/-([\w.]+)$/i';
const REGEX_GROUP = '/-([\w.\[\]]+)$/i';
// Extract OS
//const REGEX_OS = '';
// Episode pattern matches: S01E01 / 1x01 / E(PS)1 / OVA1 / F123 / Folge_123 / Episode 1 / Issue 1 etc.
Expand All @@ -38,9 +38,9 @@ class ReleasePatterns {
// For Disc numbers: Disc1 / DVD1 / CD1 / (S01)D01
const REGEX_DISC = '(?:s\d+[._-]?)?(?:d|di[cks][cks]|cd|dvd)[._-]?(\d+)';
// Season pattern matches: S01E01 / 1x01 / S01D01
const REGEX_SEASON = '/[._-](?:(?:[ST]|saison|staffel|temp)[._-]?(\d+)[._-]?(?:(?:ep?|eps[._-]?|episode[._-]?|f(?:olge[._-]?)|d|di[cks][cks][._-]?|cd[._-]?|dvd[._-]?)\d+)?|(\d+)(?:x\d+))[._-]/i';
const REGEX_SEASON = '/[._-](?:(?:[ST]|saison[._-]?|staffel[._-]?|temp[._-]?)(\d+)[._-]?(?:(?:ep?|eps[._-]?|episode[._-]?|f(?:olge[._-]?)|d|di[cks][cks][._-]?|cd[._-]?|dvd[._-]?)\d+)?|(\d+)(?:x\d+))[._-]/i';
// Basic title pattern
const REGEX_TITLE = '([\w.()-]+)';
const REGEX_TITLE = '([\w&.()-]+)';
// Good for Ebooks
const REGEX_TITLE_EBOOK = '/^' . self::REGEX_TITLE . '[._(-]+(?:%year%|%language%|%flags%|%format%|%regex_date%|%regex_date_monthname%|ebook)[._)-]/iU';
// Good for Fonts
Expand Down Expand Up @@ -171,6 +171,7 @@ class ReleasePatterns {
'PPV' => 'PPV(?:[._-]?RIP)?', // Pay-per-view
'PSN' => 'PSN', // Playstation Network
'RAWRiP' => 'Rawrip', // Anime
'Roku' => 'Roku', // (P2P)
'SAT' => 'sat', // Analog Satellite
'Scan' => 'scan',
'Screener' => '(b[dr]+|bluray|dvd|vhs?|t[cs]|line)?.?(scr|screener)',
Expand Down Expand Up @@ -333,6 +334,8 @@ class ReleasePatterns {
// Game Console patterns
const DEVICE = [
'3DO' => '3DO',
'Analogue Pocket' => 'POCKET',
'Atari Jaguar' => 'JAG',
//'Bandai WonderSwan' => 'WS',
'Bandai WonderSwan Color' => 'WSC',
'Commodore Amiga' => 'AMIGA',
Expand All @@ -358,6 +361,7 @@ class ReleasePatterns {
'Playstation Vita' => 'PSV',
'Pocket PC' => 'PPC\d*',
'Sega Dreamcast' => [ 'DC$', 'DREAMCAST' ],
'Sega Master System' => 'SMS$',
'Sega Mega CD' => 'MEGACD',
'Sega Mega Drive' => 'SMD',
'Sega Saturn' => 'SATURN$',
Expand Down Expand Up @@ -738,7 +742,7 @@ class ReleasePatterns {
];

const GROUPS_GAMES = [
'0x0007', '0x0815', '1C', 'ABSiSO', 'ACTiVATED', 'ADDONiA', 'ALiAS', 'ANOMALY', 'AUGETY', 'AVENGED', 'BACKLASH', 'bADkARMA', 'Bamboocha', 'BAT', 'BAZOOKA', 'BFHiSO', 'BiTE', 'BLASTCiTY', 'BReWErS', 'BREWS', 'BREWZ', 'CiFE', 'CLONECD', 'CLS', 'CODEX', 'COGENT', 'CUBiC', 'CXZiSO', 'DARKSiDERS', 'DARKZER0', 'DELiGHT', 'DEViANCE', 'DINOByTES', 'DOGE', 'DVN', 'DVNiSO', 'DYNAMIX', 'ENiGMA', 'FANiSO', 'FAS', 'FASiSO', 'FASDOX', 'FCKDRM', 'FLT', 'FLTDOX', 'GENESIS', 'gimpsRus', 'GMiSO', 'GOW', 'GREENPEACE', 'HATRED', 'HBD', 'HEiST', 'HI2U', 'HOODLUM', 'HR', 'HYBRID', 'I_KnoW', 'iMMERSiON', 'iNLAWS', 'iTWINS', 'JAGDOX', 'JAGUAR', 'LiGHTFORCE', 'LUMA', 'MONEV', 'MYSTERY', 'MYTH', 'NiiNTENDO', 'NNSSWW', 'OUTLAWS', 'PiKMiN', 'PiMoCK', 'PiZZA', 'PiZZADOX', 'PLAZA', 'POSTMORTEM', 'PRELUDE', 'PROPHET', 'PS5B', 'PUSSYCAT', 'PWZ', 'TENOKE', 'TENOKE1', 'THG', 'TiNYiSO', 'TRSi', 'TSC', 'RELOADED', 'RAZOR', 'Razor1911', 'RAZORCD', 'RazorDOX', 'ReVOLVeR', 'RiTUEL', 'RUNE', 'SCRUBS', 'SiLENTGATE', 'SiMPLEX', 'SKIDROW', 'SMACKs', 'Souldrinker', 'SPLATTER', 'SPLATTERKiNGS', 'STEAMPUNKS', 'STRANGE', 'SUXXORS', 'TDUJAM', 'TECHNiC', 'TEDOX', 'TNT', 'VACE', 'VENGEANCE', 'VENOM', 'ViTALiTY', 'VREX', 'Unleashed', 'YOUCANTNUKE', 'ZEKE'
'0x0007', '0x0815', '1C', 'ABSiSO', 'ACTiVATED', 'ADDONiA', 'ALiAS', 'ANOMALY', 'AUGETY', 'AVENGED', 'BACKLASH', 'bADkARMA', 'Bamboocha', 'BAT', 'BAZOOKA', 'BFHiSO', 'BiTE', 'BLASTCiTY', 'BReWErS', 'BREWS', 'BREWZ', 'CiFE', 'CLONECD', 'CLS', 'CODEX', 'COGENT', 'CUBiC', 'CXZiSO', 'DARKSiDERS', 'DARKZER0', 'DELiGHT', 'DEViANCE', 'DINOByTES', 'DOGE', 'DVN', 'DVNiSO', 'DYNAMIX', 'ENiGMA', 'FANiSO', 'FAS', 'FASiSO', 'FASDOX', 'FCKDRM', 'FLT', 'FLTDOX', 'GENESIS', 'gimpsRus', 'GMiSO', 'GOW', 'GREENPEACE', 'HATRED', 'HBD', 'HEiST', 'HHT', 'HI2U', 'HOODLUM', 'HR', 'HYBRID', 'I_KnoW', 'iMMERSiON', 'iNLAWS', 'iTWINS', 'JAGDOX', 'JAGUAR', 'LiGHTFORCE', 'LUMA', 'MONEV', 'MYSTERY', 'MYTH', 'NiiNTENDO', 'NNSSWW', 'OUTLAWS', 'PiKMiN', 'PiMoCK', 'PiZZA', 'PiZZADOX', 'PLAZA', 'POSTMORTEM', 'PRELUDE', 'PROPHET', 'PS5B', 'PUSSYCAT', 'PWZ', 'TENOKE', 'TENOKE1', 'THG', 'TiNYiSO', 'TRSi', 'TSC', 'RELOADED', 'RAZOR', 'Razor1911', 'RAZORCD', 'RazorDOX', 'ReVOLVeR', 'RiTUEL', 'RUNE', 'SCRUBS', 'SiLENTGATE', 'SiMPLEX', 'SKIDROW', 'SMACKs', 'Souldrinker', 'SPLATTER', 'SPLATTERKiNGS', 'STEAMPUNKS', 'STRANGE', 'SUXXORS', 'TDUJAM', 'TECHNiC', 'TEDOX', 'TNT', 'VACE', 'VENGEANCE', 'VENOM', 'ViTALiTY', 'VREX', 'Unleashed', 'YOUCANTNUKE', 'ZEKE'
];

const GROUPS_APPS = [
Expand Down
15 changes: 15 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
version 1.5.0 ( updated 02.06.2024 )
+ Added
- Game groups: HHT, PLAYMAGiC
- Consoles: POCKET, JAG
- Test cases
- Release Country parsing (TV Shows)
~ Improved:
- Release title parsing: Added '&' to regex
- Release type parsing: check for season and episode if release is from streaming service
- Group name parsing with brackets
- Fixed:
- Incorrect season parsing for some release titles

***

version 1.4.4 ( updated 12.01.2024 )
+ Added
- App groups: CNC
Expand Down
4 changes: 2 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "propz/release-parser",
"version": "1.4.4",
"version": "1.5.0",
"description": "A library for parsing scene release names into human readable data.",
"keywords": [
"parser",
Expand All @@ -16,7 +16,7 @@
],
"homepage": "https://github.com/pr0pz/scene-release-parser-php#readme",
"readme": "README.md",
"time": "2024-01-12",
"time": "2024-06-02",
"license": "MIT",
"autoload": {
"classmap": [
Expand Down

0 comments on commit 1cef243

Please sign in to comment.