From c4f2fffb63d2d7c118b36eaf10c0b622a286e3dc Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Sat, 2 Oct 2021 14:12:01 +1000 Subject: [PATCH 1/9] Fix #4080 Tested with https://www.discogs.com/SHOUSE-Love-Tonight-Robin-Schulz-Remix/release/20356324 https://www.discogs.com/release/20356324-SHOUSE-Love-Tonight-Robin-Schulz-Remix --- beetsplug/discogs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 76036f1be9..5ff298db9c 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -209,8 +209,10 @@ def album_for_id(self, album_id): # of an input string as to avoid confusion with other metadata plugins. # An optional bracket can follow the integer, as this is how discogs # displays the release ID on its webpage. - match = re.search(r'(^|\[*r|discogs\.com/.+/release/)(\d+)($|\])', + # Issue #4080 highlighted that Discogs changed the way the URL is formed + match = re.search(r'(^|[*r|discogs\.com/.+/release/])(\d+)($|)', album_id) + if not match: return None result = Release(self.discogs_client, {'id': int(match.group(2))}) From b9353464029b8f94d4c0d959c1a6e554a8a0c7cc Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Sat, 2 Oct 2021 14:14:04 +1000 Subject: [PATCH 2/9] changelog --- docs/changelog.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9581c820f0..519ecae434 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -39,6 +39,9 @@ Bug fixes: * :doc:`/plugins/lyrics`: Fix crash bug when beautifulsoup4 is not installed. :bug:`4027` +* :doc:`/plugins/discogs`: Fix regex for new style of regex. + :bug: `4080` + 1.5.0 (August 19, 2021) ----------------------- From 39def81d1c0bd358ca6367308e8325a9b7d45304 Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Sat, 2 Oct 2021 14:20:17 +1000 Subject: [PATCH 3/9] Fix lint error --- beetsplug/discogs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 5ff298db9c..9f9c3ef766 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -209,7 +209,8 @@ def album_for_id(self, album_id): # of an input string as to avoid confusion with other metadata plugins. # An optional bracket can follow the integer, as this is how discogs # displays the release ID on its webpage. - # Issue #4080 highlighted that Discogs changed the way the URL is formed + # Issue #4080 highlighted that Discogs changed the way the URL + # is formed match = re.search(r'(^|[*r|discogs\.com/.+/release/])(\d+)($|)', album_id) From ade9978f7e2fc923d47a40b2180cc3b6065eb491 Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Sat, 2 Oct 2021 21:39:26 +1000 Subject: [PATCH 4/9] Updated after feedback from @wisp3rwind --- beetsplug/discogs.py | 9 ++++----- docs/changelog.rst | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 9f9c3ef766..f9c37c59cb 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -206,13 +206,12 @@ def album_for_id(self, album_id): self._log.debug('Searching for release {0}', album_id) # Discogs-IDs are simple integers. We only look for those at the end - # of an input string as to avoid confusion with other metadata plugins. + # of an input string or after the "/release/" keyword as to avoid + # confusion with other metadata plugins. # An optional bracket can follow the integer, as this is how discogs # displays the release ID on its webpage. - # Issue #4080 highlighted that Discogs changed the way the URL - # is formed - match = re.search(r'(^|[*r|discogs\.com/.+/release/])(\d+)($|)', - album_id) + match = re.search( + r"(^|\[*r|discogs\.com.*/release/)(\d+)(|\])", album_id) if not match: return None diff --git a/docs/changelog.rst b/docs/changelog.rst index 519ecae434..8bad047284 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -39,7 +39,7 @@ Bug fixes: * :doc:`/plugins/lyrics`: Fix crash bug when beautifulsoup4 is not installed. :bug:`4027` -* :doc:`/plugins/discogs`: Fix regex for new style of regex. +* :doc:`/plugins/discogs`: Adapt regex to new URL format . :bug: `4080` 1.5.0 (August 19, 2021) From a8b8d05ac5dcdf476a24ceb73122072b8cf744e7 Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Sun, 3 Oct 2021 10:23:10 +1000 Subject: [PATCH 5/9] Update comment around Discogs-ID --- beetsplug/discogs.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index f9c37c59cb..51a42eb2fc 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -208,8 +208,25 @@ def album_for_id(self, album_id): # Discogs-IDs are simple integers. We only look for those at the end # of an input string or after the "/release/" keyword as to avoid # confusion with other metadata plugins. + # the following possible inputs can be used for a Discogs-ID, + # apart from the last one: + # http://www.discogs.com/G%C3%BCnther-Lause-Meru-Ep/release/4354798 + # http://www.discogs.com/release/4354798-G%C3%BCnther-Lause-Meru-Ep + # http://www.discogs.com/G%C3%BCnther-4354798Lause-Meru-Ep/release/4354798 + # http://www.discogs.com/release/4354798-G%C3%BCnther-4354798Lause-Meru-Ep/ + # [r4354798] + # r4354798 + # 4354798 + # yet-another-metadata-provider.org/foo/12345 + # # An optional bracket can follow the integer, as this is how discogs # displays the release ID on its webpage. + # + # These examples have come about from a number of Github items: + # #291 Support for manually entered IDs in plugins + # #4080 discogs: Parse IDs from new release URL format + # + # Regex has been tested here https://regex101.com/r/wyLdB4/1 match = re.search( r"(^|\[*r|discogs\.com.*/release/)(\d+)(|\])", album_id) From db0431deed9edad4b4b337f71eb67b743156e2e3 Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Mon, 4 Oct 2021 10:53:06 +1000 Subject: [PATCH 6/9] Updated as per feedback from @wisp3rwind I think I have the match.group correct I'll also try and look at tests to make sure we can check --- beetsplug/discogs.py | 46 +++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 51a42eb2fc..759d9e470a 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -205,34 +205,28 @@ def album_for_id(self, album_id): return self._log.debug('Searching for release {0}', album_id) - # Discogs-IDs are simple integers. We only look for those at the end - # of an input string or after the "/release/" keyword as to avoid - # confusion with other metadata plugins. - # the following possible inputs can be used for a Discogs-ID, - # apart from the last one: - # http://www.discogs.com/G%C3%BCnther-Lause-Meru-Ep/release/4354798 - # http://www.discogs.com/release/4354798-G%C3%BCnther-Lause-Meru-Ep - # http://www.discogs.com/G%C3%BCnther-4354798Lause-Meru-Ep/release/4354798 - # http://www.discogs.com/release/4354798-G%C3%BCnther-4354798Lause-Meru-Ep/ - # [r4354798] - # r4354798 - # 4354798 - # yet-another-metadata-provider.org/foo/12345 - # - # An optional bracket can follow the integer, as this is how discogs - # displays the release ID on its webpage. - # - # These examples have come about from a number of Github items: - # #291 Support for manually entered IDs in plugins - # #4080 discogs: Parse IDs from new release URL format - # - # Regex has been tested here https://regex101.com/r/wyLdB4/1 - match = re.search( - r"(^|\[*r|discogs\.com.*/release/)(\d+)(|\])", album_id) - + # Discogs-IDs are simple integers. In order to avoid confusion with + # other metadata plugins, we only look for very specific formats of the + # input string: + # - plain integer, optionally wrapped in brackets and prefixed by an + # 'r', as this is how discogs displays the release ID on its webpage. + # - legacy url format: discogs.com//release/ + # - current url format: discogs.com/release/- + # See #291, #4080 and #4085 for the discussions leading up to these + # patterns. + # Regex has been tested here https://regex101.com/r/wyLdB4/2 + + for pattern in [ + r'^\[?r?(?P\d+)\]?$', + r'discogs\.com/release/(?P\d+)-', + r'discogs\.com/[^/]+/release/(?P\d+)', + ]: + match = re.search(pattern, album_id) + if match: + break if not match: return None - result = Release(self.discogs_client, {'id': int(match.group(2))}) + result = Release(self.discogs_client, {'id': int(match.group('id'))}) # Try to obtain title to verify that we indeed have a valid Release try: getattr(result, 'title') From f7539b3ec3a61f849ebd5c16a3e1da1e76c1dd2e Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Mon, 4 Oct 2021 17:38:35 +1000 Subject: [PATCH 7/9] Update to extract regex testing from album_for_id --- beetsplug/discogs.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 759d9e470a..2bf277d835 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -197,14 +197,9 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): self._log.debug('Connection error in album search', exc_info=True) return [] - def album_for_id(self, album_id): - """Fetches an album by its Discogs ID and returns an AlbumInfo object - or None if the album is not found. - """ - if not self.discogs_client: - return - - self._log.debug('Searching for release {0}', album_id) + @staticmethod + def extract_release_id_regex(album_id): + """Returns the Discogs_id or None.""" # Discogs-IDs are simple integers. In order to avoid confusion with # other metadata plugins, we only look for very specific formats of the # input string: @@ -226,7 +221,23 @@ def album_for_id(self, album_id): break if not match: return None - result = Release(self.discogs_client, {'id': int(match.group('id'))}) + return int(match.group('id')) + + def album_for_id(self, album_id): + """Fetches an album by its Discogs ID and returns an AlbumInfo object + or None if the album is not found. + """ + if not self.discogs_client: + return + + self._log.debug('Searching for release {0}', album_id) + + discogs_id = self.extract_release_id_regex(album_id) + + if not discogs_id: + return None + + result = Release(self.discogs_client, {'id': discogs_id}) # Try to obtain title to verify that we indeed have a valid Release try: getattr(result, 'title') @@ -239,7 +250,8 @@ def album_for_id(self, album_id): return self.album_for_id(album_id) return None except CONNECTION_ERRORS: - self._log.debug('Connection error in album lookup', exc_info=True) + self._log.debug('Connection error in album lookup', + exc_info=True) return None return self.get_album_info(result) From 109f4fa4300dfcd11ecc6e3d1c707d68e1f23fb6 Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Mon, 4 Oct 2021 17:40:07 +1000 Subject: [PATCH 8/9] Add tests for range of discogs id extraction --- test/test_discogs.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/test/test_discogs.py b/test/test_discogs.py index 8f1b077210..a919a22618 100644 --- a/test/test_discogs.py +++ b/test/test_discogs.py @@ -172,16 +172,16 @@ def test_parse_position(self): """Test the conversion of discogs `position` to medium, medium_index and subtrack_index.""" # List of tuples (discogs_position, (medium, medium_index, subindex) - positions = [('1', (None, '1', None)), - ('A12', ('A', '12', None)), - ('12-34', ('12-', '34', None)), - ('CD1-1', ('CD1-', '1', None)), - ('1.12', (None, '1', '12')), - ('12.a', (None, '12', 'A')), - ('12.34', (None, '12', '34')), - ('1ab', (None, '1', 'AB')), + positions = [('1', (None, '1', None)), + ('A12', ('A', '12', None)), + ('12-34', ('12-', '34', None)), + ('CD1-1', ('CD1-', '1', None)), + ('1.12', (None, '1', '12')), + ('12.a', (None, '12', 'A')), + ('12.34', (None, '12', '34')), + ('1ab', (None, '1', 'AB')), # Non-standard - ('IV', ('IV', None, None)), + ('IV', ('IV', None, None)), ] d = DiscogsPlugin() @@ -355,9 +355,28 @@ def test_parse_release_without_required_fields(self): self.assertEqual(d, None) self.assertIn('Release does not contain the required fields', logs[0]) + def test_album_for_id(self): + """Test parsing for a valid Discogs release_id""" + test_patterns = [('http://www.discogs.com/G%C3%BCnther-Lause-Meru-Ep/release/4354798', 4354798), # NOQA E501 + ('http://www.discogs.com/release/4354798-G%C3%BCnther-Lause-Meru-Ep', 4354798), # NOQA E501 + ('http://www.discogs.com/G%C3%BCnther-4354798Lause-Meru-Ep/release/4354798', 4354798), # NOQA E501 + ('http://www.discogs.com/release/4354798-G%C3%BCnther-4354798Lause-Meru-Ep/', 4354798), # NOQA E501 + ('[r4354798]', 4354798), + ('r4354798', 4354798), + ('4354798', 4354798), + ('yet-another-metadata-provider.org/foo/12345', ''), + ('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''), + ] + for test_pattern, expected in test_patterns: + match = DiscogsPlugin().extract_release_id_regex(test_pattern) + if not match: + match = '' + self.assertEqual(match, expected) + def suite(): return unittest.TestLoader().loadTestsFromName(__name__) + if __name__ == '__main__': unittest.main(defaultTest='suite') From b880e2db8af8418b9d6bef34a7fb69c5ab84f3b7 Mon Sep 17 00:00:00 2001 From: Andrew Rogl Date: Tue, 5 Oct 2021 13:00:38 +1000 Subject: [PATCH 9/9] Final clean up --- beetsplug/discogs.py | 7 +++---- test/test_discogs.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 2bf277d835..90486850a8 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -218,10 +218,9 @@ def extract_release_id_regex(album_id): ]: match = re.search(pattern, album_id) if match: - break - if not match: - return None - return int(match.group('id')) + return int(match.group('id')) + + return None def album_for_id(self, album_id): """Fetches an album by its Discogs ID and returns an AlbumInfo object diff --git a/test/test_discogs.py b/test/test_discogs.py index a919a22618..4e62e71249 100644 --- a/test/test_discogs.py +++ b/test/test_discogs.py @@ -368,7 +368,7 @@ def test_album_for_id(self): ('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''), ] for test_pattern, expected in test_patterns: - match = DiscogsPlugin().extract_release_id_regex(test_pattern) + match = DiscogsPlugin.extract_release_id_regex(test_pattern) if not match: match = '' self.assertEqual(match, expected)