diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 76036f1be9..90486850a8 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -197,6 +197,31 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): self._log.debug('Connection error in album search', exc_info=True) return [] + @staticmethod + def extract_release_id_regex(album_id): + """Returns the Discogs_id or None.""" + # Discogs-IDs are simple integers. In order to avoid confusion with + # other metadata plugins, we only look for very specific formats of the + # input string: + # - plain integer, optionally wrapped in brackets and prefixed by an + # 'r', as this is how discogs displays the release ID on its webpage. + # - legacy url format: discogs.com//release/ + # - current url format: discogs.com/release/- + # See #291, #4080 and #4085 for the discussions leading up to these + # patterns. + # Regex has been tested here https://regex101.com/r/wyLdB4/2 + + for pattern in [ + r'^\[?r?(?P\d+)\]?$', + r'discogs\.com/release/(?P\d+)-', + r'discogs\.com/[^/]+/release/(?P\d+)', + ]: + match = re.search(pattern, album_id) + if match: + return int(match.group('id')) + + return None + def album_for_id(self, album_id): """Fetches an album by its Discogs ID and returns an AlbumInfo object or None if the album is not found. @@ -205,15 +230,13 @@ def album_for_id(self, album_id): return self._log.debug('Searching for release {0}', album_id) - # Discogs-IDs are simple integers. We only look for those at the end - # of an input string as to avoid confusion with other metadata plugins. - # An optional bracket can follow the integer, as this is how discogs - # displays the release ID on its webpage. - match = re.search(r'(^|\[*r|discogs\.com/.+/release/)(\d+)($|\])', - album_id) - if not match: + + discogs_id = self.extract_release_id_regex(album_id) + + if not discogs_id: return None - result = Release(self.discogs_client, {'id': int(match.group(2))}) + + result = Release(self.discogs_client, {'id': discogs_id}) # Try to obtain title to verify that we indeed have a valid Release try: getattr(result, 'title') @@ -226,7 +249,8 @@ def album_for_id(self, album_id): return self.album_for_id(album_id) return None except CONNECTION_ERRORS: - self._log.debug('Connection error in album lookup', exc_info=True) + self._log.debug('Connection error in album lookup', + exc_info=True) return None return self.get_album_info(result) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9581c820f0..8bad047284 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -39,6 +39,9 @@ Bug fixes: * :doc:`/plugins/lyrics`: Fix crash bug when beautifulsoup4 is not installed. :bug:`4027` +* :doc:`/plugins/discogs`: Adapt regex to new URL format . + :bug: `4080` + 1.5.0 (August 19, 2021) ----------------------- diff --git a/test/test_discogs.py b/test/test_discogs.py index 8f1b077210..4e62e71249 100644 --- a/test/test_discogs.py +++ b/test/test_discogs.py @@ -172,16 +172,16 @@ def test_parse_position(self): """Test the conversion of discogs `position` to medium, medium_index and subtrack_index.""" # List of tuples (discogs_position, (medium, medium_index, subindex) - positions = [('1', (None, '1', None)), - ('A12', ('A', '12', None)), - ('12-34', ('12-', '34', None)), - ('CD1-1', ('CD1-', '1', None)), - ('1.12', (None, '1', '12')), - ('12.a', (None, '12', 'A')), - ('12.34', (None, '12', '34')), - ('1ab', (None, '1', 'AB')), + positions = [('1', (None, '1', None)), + ('A12', ('A', '12', None)), + ('12-34', ('12-', '34', None)), + ('CD1-1', ('CD1-', '1', None)), + ('1.12', (None, '1', '12')), + ('12.a', (None, '12', 'A')), + ('12.34', (None, '12', '34')), + ('1ab', (None, '1', 'AB')), # Non-standard - ('IV', ('IV', None, None)), + ('IV', ('IV', None, None)), ] d = DiscogsPlugin() @@ -355,9 +355,28 @@ def test_parse_release_without_required_fields(self): self.assertEqual(d, None) self.assertIn('Release does not contain the required fields', logs[0]) + def test_album_for_id(self): + """Test parsing for a valid Discogs release_id""" + test_patterns = [('http://www.discogs.com/G%C3%BCnther-Lause-Meru-Ep/release/4354798', 4354798), # NOQA E501 + ('http://www.discogs.com/release/4354798-G%C3%BCnther-Lause-Meru-Ep', 4354798), # NOQA E501 + ('http://www.discogs.com/G%C3%BCnther-4354798Lause-Meru-Ep/release/4354798', 4354798), # NOQA E501 + ('http://www.discogs.com/release/4354798-G%C3%BCnther-4354798Lause-Meru-Ep/', 4354798), # NOQA E501 + ('[r4354798]', 4354798), + ('r4354798', 4354798), + ('4354798', 4354798), + ('yet-another-metadata-provider.org/foo/12345', ''), + ('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''), + ] + for test_pattern, expected in test_patterns: + match = DiscogsPlugin.extract_release_id_regex(test_pattern) + if not match: + match = '' + self.assertEqual(match, expected) + def suite(): return unittest.TestLoader().loadTestsFromName(__name__) + if __name__ == '__main__': unittest.main(defaultTest='suite')