Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add musicbrainz id option to importer #1808

Merged
merged 10 commits into from
Jan 22, 2016
52 changes: 30 additions & 22 deletions beets/autotag/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def _add_candidate(items, results, info):


def tag_album(items, search_artist=None, search_album=None,
search_id=None):
search_ids=[]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably describe search_ids in the docstring. (Although I note we didn't document search_id before—bad @sampsyo!)

"""Return a tuple of a artist name, an album name, a list of
`AlbumMatch` candidates from the metadata backend, and a
`Recommendation`.
Expand All @@ -380,8 +380,11 @@ def tag_album(items, search_artist=None, search_album=None,

The `AlbumMatch` objects are generated by searching the metadata
backends. By default, the metadata of the items is used for the
search. This can be customized by setting the parameters. The
`mapping` field of the album has the matched `items` as keys.
search. This can be customized by setting the parameters.
`search_ids` is a list of MusicBrainz release IDs: if specified,
it will restrict the candidates to those IDs, ignoring
`search_artist` and `search album`. The `mapping` field of the
album has the matched `items` as keys.

The recommendation is calculated from the match quality of the
candidates.
Expand All @@ -397,9 +400,11 @@ def tag_album(items, search_artist=None, search_album=None,
candidates = {}

# Search by explicit ID.
if search_id is not None:
log.debug(u'Searching for album ID: {0}', search_id)
search_cands = hooks.albums_for_id(search_id)
if search_ids:
search_cands = []
for search_id in search_ids:
log.debug(u'Searching for album ID: {0}', search_id)
search_cands.extend(hooks.albums_for_id(search_id))

# Use existing metadata or text search.
else:
Expand Down Expand Up @@ -444,35 +449,38 @@ def tag_album(items, search_artist=None, search_album=None,


def tag_item(item, search_artist=None, search_title=None,
search_id=None):
search_ids=[]):
"""Attempts to find metadata for a single track. Returns a
`(candidates, recommendation)` pair where `candidates` is a list of
TrackMatch objects. `search_artist` and `search_title` may be used
to override the current metadata for the purposes of the MusicBrainz
title; likewise `search_id`.
title. `search_ids` may be used for restricting the search to a list
of MusicBrainz IDs.
"""
# Holds candidates found so far: keys are MBIDs; values are
# (distance, TrackInfo) pairs.
candidates = {}

# First, try matching by MusicBrainz ID.
trackid = search_id or item.mb_trackid
if trackid:
log.debug(u'Searching for track ID: {0}', trackid)
for track_info in hooks.tracks_for_id(trackid):
dist = track_distance(item, track_info, incl_artist=True)
candidates[track_info.track_id] = \
hooks.TrackMatch(dist, track_info)
# If this is a good match, then don't keep searching.
rec = _recommendation(candidates.values())
if rec == Recommendation.strong and not config['import']['timid']:
log.debug(u'Track ID match.')
return candidates.values(), rec
trackids = search_ids or filter(None, [item.mb_trackid])
if trackids:
for trackid in trackids:
log.debug(u'Searching for track ID: {0}', trackid)
for track_info in hooks.tracks_for_id(trackid):
dist = track_distance(item, track_info, incl_artist=True)
candidates[track_info.track_id] = \
hooks.TrackMatch(dist, track_info)
# If this is a good match, then don't keep searching.
rec = _recommendation(sorted(candidates.itervalues()))
if rec == Recommendation.strong and \
not config['import']['timid']:
log.debug(u'Track ID match.')
return sorted(candidates.itervalues()), rec

# If we're searching by ID, don't proceed.
if search_id is not None:
if search_ids:
if candidates:
return candidates.values(), rec
return sorted(candidates.itervalues()), rec
else:
return [], Recommendation.none

Expand Down
1 change: 1 addition & 0 deletions beets/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import:
flat: no
group_albums: no
pretend: false
musicbrainz_ids: []

clutter: ["Thumbs.DB", ".DS_Store"]
ignore: [".*", "*~", "System Volume Information"]
Expand Down
15 changes: 12 additions & 3 deletions beets/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ def __init__(self, toppath, paths, items):
self.rec = None
self.should_remove_duplicates = False
self.is_album = True
self.musicbrainz_ids = [] # user-supplied candidate IDs.

def set_choice(self, choice):
"""Given an AlbumMatch or TrackMatch object or an action constant,
Expand Down Expand Up @@ -579,10 +580,12 @@ def handle_created(self, session):
return tasks

def lookup_candidates(self):
"""Retrieve and store candidates for this album.
"""Retrieve and store candidates for this album. User-specified
candidate IDs are stored in self.musicbrainz_ids: if present, the
initial lookup is restricted to only those IDs.
"""
artist, album, candidates, recommendation = \
autotag.tag_album(self.items)
autotag.tag_album(self.items, search_ids=self.musicbrainz_ids)
self.cur_artist = artist
self.cur_album = album
self.candidates = candidates
Expand Down Expand Up @@ -821,7 +824,8 @@ def _emit_imported(self, lib):
plugins.send('item_imported', lib=lib, item=item)

def lookup_candidates(self):
candidates, recommendation = autotag.tag_item(self.item)
candidates, recommendation = autotag.tag_item(
self.item, search_ids=self.musicbrainz_ids)
self.candidates = candidates
self.rec = recommendation

Expand Down Expand Up @@ -1246,6 +1250,11 @@ def lookup_candidates(session, task):

plugins.send('import_task_start', session=session, task=task)
log.debug(u'Looking up: {0}', displayable_path(task.paths))

# Restrict the initial lookup to IDs specified by the user via the -m
# option. Currently all the IDs are passed onto the tasks directly.
task.musicbrainz_ids = session.config['musicbrainz_ids'].as_str_seq()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks great for now. As you note in your other comment, we can move this around later if we want a more sophisticated way of populating this.


task.lookup_candidates()


Expand Down
10 changes: 7 additions & 3 deletions beets/ui/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ def choose_match(self, task):
search_id = manual_id(False)
if search_id:
_, _, candidates, rec = autotag.tag_album(
task.items, search_id=search_id
task.items, search_ids=search_id.split(' ')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably use .split() with no argument here, to split on any whitespace. That makes text entry a bit more robust to, for example, accidentally hitting the space bar twice.

)
elif choice in extra_ops.keys():
# Allow extra ops to automatically set the post-choice.
Expand Down Expand Up @@ -786,8 +786,8 @@ def choose_item(self, task):
# Ask for a track ID.
search_id = manual_id(True)
if search_id:
candidates, rec = autotag.tag_item(task.item,
search_id=search_id)
candidates, rec = autotag.tag_item(
task.item, search_ids=search_id.split(' '))
elif choice in extra_ops.keys():
# Allow extra ops to automatically set the post-choice.
post_choice = extra_ops[choice](self, task)
Expand Down Expand Up @@ -1022,6 +1022,10 @@ def import_func(lib, opts, args):
'--pretend', dest='pretend', action='store_true',
help='just print the files to import'
)
import_cmd.parser.add_option(
'-m', '--musicbrainzid', dest='musicbrainz_ids', action='append',
help='restrict the matching to a single MusicBrainz id'
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a tiny nitpick, but these technically don't need to be MBIDs. Other backends can also interpret these IDs (for now, just the Discogs plugin).

Alas, I don't have a great idea for another option name since -i and -I are both taken. Perhaps -f for --force-id? Or maybe -d as a second-choice contraction of --id? Or -S for --search-id?

import_cmd.func = import_func
default_commands.append(import_cmd)

Expand Down
2 changes: 1 addition & 1 deletion beetsplug/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def match_benchmark(lib, prof, query=None, album_id=None):

# Run the match.
def _run_match():
match.tag_album(items, search_id=album_id)
match.tag_album(items, search_ids=[album_id])
if prof:
cProfile.runctx('_run_match()', {}, {'_run_match': _run_match},
'match.prof')
Expand Down
9 changes: 7 additions & 2 deletions docs/guides/tagger.rst
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,10 @@ When beets needs your input about a match, it says something like this::
Beirut - Lon Gisland
(Similarity: 94.4%)
* Scenic World (Second Version) -> Scenic World
[A]pply, More candidates, Skip, Use as-is, as Tracks, Enter search, or aBort?
[A]pply, More candidates, Skip, Use as-is, as Tracks, Enter search, enter Id, or aBort?

When beets asks you this question, it wants you to enter one of the capital
letters: A, M, S, U, T, G, E, or B. That is, you can choose one of the
letters: A, M, S, U, T, G, E, I or B. That is, you can choose one of the
following:

* *A*: Apply the suggested changes shown and move on.
Expand All @@ -190,6 +190,11 @@ following:
option if beets hasn't found any good options because the album is mistagged
or untagged.

* *I*: Enter a MusicBrainz id to use as search in the database. Use this option
to specify a MusicBrainz entity (release or recording) directly, by pasting
its ID or the full URL. You can also specify several IDs by separating them
by a space.

* *B*: Cancel this import task altogether. No further albums will be tagged;
beets shuts down immediately. The next time you attempt to import the same
directory, though, beets will ask you if you want to resume tagging where you
Expand Down
5 changes: 5 additions & 0 deletions docs/reference/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ Optional command flags:
option. If set, beets will just print a list of files that it would
otherwise import.

* If you already have a MusicBrainz ID that matches the items to be imported,
you can instruct beets to restrict the search to that ID instead of searching
for other candidates by using the ``--musicbrainzid MB_ID`` option. Multiple
IDs can be specified by simply repeating the option several times.

.. _rarfile: https://pypi.python.org/pypi/rarfile/2.2

.. only:: html
Expand Down
163 changes: 163 additions & 0 deletions test/test_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,169 @@ def test_import_pretend_empty(self):
.format(displayable_path(self.empty_path))])


class ImportMusicBrainzIdTest(_common.TestCase, ImportHelper):
"""Test the --musicbrainzid argument."""

MB_RELEASE_PREFIX = 'https://musicbrainz.org/release/'
MB_RECORDING_PREFIX = 'https://musicbrainz.org/recording/'
ID_RELEASE_0 = '00000000-0000-0000-0000-000000000000'
ID_RELEASE_1 = '11111111-1111-1111-1111-111111111111'
ID_RECORDING_0 = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'
ID_RECORDING_1 = 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'

def setUp(self):
self.setup_beets()
self._create_import_dir(1)

# Patch calls to musicbrainzngs.
self.release_patcher = patch('musicbrainzngs.get_release_by_id',
side_effect=mocked_get_release_by_id)
self.recording_patcher = patch('musicbrainzngs.get_recording_by_id',
side_effect=mocked_get_recording_by_id)
self.release_patcher.start()
self.recording_patcher.start()

def tearDown(self):
self.recording_patcher.stop()
self.release_patcher.stop()
self.teardown_beets()

def test_one_mbid_one_album(self):
self.config['import']['musicbrainz_ids'] = \
[self.MB_RELEASE_PREFIX + self.ID_RELEASE_0]
self._setup_import_session()

self.importer.add_choice(importer.action.APPLY)
self.importer.run()
self.assertEqual(self.lib.albums().get().album, 'VALID_RELEASE_0')

def test_several_mbid_one_album(self):
self.config['import']['musicbrainz_ids'] = \
[self.MB_RELEASE_PREFIX + self.ID_RELEASE_0,
self.MB_RELEASE_PREFIX + self.ID_RELEASE_1]
self._setup_import_session()

self.importer.add_choice(2) # Pick the 2nd best match (release 1).
self.importer.add_choice(importer.action.APPLY)
self.importer.run()
self.assertEqual(self.lib.albums().get().album, 'VALID_RELEASE_1')

def test_one_mbid_one_singleton(self):
self.config['import']['musicbrainz_ids'] = \
[self.MB_RECORDING_PREFIX + self.ID_RECORDING_0]
self._setup_import_session(singletons=True)

self.importer.add_choice(importer.action.APPLY)
self.importer.run()
self.assertEqual(self.lib.items().get().title, 'VALID_RECORDING_0')

def test_several_mbid_one_singleton(self):
self.config['import']['musicbrainz_ids'] = \
[self.MB_RECORDING_PREFIX + self.ID_RECORDING_0,
self.MB_RECORDING_PREFIX + self.ID_RECORDING_1]
self._setup_import_session(singletons=True)

self.importer.add_choice(2) # Pick the 2nd best match (recording 1).
self.importer.add_choice(importer.action.APPLY)
self.importer.run()
self.assertEqual(self.lib.items().get().title, 'VALID_RECORDING_1')

def test_candidates_album(self):
"""Test directly ImportTask.lookup_candidates()."""
task = importer.ImportTask(paths=self.import_dir,
toppath='top path',
items=[_common.item()])
task.musicbrainz_ids = [self.MB_RELEASE_PREFIX + self.ID_RELEASE_0,
self.MB_RELEASE_PREFIX + self.ID_RELEASE_1,
'an invalid and discarded id']

task.lookup_candidates()
self.assertEqual(set(['VALID_RELEASE_0', 'VALID_RELEASE_1']),
set([c.info.album for c in task.candidates]))

def test_candidates_singleton(self):
"""Test directly SingletonImportTask.lookup_candidates()."""
task = importer.SingletonImportTask(toppath='top path',
item=_common.item())
task.musicbrainz_ids = [self.MB_RECORDING_PREFIX + self.ID_RECORDING_0,
self.MB_RECORDING_PREFIX + self.ID_RECORDING_1,
'an invalid and discarded id']

task.lookup_candidates()
self.assertEqual(set(['VALID_RECORDING_0', 'VALID_RECORDING_1']),
set([c.info.title for c in task.candidates]))


# Helpers for ImportMusicBrainzIdTest.


def mocked_get_release_by_id(id_, includes=[], release_status=[],
release_type=[]):
"""Mimic musicbrainzngs.get_release_by_id, accepting only a restricted list
of MB ids (ID_RELEASE_0, ID_RELEASE_1). The returned dict differs only in
the release title and artist name, so that ID_RELEASE_0 is a closer match
to the items created by ImportHelper._create_import_dir()."""
# Map IDs to (release title, artist), so the distances are different.
releases = {ImportMusicBrainzIdTest.ID_RELEASE_0: ('VALID_RELEASE_0',
'TAG ARTIST'),
ImportMusicBrainzIdTest.ID_RELEASE_1: ('VALID_RELEASE_1',
'DISTANT_MATCH')}

return {
'release': {
'title': releases[id_][0],
'id': id_,
'medium-list': [{
'track-list': [{
'recording': {
'title': 'foo',
'id': 'bar',
'length': 59,
},
'position': 9,
}],
'position': 5,
}],
'artist-credit': [{
'artist': {
'name': releases[id_][1],
'id': 'some-id',
},
}],
'release-group': {
'id': 'another-id',
}
}
}


def mocked_get_recording_by_id(id_, includes=[], release_status=[],
release_type=[]):
"""Mimic musicbrainzngs.get_recording_by_id, accepting only a restricted
list of MB ids (ID_RECORDING_0, ID_RECORDING_1). The returned dict differs
only in the recording title and artist name, so that ID_RECORDING_0 is a
closer match to the items created by ImportHelper._create_import_dir()."""
# Map IDs to (recording title, artist), so the distances are different.
releases = {ImportMusicBrainzIdTest.ID_RECORDING_0: ('VALID_RECORDING_0',
'TAG ARTIST'),
ImportMusicBrainzIdTest.ID_RECORDING_1: ('VALID_RECORDING_1',
'DISTANT_MATCH')}

return {
'recording': {
'title': releases[id_][0],
'id': id_,
'length': 59,
'artist-credit': [{
'artist': {
'name': releases[id_][1],
'id': 'some-id',
},
}],
}
}


def suite():
return unittest.TestLoader().loadTestsFromName(__name__)

Expand Down