Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement album merging for duplicates #2725

Merged
merged 6 commits into from
Nov 11, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 81 additions & 22 deletions beets/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ def __init__(self, lib, loghandler, paths, query):
self.paths = paths
self.query = query
self._is_resuming = dict()
self._merged_items = set()
self._merged_dirs = set()

# Normalize the paths.
if self.paths:
Expand Down Expand Up @@ -350,6 +352,24 @@ def history_dirs(self):
self._history_dirs = history_get()
return self._history_dirs

def already_merged(self, paths):
    """Return True if every path being imported was already part of a
    merge performed by a previous task in this session.
    """
    return all(p in self._merged_items or p in self._merged_dirs
               for p in paths)

def mark_merged(self, paths):
    """Mark `paths` (and their containing directories) as merged so
    that future reimport tasks covering the same files can be skipped.
    """
    self._merged_items.update(paths)
    # For an existing file, record its parent directory; any other path
    # (e.g. an album directory) is recorded as-is.
    dirs = {os.path.dirname(path) if os.path.isfile(path) else path
            for path in paths}
    self._merged_dirs.update(dirs)

def is_resuming(self, toppath):
"""Return `True` if user wants to resume import of this path.

Expand Down Expand Up @@ -443,6 +463,7 @@ def __init__(self, toppath, paths, items):
self.candidates = []
self.rec = None
self.should_remove_duplicates = False
self.should_merge_duplicates = False
self.is_album = True
self.search_ids = [] # user-supplied candidate IDs.

Expand Down Expand Up @@ -632,10 +653,11 @@ def find_duplicates(self, lib):
))

for album in lib.albums(duplicate_query):
# Check whether the album is identical in contents, in which
# case it is not a duplicate (will be replaced).
# Check whether the album paths are all present in the task
# i.e. album is being completely re-imported by the task,
# in which case it is not a duplicate (will be replaced).
album_paths = set(i.path for i in album.items())
if album_paths != task_paths:
if not (album_paths <= task_paths):
duplicates.append(album)
return duplicates

Expand Down Expand Up @@ -1225,6 +1247,27 @@ def read_item(self, path):
displayable_path(path), exc)


# Pipeline utilities

def _freshen_items(items):
# Clear IDs from re-tagged items so they appear "fresh" when
# we add them back to the library.
for item in items:
item.id = None
item.album_id = None


def _extend_pipeline(tasks, *stages):
    """Return a pipeline extension that feeds `tasks` through `stages`.

    `tasks` may be a plain list (which is converted to an iterator) or
    an iterator already usable as a pipeline source.
    """
    # isinstance() is the idiomatic type check (rather than comparing
    # type() to list directly).
    task_iter = iter(tasks) if isinstance(tasks, list) else tasks

    ipl = pipeline.Pipeline([task_iter] + list(stages))
    return pipeline.multiple(ipl.pull())


# Full-album pipeline stages.

def read_tasks(session):
Expand Down Expand Up @@ -1270,12 +1313,7 @@ def query_tasks(session):
log.debug(u'yielding album {0}: {1} - {2}',
album.id, album.albumartist, album.album)
items = list(album.items())

# Clear IDs from re-tagged items so they appear "fresh" when
# we add them back to the library.
for item in items:
item.id = None
item.album_id = None
_freshen_items(items)

task = ImportTask(None, [album.item_dir()], items)
for task in task.handle_created(session):
Expand Down Expand Up @@ -1321,6 +1359,9 @@ def user_query(session, task):
if task.skip:
return task

if session.already_merged(task.paths):
return pipeline.BUBBLE

# Ask the user for a choice.
task.choose_match(session)
plugins.send('import_task_choice', session=session, task=task)
Expand All @@ -1335,24 +1376,38 @@ def emitter(task):
yield new_task
yield SentinelImportTask(task.toppath, task.paths)

ipl = pipeline.Pipeline([
emitter(task),
lookup_candidates(session),
user_query(session),
])
return pipeline.multiple(ipl.pull())
return _extend_pipeline(emitter(task),
lookup_candidates(session),
user_query(session))

# As albums: group items by albums and create task for each album
if task.choice_flag is action.ALBUMS:
ipl = pipeline.Pipeline([
iter([task]),
group_albums(session),
lookup_candidates(session),
user_query(session)
])
return pipeline.multiple(ipl.pull())
return _extend_pipeline([task],
group_albums(session),
lookup_candidates(session),
user_query(session))

resolve_duplicates(session, task)

if task.should_merge_duplicates:
# Create a new task for tagging the current items
# and duplicates together
duplicate_items = task.duplicate_items(session.lib)

# Duplicates would be reimported so make them look "fresh"
_freshen_items(duplicate_items)
duplicate_paths = [item.path for item in duplicate_items]

# Record merged paths in the session so they are not reimported
session.mark_merged(duplicate_paths)

merged_task = ImportTask(None, task.paths + duplicate_paths,
task.items + duplicate_items)

return _extend_pipeline([merged_task],
lookup_candidates(session),
user_query(session))

apply_choice(session, task)
return task

Expand All @@ -1373,6 +1428,7 @@ def resolve_duplicates(session, task):
u'skip': u's',
u'keep': u'k',
u'remove': u'r',
u'merge': u'm',
u'ask': u'a',
})
log.debug(u'default action for duplicates: {0}', duplicate_action)
Expand All @@ -1386,6 +1442,9 @@ def resolve_duplicates(session, task):
elif duplicate_action == u'r':
# Remove old.
task.should_remove_duplicates = True
elif duplicate_action == u'm':
# Merge duplicates together
task.should_merge_duplicates = True
else:
# No default action set; ask the session.
session.resolve_duplicate(task, found_duplicates)
Expand Down
4 changes: 3 additions & 1 deletion beets/ui/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ def resolve_duplicate(self, task, found_duplicates):
))

sel = ui.input_options(
(u'Skip new', u'Keep both', u'Remove old')
(u'Skip new', u'Keep both', u'Remove old', u'Merge all')
)

if sel == u's':
Expand All @@ -803,6 +803,8 @@ def resolve_duplicate(self, task, found_duplicates):
elif sel == u'r':
# Remove old.
task.should_remove_duplicates = True
elif sel == u'm':
task.should_merge_duplicates = True
else:
assert False

Expand Down
16 changes: 12 additions & 4 deletions docs/guides/tagger.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,17 +234,25 @@ If beets finds an album or item in your library that seems to be the same as the
one you're importing, you may see a prompt like this::

This album is already in the library!
[S]kip new, Keep both, Remove old?
[S]kip new, Keep both, Remove old, Merge all?

Beets wants to keep you safe from duplicates, which can be a real pain, so you
have three choices in this situation. You can skip importing the new music,
have four choices in this situation. You can skip importing the new music,
choosing to keep the stuff you already have in your library; you can keep both
the old and the new music; or you can remove the existing music and choose the
new stuff. If you choose that last "trump" option, any duplicates will be
the old and the new music; you can remove the existing music and choose the
new stuff; or you can merge the newly imported album and existing duplicate
into one single album.
If you choose that "remove" option, any duplicates will be
removed from your library database---and, if the corresponding files are located
inside of your beets library directory, the files themselves will be deleted as
well.

If you choose "merge", beets will try re-importing the existing and new tracks
as one bundle so they will get tagged together appropriately.
This is particularly helpful when you are importing extra tracks for an album
already in your library that has missing tracks: beets will ask you the same
questions as it would if you were importing all the tracks at once.

If you choose to keep two identically-named albums, beets can avoid storing both
in the same directory. See :ref:`aunique` for details.

Expand Down
11 changes: 6 additions & 5 deletions docs/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -571,11 +571,12 @@ Default: ``yes``.
duplicate_action
~~~~~~~~~~~~~~~~

Either ``skip``, ``keep``, ``remove``, or ``ask``. Controls how duplicates
are treated in import task. "skip" means that new item(album or track) will be
skipped; "keep" means keep both old and new items; "remove" means remove old
item; "ask" means the user should be prompted for the action each time.
The default is ``ask``.
Either ``skip``, ``keep``, ``remove``, ``merge``, or ``ask``.
Controls how duplicates are treated during an import task.
"skip" means that the new item (album or track) will be skipped;
"keep" means keep both the old and new items; "remove" means remove the
old item; "merge" means merge everything into one album; "ask" means the
user should be prompted for the action each time. The default is ``ask``.

.. _bell:

Expand Down
4 changes: 3 additions & 1 deletion test/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def choose_match(self, task):

choose_item = choose_match

Resolution = Enum('Resolution', 'REMOVE SKIP KEEPBOTH')
Resolution = Enum('Resolution', 'REMOVE SKIP KEEPBOTH MERGE')

default_resolution = 'REMOVE'

Expand All @@ -553,6 +553,8 @@ def resolve_duplicate(self, task, found_duplicates):
task.set_choice(importer.action.SKIP)
elif res == self.Resolution.REMOVE:
task.should_remove_duplicates = True
elif res == self.Resolution.MERGE:
task.should_merge_duplicates = True


def generate_album_info(album_id, track_ids):
Expand Down
6 changes: 6 additions & 0 deletions test/test_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,12 @@ def test_skip_duplicate_album(self):
item = self.lib.items().get()
self.assertEqual(item.title, u't\xeftle 0')

def test_merge_duplicate_album(self):
    # Resolving a duplicate with MERGE should re-import the existing
    # and new tracks together, leaving a single merged album in the
    # library rather than two.
    self.importer.default_resolution = self.importer.Resolution.MERGE
    self.importer.run()

    self.assertEqual(len(self.lib.albums()), 1)

def test_twice_in_import_dir(self):
self.skipTest('write me')

Expand Down