Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#fix 1060 Add 'con' to featuring artists markers #1143

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,3 +678,17 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
return min(res[9], limit)
else:
return limit


def feat_tokens(for_artist=True):
"""Returns the tokens to use to detect featuring artists in strings."""

FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style nit: if these are local variables, they should be lower-case. They could also be moved to globals if that's your preference, or even moved to the config file (although that would require that this code not live in util, which doesn't use the config yet).

FEAT_WORDS = ['ft', 'featuring', 'feat']
if for_artist: # appending to artist name enables more tokens
FEAT_WORDS += ['with', 'vs', 'and', 'con']
regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS])
if for_artist:
regex = r'(%s|%s)' % \
('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason that these "special chars" get treated differently? It seems to me like they should get the same \b treatment as the other words.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought so too at first sight, but the `re` documentation says:

\b Matches the empty string, but only at the beginning or end of a word. A word is defined as a sequence of alphanumeric or underscore characters [...]

So these tokens are not words, and \b doesn't work with them.

return regex
18 changes: 6 additions & 12 deletions beetsplug/ftintitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"""
from beets.plugins import BeetsPlugin
from beets import ui
from beets.util import displayable_path
from beets.util import displayable_path, feat_tokens
from beets import config
import logging
import re
Expand All @@ -30,25 +30,19 @@ def split_on_feat(artist):
artist, which is always a string, and the featuring artist, which
may be a string or None if none is present.
"""
parts = re.split(
r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.|and',
artist,
1, # Only split on the first "feat".
)
parts = [s.strip() for s in parts]
# split on the first "feat".
regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps the utility should just produce the regex without ()s in the first place—or, if the problem is capturing, use (?:this) for a non-capturing group.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, you're right: it's more natural to add a capturing group than to remove it.

parts = [s.strip() for s in regex.split(artist, 1)]
if len(parts) == 1:
return parts[0], None
else:
return parts
return tuple(parts)


def contains_feat(title):
"""Determine whether the title contains a "featured" marker.
"""
return bool(re.search(
r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&',
title,
))
return bool(re.search(feat_tokens(), title, flags=re.IGNORECASE))


def update_metadata(item, feat_part, drop_feat):
Expand Down
7 changes: 4 additions & 3 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from beets.plugins import BeetsPlugin
from beets import ui
from beets import config
from beets.util import feat_tokens


# Global logger.
Expand Down Expand Up @@ -137,7 +138,7 @@ def search_pairs(item):
artists = [artist]

# Remove any featuring artists from the artists name
pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))"
pattern = r"(.*?) %s" % feat_tokens()
match = re.search(pattern, artist, re.IGNORECASE)
if match:
artists.append(match.group(1))
Expand All @@ -150,8 +151,8 @@ def search_pairs(item):
titles.append(match.group(1))

# Remove any featuring artists from the title
pattern = r"(.*?) \b(ft|feat(uring)?)\b"
for title in titles:
pattern = r"(.*?) %s" % feat_tokens(for_artist=False)
for title in titles[:]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow; thanks for this subtle fix!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It did not seem to be a problem with the previous implementation, but a test did not pass with the current code because of that.

match = re.search(pattern, title, re.IGNORECASE)
if match:
titles.append(match.group(1))
Expand Down
59 changes: 59 additions & 0 deletions test/test_ftintitle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# This file is part of beets.
# Copyright 2014, Fabrice Laporte.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Tests for the 'ftintitle' plugin."""

from _common import unittest
from beetsplug import ftintitle


class FtInTitlePluginTest(unittest.TestCase):
    """Check the featuring-artist helpers exposed by the 'ftintitle' plugin.

    Covers ``split_on_feat`` and ``contains_feat`` for each marker, including
    the 'con' marker, plus negative cases where a marker only appears as part
    of a longer word.
    """

    def setUp(self):
        """Set up configuration"""
        # The plugin instance is not kept; it is created only for its side
        # effects (presumably registering the plugin's config defaults --
        # confirm against FtInTitlePlugin.__init__).
        ftintitle.FtInTitlePlugin()

    def test_split_on_feat(self):
        """Each marker splits the artist string into (main, featured)."""
        parts = ftintitle.split_on_feat('Alice ft. Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        parts = ftintitle.split_on_feat('Alice feat Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        parts = ftintitle.split_on_feat('Alice feat. Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        parts = ftintitle.split_on_feat('Alice featuring Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        parts = ftintitle.split_on_feat('Alice & Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        parts = ftintitle.split_on_feat('Alice and Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        parts = ftintitle.split_on_feat('Alice With Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        # 'con' is the marker this change introduces; cover it explicitly.
        parts = ftintitle.split_on_feat('Alice con Bob')
        self.assertEqual(parts, ('Alice', 'Bob'))
        # Markers embedded in a longer word must not trigger a split.
        parts = ftintitle.split_on_feat('Alice defeat Bob')
        self.assertEqual(parts, ('Alice defeat Bob', None))
        parts = ftintitle.split_on_feat('Alice Bacon')
        self.assertEqual(parts, ('Alice Bacon', None))

    def test_contains_feat(self):
        """Each marker is detected; embedded look-alikes are not."""
        self.assertTrue(ftintitle.contains_feat('Alice ft. Bob'))
        self.assertTrue(ftintitle.contains_feat('Alice feat. Bob'))
        self.assertTrue(ftintitle.contains_feat('Alice feat Bob'))
        self.assertTrue(ftintitle.contains_feat('Alice featuring Bob'))
        self.assertTrue(ftintitle.contains_feat('Alice & Bob'))
        self.assertTrue(ftintitle.contains_feat('Alice and Bob'))
        self.assertTrue(ftintitle.contains_feat('Alice With Bob'))
        # 'con' is the marker this change introduces; cover it explicitly.
        self.assertTrue(ftintitle.contains_feat('Alice con Bob'))
        # Markers embedded in a longer word must not be detected.
        self.assertFalse(ftintitle.contains_feat('Alice defeat Bob'))
        self.assertFalse(ftintitle.contains_feat('Alice Bacon'))


def suite():
    """Return a test suite loaded by name from this module."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)

if __name__ == '__main__':
unittest.main(defaultTest='suite')