-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#fix 1060 Add 'con' to featuring artists markers #1143
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -678,3 +678,17 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH): | |
return min(res[9], limit) | ||
else: | ||
return limit | ||
|
||
|
||
def feat_tokens(for_artist=True): | ||
"""Returns the tokens to use to detect featuring artists in strings.""" | ||
|
||
FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.'] | ||
FEAT_WORDS = ['ft', 'featuring', 'feat'] | ||
if for_artist: # appending to artist name enables more tokens | ||
FEAT_WORDS += ['with', 'vs', 'and', 'con'] | ||
regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS]) | ||
if for_artist: | ||
regex = r'(%s|%s)' % \ | ||
('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex) | ||
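There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason that these "special chars" get treated differently? It seems to me like they should get the same `\b` word-boundary treatment as the words. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought so too at first sight, but `\b` only matches between a word character and a non-word character, and `&` and the trailing `.` of `feat.`/`ft.` are non-word characters,
so these tokens are not words, and wrapping them in `\b` would keep them from matching when they are surrounded by spaces. |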
||
return regex |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
""" | ||
from beets.plugins import BeetsPlugin | ||
from beets import ui | ||
from beets.util import displayable_path | ||
from beets.util import displayable_path, feat_tokens | ||
from beets import config | ||
import logging | ||
import re | ||
|
@@ -30,25 +30,19 @@ def split_on_feat(artist): | |
artist, which is always a string, and the featuring artist, which | ||
may be a string or None if none is present. | ||
""" | ||
parts = re.split( | ||
r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.|and', | ||
artist, | ||
1, # Only split on the first "feat". | ||
) | ||
parts = [s.strip() for s in parts] | ||
# split on the first "feat". | ||
regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps the utility should just produce the regex without the capturing group `()`, so that callers add it when they need it instead of stripping it here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, you're right: it's more natural to add a capturing group than to remove it. |
||
parts = [s.strip() for s in regex.split(artist, 1)] | ||
if len(parts) == 1: | ||
return parts[0], None | ||
else: | ||
return parts | ||
return tuple(parts) | ||
|
||
|
||
def contains_feat(title): | ||
"""Determine whether the title contains a "featured" marker. | ||
""" | ||
return bool(re.search( | ||
r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&', | ||
title, | ||
)) | ||
return bool(re.search(feat_tokens(), title, flags=re.IGNORECASE)) | ||
|
||
|
||
def update_metadata(item, feat_part, drop_feat): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
from beets.plugins import BeetsPlugin | ||
from beets import ui | ||
from beets import config | ||
from beets.util import feat_tokens | ||
|
||
|
||
# Global logger. | ||
|
@@ -137,7 +138,7 @@ def search_pairs(item): | |
artists = [artist] | ||
|
||
# Remove any featuring artists from the artists name | ||
pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))" | ||
pattern = r"(.*?) %s" % feat_tokens() | ||
match = re.search(pattern, artist, re.IGNORECASE) | ||
if match: | ||
artists.append(match.group(1)) | ||
|
@@ -150,8 +151,8 @@ def search_pairs(item): | |
titles.append(match.group(1)) | ||
|
||
# Remove any featuring artists from the title | ||
pattern = r"(.*?) \b(ft|feat(uring)?)\b" | ||
for title in titles: | ||
pattern = r"(.*?) %s" % feat_tokens(for_artist=False) | ||
for title in titles[:]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wow; thanks for this subtle fix! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Iterating over `titles` while appending to it did not seem to be a problem with the previous implementation, but a test did not pass with the current code because of that, hence the copy (`titles[:]`). |
||
match = re.search(pattern, title, re.IGNORECASE) | ||
if match: | ||
titles.append(match.group(1)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# This file is part of beets. | ||
# Copyright 2014, Fabrice Laporte. | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining | ||
# a copy of this software and associated documentation files (the | ||
# "Software"), to deal in the Software without restriction, including | ||
# without limitation the rights to use, copy, modify, merge, publish, | ||
# distribute, sublicense, and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so, subject to | ||
# the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be | ||
# included in all copies or substantial portions of the Software. | ||
|
||
"""Tests for the 'ftintitle' plugin.""" | ||
|
||
from _common import unittest | ||
from beetsplug import ftintitle | ||
|
||
|
||
class FtInTitlePluginTest(unittest.TestCase): | ||
def setUp(self): | ||
"""Set up configuration""" | ||
ftintitle.FtInTitlePlugin() | ||
|
||
def test_split_on_feat(self): | ||
parts = ftintitle.split_on_feat('Alice ft. Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice feat Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice feat. Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice featuring Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice & Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice and Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice With Bob') | ||
self.assertEqual(parts, ('Alice', 'Bob')) | ||
parts = ftintitle.split_on_feat('Alice defeat Bob') | ||
self.assertEqual(parts, ('Alice defeat Bob', None)) | ||
|
||
def test_contains_feat(self): | ||
self.assertTrue(ftintitle.contains_feat('Alice ft. Bob')) | ||
self.assertTrue(ftintitle.contains_feat('Alice feat. Bob')) | ||
self.assertTrue(ftintitle.contains_feat('Alice feat Bob')) | ||
self.assertTrue(ftintitle.contains_feat('Alice featuring Bob')) | ||
self.assertTrue(ftintitle.contains_feat('Alice & Bob')) | ||
self.assertTrue(ftintitle.contains_feat('Alice and Bob')) | ||
self.assertTrue(ftintitle.contains_feat('Alice With Bob')) | ||
self.assertFalse(ftintitle.contains_feat('Alice defeat Bob')) | ||
|
||
|
||
def suite(): | ||
return unittest.TestLoader().loadTestsFromName(__name__) | ||
|
||
if __name__ == '__main__': | ||
unittest.main(defaultTest='suite') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Style nit: if these are local variables, they should be lower-case. They could also be moved to globals if that's your preference, or even moved to the config file (although that would require that this code not live in `util`, which doesn't use the config yet).