Skip to content

Commit

Permalink
Simplify word boundaries (#1060)
Browse files (browse the repository at this point in the history)
Use lookahead/lookbehind matching to ensure there is whitespace around the
token. Replaces the use of \b, which doesn't work for "ft.", etc.
  • Loading branch information
sampsyo committed Dec 16, 2014
1 parent a984c1a commit c2c1e72
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
11 changes: 4 additions & 7 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,12 +686,9 @@ def feat_tokens(for_artist=True):
The `for_artist` option determines whether the regex should be
suitable for matching artist fields (the default) or title fields.
"""
feat_special_chars = ['&', 'feat.', 'ft.']
feat_words = ['ft', 'featuring', 'feat']
if for_artist: # appending to artist name enables more tokens
feat_words += ['with', 'vs', 'and', 'con']
regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words])
if for_artist:
regex = r'%s|%s' % \
('|'.join([re.escape(x) for x in feat_special_chars]), regex)
return '(?:{0})'.format(regex)
feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.']
return '(?<=\s)(?:{0})(?=\s)'.format(

This comment has been minimized.

Copy link
@Kraymer

Kraymer Dec 16, 2014

Contributor

🙇 You got inspired!

'|'.join(re.escape(x) for x in feat_words)
)
1 change: 1 addition & 0 deletions test/test_ftintitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_contains_feat(self):
self.assertTrue(ftintitle.contains_feat('Alice and Bob'))
self.assertTrue(ftintitle.contains_feat('Alice With Bob'))
self.assertFalse(ftintitle.contains_feat('Alice defeat Bob'))
self.assertFalse(ftintitle.contains_feat('Aliceft.Bob'))


def suite():
Expand Down

0 comments on commit c2c1e72

Please sign in to comment.