From 60148918d9bc63405e90504f05aa4aeee9d31d8b Mon Sep 17 00:00:00 2001 From: Jack Wilsdon Date: Thu, 17 Mar 2016 17:47:50 +0000 Subject: [PATCH] Fix LyricsWiki scraping code LyricsWiki now escapes song lyrics using HTML entities (presumably to prevent scraping), so we now unescape these before parsing. LyricsWiki has also added a script tag inside the div we are scraping, so we have to remove this using `scrape_lyrics_from_html`. --- beetsplug/lyrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 66b859f59c..b979e1b64f 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -321,7 +321,8 @@ def fetch(self, artist, title): html = self.fetch_url(url) if not html: return - lyrics = extract_text_in(html, u"
") + lyrics = extract_text_in(unescape(html), u"
") + lyrics = scrape_lyrics_from_html(lyrics) if lyrics and 'Unfortunately, we are not licensed' not in lyrics: return lyrics