spotDL · Oct 19, 2020 · Sep 30, 2020 · Oct 1, 2020 · Oct 1, 2020 · Oct 1, 2020
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # spotDL
 
+[![MIT License](https://img.shields.io/apm/l/atomic-design-ui.svg?)](https://github.com/spotDL/spotify-downloader/blob/master/LICENSE)
+
 ⚠ Interested Contributors, please read our [contributing Guidelines](CONTRIBUTING.md) first.
 
 ⚠ We are dropping active development of spotDL v2. No focused efforts will be made to resolve v2
@@ -8,6 +10,9 @@ specific issues.
 ⚠ We are actively looking for Contributors/Organization Members for all projects under development. 
 If interested, see [#857](https://github.com/spotDL/spotify-downloader/issues/857)
 
+⚠ There are a few feature requests we'd like the community to vote on. Do voice your support for features you'd like.
+See [#867](https://github.com/spotDL/spotify-downloader/issues/867) to vote.
+
 <br><br>
 
 What spotDL does:
@@ -28,16 +33,30 @@ You need to download ffmpeg to use this tool, download it from:
 
 We have rebuilt spotDL from scratch to be much faster, simpler and better than the old spotDL.
 The documentation for the same is a work in progress. v3.0.2 is yet to be released to PyPi so you
-can't install it using `pip`, this is intensional. v3.0.2 is still in alpha testing. We request that
+can't install it using `pip`, this is intentional. v3.0.2 is still in alpha testing. We request that
 you use spotDL v3 and open issues for problems that you come across.
 
-# How to use
+# Installation
+
+1. For v2, run
+    ```
+    $ pip install spotdl
+    ```
+
+2. For v3 (latest version), run
+    ```
+    $ pip install https://github.com/spotDL/spotify-downloader/archive/master.zip
+    ```
+
+3. Voilà!
+
+# How to use (instructions for v3)
 To download a song run,
 
     # spotdl $trackUrl
     spotdl https://open.spotify.com/track/08mG3Y1vljYA6bvDt4Wqkj?si=SxezdxmlTx-CaVoucHmrUA
 
-To download a album run,
+To download an album run,
 
     # spotdl $albumUrl
     spotdl https://open.spotify.com/album/2YMWspDGtbDgYULXvVQFM6?si=gF5dOQm8QUSo-NdZVsFjAQ
@@ -74,3 +93,7 @@ tracks for more speed.
 1. [@ritiek](https://github.com/ritiek) for creating and maintaining spotDL for 4 years
 2. [@rocketinventor](https://github.com/rocketinventor) for figuring out the YouTube Music querying
 3. [@Mikhail-Zex](https://github.com/Mikhail-Zex) for, never mind...
+
+# A few interesting forks
+1. [aasmpro/spotify-downloader](https://github.com/aasmpro/spotify-downloader)
+    - Sets metadata for songs that are already downloaded (v2 only.)
diff --git a/dev utils/spotdlTF.py b/dev utils/spotdlTF.py
@@ -26,5 +26,7 @@
 
             if not onlyCount:
                 print('%3d   %-60s   %s' % (songCount, contributingArtists[:-2], songName))
-
-        print('\nThere are %d songs in "%s"' % (songCount, eachFile[2:-19]))
+
+        print("Found search results")
+
+        print('\nThere are %d songs in "%s"' % (songCount, eachFile[2:-19]))
diff --git a/setup.py b/setup.py
@@ -14,13 +14,12 @@
     version = '3.1.0',
 
     install_requires = [
-        'fuzzywuzzy',
         'spotipy',
         'pytube3',
         'tqdm',
+        'rapidfuzz',
         'requests',
         'mutagen',
-        'python-Levenshtein-wheels',
     ],
 
     description="Downloads Spotify music from Youtube with metadata and album art",
@@ -63,4 +62,4 @@
     entry_points = {
         "console_scripts": ["spotdl = spotdl.__main__:console_entry_point"]
     }
-)
+)
diff --git a/spotdl/__main__.py b/spotdl/__main__.py
@@ -115,6 +115,17 @@ def console_entry_point():
 
             downloader.download_multiple_songs(songObjList)
 
+        elif request.endswith('.txt'):
+            print('Fetching songs from %s...' % request)
+            songObjList = []
+
+            with open(request, 'r') as songFile:
+                for songLink in songFile.readlines():
+                    song = SongObj.from_url(songLink)
+                    songObjList.append(song)
+
+            downloader.download_multiple_songs(songObjList)
+
         elif request.endswith('.spotdlTrackingFile'):
             print('Preparing to resume download...')
             downloader.resume_download_from_tracking_file(request)

diff --git a/spotdl/download/downloader.py b/spotdl/download/downloader.py
@@ -12,6 +12,7 @@
 from spotdl.patches.pyTube import YouTube
 
 from mutagen.easyid3 import EasyID3, ID3
+from mutagen.id3 import USLT
 from mutagen.id3 import APIC as AlbumCover
 
 from urllib.request import urlopen
@@ -217,6 +218,14 @@ def download_song(songObj: SongObj, displayManager: DisplayManager = None,
         data = rawAlbumArt
     )
 
+    #! adding lyrics
+    try:
+        lyrics = songObj.get_song_lyrics()
+        USLTOutput = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics)
+        audioFile["USLT::'eng'"] = USLTOutput
+    except:
+        pass
+
     audioFile.save(v2_version = 3)
 
     # Do the necessary cleanup

diff --git a/spotdl/search/provider.py b/spotdl/search/provider.py
@@ -3,10 +3,15 @@
 #===============
 
 #! the following are for the search provider to function
-from fuzzywuzzy.fuzz import partial_ratio
+from rapidfuzz.fuzz import partial_ratio
 from json import loads as convert_json_to_dict
 from requests import post
 
+#! the following are used to scrape lyrics
+from requests import get
+import re
+from html import unescape
+
 #! Just for static typing
 from typing import List
 
@@ -26,21 +31,25 @@
 #=======================
 #=== helper function ===
 #=======================
-def match_percentage(str1:str, str2:str) -> bool:
+
+def match_percentage(str1: str, str2: str, score_cutoff: float = 0) -> float:
     '''
     `str` `str1` : a random sentence
 
     `str` `str2` : another random sentence
 
-    RETURNS `int`
+    `float` `score_cutoff` : minimum score required to consider it a match
+                             returns 0 when similarity < score_cutoff
+
+    RETURNS `float`
 
-    A wrapper around `fuzzywuzzy.partial_ratio` to handle UTF-8 encoded
+    A wrapper around `rapidfuzz.fuzz.partial_ratio` to handle UTF-8 encoded
     emojis that usually cause errors
     '''
 
     #! this will throw an error if either string contains a UTF-8 encoded emoji 
     try:
-        return partial_ratio(str1, str2)
+        return partial_ratio(str1, str2, score_cutoff=score_cutoff)
 
     #! we build new strings that contain only alphanumerical characters and spaces
     #! and return the partial_ratio of that
@@ -50,14 +59,14 @@ def match_percentage(str1:str, str2:str) -> bool:
         for eachLetter in str1:
             if eachLetter.isalnum() or eachLetter.isspace():
                 newStr1 += eachLetter
-        
+
         newStr2 = ''
 
-        for eachLetter in str1:
+        for eachLetter in str2:
             if eachLetter.isalnum() or eachLetter.isspace():
                 newStr2 += eachLetter
-        
-        return partial_ratio(newStr1, newStr2)
+
+        return partial_ratio(newStr1, newStr2, score_cutoff=score_cutoff)
 
 #========================================================================
 #=== Background functions/Variables (Not meant to be called directly) ===
@@ -402,15 +411,15 @@ def search_and_order_ytm_results(songName: str, songArtists: List[str],
         #! we use fuzzy matching because YouTube spellings might be mucked up
         if result['type'] == 'song':
             for artist in songArtists:
-                if match_percentage (artist.lower(), result['artist'].lower()) > 85:
+                if match_percentage (artist.lower(), result['artist'].lower(), 85):
                     artistMatchNumber += 1
         else:
             #! i.e if video
             for artist in songArtists:
                 #! something like match_percentage('rionos', 'aiobahn, rionos Motivation
                 #! (remix)' would return 100, so we're absolutely corrent in matching
                 #! artists to song name.
-                if match_percentage(artist.lower(), result['name'].lower()) > 85:
+                if match_percentage(artist.lower(), result['name'].lower(), 85):
                     artistMatchNumber += 1
 
         #! Skip if there are no artists in common, (else, results like 'Griffith Swank -
@@ -485,3 +494,82 @@ def search_and_get_best_match(songName: str, songArtists: List[str],
     #! In theory, the first 'TUPLE' in sortedResults should have the highest match
     #! value, we send back only the link
     return sortedResults[0][0]
+
+
+def get_lyrics(songName: str, artistName: str) -> str:
+    '''
+    `str` `songName`   : Name of the song
+    `str` `artistName` : Name of the primary artist
+
+    RETURNS `str` : lyrics of the song
+    '''
+
+    #! used try, except just in case genius doesn't return any results.
+    try:
+
+        #! Access Token for genius api.
+        geniusHeaders = {
+            'Authorization':
+            'Bearer alXXDbPZtK1m2RrZ8I4k2Hn8Ahsd0Gh_o076HYvcdlBvmc0ULL1H8Z8xRlew5qaG',
+        }
+
+        #! Separate song name and artist name using '+' instead of spaces (' ')
+        query = '+'.join((songName+artistName).split(' '))
+
+        #! Base url for a search query.
+        searchURL = 'https://api.genius.com/search'
+
+        #! Search response from Genius
+        geniusResponse = get(searchURL, headers=geniusHeaders,
+                             params={'q': query}).json()['response']
+
+        #! Gets the songID of the best match from the genius response and then uses that to get its URL.
+        bestMatchURL = 'https://api.genius.com/songs/' + \
+            str(geniusResponse['hits'][0]['result']['id'])
+
+        #! Gets the lyrics page url from genius' response.
+        songURL = get(bestMatchURL, headers=geniusHeaders).json()[
+            'response']['song']['url']
+
+        #! Compile All the required Regular Expressions.
+
+        #! Matches the lyrics (including HTML tags).
+        lyricsRegex = re.compile(
+            r'<div class="Lyrics__Container.*?>.*</div><div class="RightSidebar.*?>')
+        #! Matches HTML tags.
+        htmlTagRegex = re.compile(r'<.*?>')
+
+        #! Matches Genius Tags like [Chorus]
+        genTagRegex = re.compile(r'\[.*?\]', re.DOTALL)
+
+        #! we use this regex to handle edge cases: (\n Some_lyric \n) -> (Some_Lyric)
+        edgeRegEx = re.compile(r'(\()(\n)(.*?)(\n)(\))')
+
+        #! Gets the genius lyrics page.
+        geniusPage = get(songURL).text
+
+        #! Takes the first string that matches lyricsRegex
+        preLyrics = re.findall(lyricsRegex, geniusPage)[0]
+
+        #! Substitutes HTML Tags for newline characters.
+        preLyrics = htmlTagRegex.sub('\n', preLyrics)
+
+        #! Replace Multiple Newline characters by a single newline character.
+        lyrics = []
+        for i in preLyrics.split('\n'):
+            if i != '\n' and i:
+                lyrics.append(i)
+
+        lyrics = "\n".join(lyrics)
+
+        #! Replaces all HTML escaped characters with unescaped characters.
+        lyrics = unescape(lyrics)
+
+        #! Substitute genius tags for newlines.
+        lyrics = genTagRegex.sub('\n', lyrics)
+        lyrics = edgeRegEx.sub(r'\1\3\5', lyrics)  #! Edge case handling
+    except:
+        lyrics = ''
+
+    #! Removes trailing and leading newlines and spaces.
+    return lyrics.strip()
diff --git a/spotdl/search/songObj.py b/spotdl/search/songObj.py
@@ -1,4 +1,5 @@
 from spotdl.search.provider import search_and_get_best_match
+from spotdl.search.provider import get_lyrics
 from spotdl.search.spotifyClient import get_spotify_client
 
 from os.path import join
@@ -15,11 +16,12 @@ class SongObj():
     #====================
     #=== Constructors ===
     #====================
-    def __init__(self, rawTrackMeta, rawAlbumMeta, rawArtistMeta, youtubeLink):
+    def __init__(self, rawTrackMeta, rawAlbumMeta, rawArtistMeta, youtubeLink, lyrics):
         self. __rawTrackMeta = rawTrackMeta
         self.__rawAlbumMeta  = rawArtistMeta
         self.__rawArtistMeta = rawArtistMeta
         self.__youtubeLink   = youtubeLink
+        self.__lyrics = lyrics
 
     #! constructors here are a bit mucky, there are two different constructors for two
     #! different use cases, hence the actual __init__ function does not exist
@@ -72,9 +74,13 @@ def from_url(cls, spotifyURL: str):
 
         youtubeLink = youtubeLink
 
+        # Get lyrics using the song's name and the primary artist's name.
+        lyrics = get_lyrics(songName, contributingArtists[0])
+
         return  cls(
             rawTrackMeta, rawAlbumMeta,
-            rawArtistMeta, youtubeLink
+            rawArtistMeta, youtubeLink,
+            lyrics
         )
 
     @classmethod
@@ -83,10 +89,12 @@ def from_dump(cls, dataDump: dict):
         rawAlbumMeta  = dataDump['rawAlbumMeta']
         rawArtistMeta = dataDump['rawAlbumMeta']
         youtubeLink   = dataDump['youtubeLink']
+        lyrics        = dataDump['lyrics']
 
         return  cls(
             rawTrackMeta, rawAlbumMeta,
-            rawArtistMeta, youtubeLink
+            rawArtistMeta, youtubeLink,
+            lyrics
         )
 
     def __eq__(self, comparedSong) -> bool:
@@ -158,6 +166,14 @@ def get_contributing_artists(self) -> List[str]:
             contributingArtists.append(artist['name'])
 
         return contributingArtists
+
+    #! 6. Song Lyrics
+    def get_song_lyrics(self) -> str:
+        '''
+        returns the lyrics of the song.
+        '''
+
+        return self.__lyrics
 
     #! Album Details:
 
@@ -222,5 +238,6 @@ def get_data_dump(self) -> dict:
             'youtubeLink'  : self.__youtubeLink,
             'rawTrackMeta' : self.__rawTrackMeta,
             'rawAlbumMeta' : self.__rawAlbumMeta,
-            'rawArtistMeta': self.__rawArtistMeta
+            'rawArtistMeta': self.__rawArtistMeta,
+            'lyrics'       : self.__lyrics
         }