diff --git a/.gitignore b/.gitignore index e2d1dc0..e966af1 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ build/ src/settings.cfg tab_scraper/ release_notes.txt +src copy/utils.py +.DS_Store diff --git a/README.md b/README.md index 0803bd2..fac450a 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,16 @@ # tab-scraper -An interface for downloading guitar tabs from Ultimate Guitar. + +An interface for downloading guitar tabs from Ultimate Guitar. Please check the news at the bottom. ![ui-image](screens/ui-screen.png) Get screenshots of Guitar Chords, Tabs, Bass Tabs and Ukulele Chords with no clutter. -Chords | Tab -:------:|:------| -![chords](screens/feather-chords.png) | ![tab](screens/sultans-tab.png) +| Chords | Tab | +| :---------------------------------: | :---------------------------- | +| ![chords](screens/feather-chords.png) | ![tab](screens/sultans-tab.png) | -You can also download GuitarPro and PowerTab files.
+You can also download GuitarPro and PowerTab files. `
` All files are sorted into directories for quick and easy access. ### Prerequisites @@ -25,17 +26,18 @@ All files are sorted into directories for quick and easy access. #### Command Line -1. Open settings.cfg and enter in the root directory where you would like all tabs to be stored e.g. username/Music/Tabs/ -2. Download [Geckodriver](https://github.com/mozilla/geckodriver/releases) and put the geckodriver executable into the src directory. +1. Open settings.cfg and enter the root directory where you would like all tabs to be stored, e.g. ``username/Music/Tabs/`` +2. Download [Geckodriver](https://github.com/mozilla/geckodriver/releases) and put the geckodriver executable into the ``src`` directory. 3. Run `pip install -r requirements.txt` -4. run `python tab_scraper.py` from src directory. +4. Run `python tab_scraper.py` from the ``src`` directory. ### Built With - Python 3 - - [PyQT5](https://pypi.org/project/PyQt5/) - - [Selenium](https://selenium-python.readthedocs.io/) - - [Geckodriver](https://github.com/mozilla/geckodriver/releases) + +### News + +I am assuming the original author has abandoned this project: I forked it and began working on it over a year ago (and then promptly disappeared from it myself), and there have been no updates since. I'll do my best to get this working again on all platforms, but I cannot make any promises. I'll likely take the existing GUI and rebuild it using libraries I'm familiar with, and write new back-end logic as well. Keep an eye out for updates. 
diff --git a/jsonout.txt b/jsonout.txt new file mode 100644 index 0000000..2230b86 --- /dev/null +++ b/jsonout.txt @@ -0,0 +1,134 @@ +[ + { + "artist_name": "TesseracT", + "artist_url": "/artist/tesseract_29262", + "song_name": "Juno", + "marketing_type": "official", + "tab_url": "https://www.ultimate-guitar.com/pro/?utm_source=UltimateGuitar&utm_medium=Search&utm_campaign=UG+Search&utm_content=Official+Version&artist=TesseracT&song=juno&tab_id=2433301", + "device": null, + "app_link": "//www.ultimate-guitar.com/send?ug_from=yozio_splash&url=https://play.google.com/store/apps/details?id=com.ultimateguitar.tabs&ug_campaign=UG_TPAndroid_SearchTpLink_SearchPage_mobile0&referrer=utm_campaign=UG_TPAndroid_SearchTpLink_SearchPage_mobile0", + "highlight": { + "song_name": [ + [ + 0, + 4 + ] + ], + "artist_name": [ + [ + 0, + 9 + ] + ] + } + }, + { + "artist_name": "TesseracT", + "artist_url": "/artist/tesseract_29262", + "song_name": "Juno", + "marketing_type": "TabPro", + "rating": 4.8404, + "votes": 7, + "tab_url": "https://www.ultimate-guitar.com/pro/?utm_source=UltimateGuitar&utm_medium=Search&utm_campaign=UG+Search&artist=TesseracT&song=juno&tab_id=2425607", + "device": null, + "app_link": "//www.ultimate-guitar.com/send?ug_from=yozio_splash&url=https://play.google.com/store/apps/details?id=com.ultimateguitar.tabs&ug_campaign=UG_TPAndroid_SearchTpLink_SearchPage_mobile0&referrer=utm_campaign=UG_TPAndroid_SearchTpLink_SearchPage_mobile0", + "highlight": { + "song_name": [ + [ + 0, + 4 + ] + ], + "artist_name": [ + [ + 0, + 9 + ] + ] + } + }, + { + "id": 2390557, + "song_id": 2743409, + "song_name": "Juno", + "artist_id": 29262, + "artist_name": "TesseracT", + "type": "Pro", + "part": "", + "version": 1, + "votes": 10, + "rating": 4.80031, + "date": "1527099921", + "status": "approved", + "preset_id": 28177, + "tab_access_type": "public", + "tp_version": 1, + "tonality_name": "Fm", + "version_description": "Whole song transcription, all instruments, ambient guitar and 
lyrics included. Multiple notation/time signature interpretations for similar parts as it's quite ambiguous. The Tab Pro conversion by UG has a few weird things going on, and the lyrics don't align like in the original file, so keep that in mind.", + "verified": 0, + "recording": { + "is_acoustic": 0, + "tonality_name": "", + "performance": null, + "recording_artists": [] + }, + "artist_url": "https://www.ultimate-guitar.com/artist/tesseract_29262", + "tab_url": "https://tabs.ultimate-guitar.com/tab/tesseract/juno-guitar-pro-2390557" + }, + { + "id": 2395961, + "song_id": 2743409, + "song_name": "Juno", + "artist_id": 29262, + "artist_name": "TesseracT", + "type": "Pro", + "part": "", + "version": 2, + "votes": 0, + "rating": 0, + "date": "1528113155", + "status": "approved", + "preset_id": 28177, + "tab_access_type": "public", + "tp_version": 2, + "tonality_name": "", + "version_description": "", + "verified": 0, + "recording": { + "is_acoustic": 0, + "tonality_name": "", + "performance": null, + "recording_artists": [] + }, + "artist_url": "https://www.ultimate-guitar.com/artist/tesseract_29262", + "tab_url": "https://tabs.ultimate-guitar.com/tab/tesseract/juno-guitar-pro-2395961" + }, + { + "id": 2425607, + "song_id": 2743409, + "song_name": "Juno", + "artist_id": 29262, + "artist_name": "TesseracT", + "type": "Pro", + "part": "", + "version": 3, + "votes": 7, + "rating": 4.8404, + "date": "1531581826", + "status": "approved", + "preset_id": 28177, + "tab_access_type": "public", + "tp_version": 3, + "tonality_name": "F", + "version_description": "I saw TesseracT in Paris to be sure of finger placement.", + "verified": 0, + "recording": { + "is_acoustic": 0, + "tonality_name": "", + "performance": null, + "recording_artists": [] + }, + "artist_url": "https://www.ultimate-guitar.com/artist/tesseract_29262", + "tab_url": "https://tabs.ultimate-guitar.com/tab/tesseract/juno-guitar-pro-2425607" + } +] \ No newline at end of file diff --git a/new/utilities.py 
"""Selenium/BeautifulSoup helpers for the rebuilt Ultimate Guitar scraper.

Importing this module has side effects: missing third-party packages are
installed with pip (after which the program exits and must be re-run), a log
file is opened next to this file, and a headless Chrome session is started.
"""
# To do:
# Check for updates in search results page on UG
# Build new GUI (likely using tkinter, as I'm not that familiar with PyQt5)
# Build functions to search on UG and parse search results using updated Selenium
# Connect GUI and utility functions
import atexit
import json
import os
import subprocess
import sys

from tqdm import tqdm


def install(package):
    """Install *package* with pip into the interpreter running this script.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


def _install_missing(display_name, package):
    """Report a missing module, install *package*, and return True."""
    print(f'WARNING: MODULE {display_name} MISSING, ATTEMPTING TO INSTALL')
    install(package)
    return True


# Set to True as soon as any dependency had to be installed; a freshly
# installed package is not imported here, so the program has to be restarted.
fail = False
try:
    from bs4 import BeautifulSoup as bs
except ImportError:
    fail = _install_missing('BeautifulSoup4', 'beautifulsoup4')
from datetime import date, timedelta
try:
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
except ImportError:
    fail = _install_missing('selenium', 'selenium')
from time import sleep
try:
    from webdriver_manager.chrome import ChromeDriverManager
except ImportError:
    fail = _install_missing('webdriver-manager', 'webdriver-manager')
try:
    import requests
except ImportError:
    fail = _install_missing('requests', 'requests')
if fail:
    print('WARNING: ONE OR MORE MODULES WERE MISSING AND HAVE BEEN INSTALLED. THE PROGRAM WILL NEED TO BE RE-RUN AS A RESULT.')
    # sys.exit() instead of quit(): quit() is injected by the site module and
    # is not guaranteed to exist (e.g. when run with -S or from a frozen app).
    sys.exit()
# set up currentdir variable
currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
# creating a simple logger; the handle is closed automatically at exit
logger = open(os.path.join(currentdir, 'logfile.txt'), 'w')
atexit.register(logger.close)
# create a webdriver instance using selenium, set chrome options to headless to avoid a window popping up every time you scrape
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
# set chrome options "headless" to ensure a window doesn't pop up
chrome_options.add_argument("--headless")
# and the below line to simulate an actual browser window size
chrome_options.add_argument("--window-size=1100,1000")
# the below line instantiates the driver instance, with a simple webdriver check
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
# The driver is shared by every retrieve() call, so it is shut down once,
# when the interpreter exits, rather than after each page.
atexit.register(driver.quit)
# the below line gives us a useragent header to pretend to be a real browser
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})


SEARCH_URL = "https://www.ultimate-guitar.com/search.php?search_type=title&value={}"
# Raw strings: same pattern text as before, without invalid "\;" escapes.
# NOTE(review): these patterns look like they lost an HTML-entity prefix for
# the double quote somewhere along the way - confirm against the live page.
RESULTS_PATTERN = r'"\;results"\;:(\[.*?\]),"\;pagination"\;'
#RESULTS_PATTERN = "\"results\":(\[.*?\]),\"pagination\""
RESULTS_COUNT_PATTERN = r'"\;tabs"\;,"\;results_count"\;:([0-9]+?),"\;results"\;'
#RESULTS_COUNT_PATTERN = "\"tabs\",\"results_count\":([0-9]+?),\"results\""
DOWNLOAD_TIMEOUT = 15


def retrieve(url, wait_seconds=3):
    """Load *url* in the shared Chrome driver and return it as a soup object.

    Args:
        url: address of the page to load.
        wait_seconds: pause after navigation so the page can finish loading
            (and to look less like a bot). Defaults to the former fixed 3 s.

    Returns:
        A BeautifulSoup document built from the driver's page source with the
        "html.parser" backend.
    """
    driver.get(url)
    sleep(wait_seconds)
    # The driver is deliberately left open: closing the only window here made
    # every call after the first one fail.
    return bs(driver.page_source, features="html.parser")
+
+
+
+ + + + + + + + + diff --git a/settings.cfg b/settings.cfg index fda9cb8..406816c 100644 --- a/settings.cfg +++ b/settings.cfg @@ -3,4 +3,4 @@ # the full path to the root directory you want to store all tabs in # e.g. = C:\Users\yourname\Music\Tabs\ # make sure it ends with a slash -destination_root = . +destination_root = /Users/devingardner/Desktop/coding/downloaded_tabs diff --git a/src copy/geckodriver b/src copy/geckodriver new file mode 100755 index 0000000..85fd08f Binary files /dev/null and b/src copy/geckodriver differ diff --git a/src copy/other/testing output.txt b/src copy/other/testing output.txt new file mode 100644 index 0000000..16bd1bf --- /dev/null +++ b/src copy/other/testing output.txt @@ -0,0 +1,16 @@ +PS C:\Users\Workstation2\Desktop\vsc\dict\tab-scraper> python +Python 3.9.5 (tags/v3.9.5:0a7dcbd, May 3 2021, 17:27:52) [MSC v.1928 64 bit (AMD64)] on win32 +Type "help", "copyright", "credits" or "license" for more information. +import requests as rq, json, re +SEARCH_URL = "https://www.ultimate-guitar.com/search.php?search_type=title&value={}" +RESULTS_PATTERN = "\"\;results\"\;:(\[.*?\]),\"\;pagination\"\;" +RESULTS_COUNT_PATTERN = "\"\;tabs\"\;,\"\;results_count\"\;:([0-9]+?),\"\;results\"\;" +DOWNLOAD_TIMEOUT = 15 +search_string = 'tesseract juno' +response = rq.get(SEARCH_URL.format(search_string)) +response_body = response.content.decode() +results = re.search(RESULTS_PATTERN, response_body).group(1) +count = int(re.search(RESULTS_COUNT_PATTERN, response_body).group(1)) +results = re.sub(r'"', '"', results) +results = json.loads(results) +with open('jsonout.txt','w') as jhand: json.dump(results, jhand, indent=2) \ No newline at end of file diff --git a/src copy/settings.cfg b/src copy/settings.cfg new file mode 100644 index 0000000..642f85d --- /dev/null +++ b/src copy/settings.cfg @@ -0,0 +1,3 @@ +[MAIN] +destination_root = C:/Users/Workstation2/Desktop/Pro/Periphery + diff --git a/src copy/tab_scraper.py b/src copy/tab_scraper.py new 
"""PyQt5 front end for searching and downloading Ultimate Guitar tabs."""
from PyQt5 import QtCore, QtGui, QtWidgets
import utils
import sys
import os
from configparser import ConfigParser

VERSION = 'v.0.1.2'
TOTAL_WIDTH = 1000
SEARCH_WIDTH = 200
SEARCH_ELEMENT_WIDTH = 150
TEXT_BOX_HEIGHT = 30
CHECK_BOX_HEIGHT = 20
BUTTON_HEIGHT = 30
BUTTON_WIDTH = 150
DIRECTORY_BUTTON_WIDTH = 30
OFFSET = 25
CHECK_BOX_OFFSET = 15
CHECK_BOX_NAMES = ["{}Chords", "{}Tab", "{}GuitarPro", "{}PowerTab", "{}Bass", "{}Ukulele"]
# Maps a check box label to the "type" string used in the search results.
TYPES_DICT = {"Chords": "Chords",
              "Tab": "Tabs",
              "GuitarPro": "Pro",
              "PowerTab": "Power",
              "Bass": "Bass Tabs",
              "Ukulele": "Ukulele Chords"}
TABLE_COLUMNS = ["Type", "Artist", "Title", "Rating", "Votes"]


class MainWindow(object):
    """Builds the search window and wires its buttons to the utils module."""

    def setup_ui(self, search_window):
        """Create all widgets on *search_window* with fixed pixel geometry.

        Args:
            search_window: the QMainWindow that receives the central widget.
        """
        self.status = ''
        self.results = []
        font = QtGui.QFont()
        font.setPointSize(12)

        # Left-hand column, top to bottom: search box, check boxes, Search
        # button, Download + "..." buttons, status label. Every y-coordinate
        # below is derived from these few values.
        check_boxes_top = OFFSET * 2 + TEXT_BOX_HEIGHT
        check_box_pitch = CHECK_BOX_HEIGHT + CHECK_BOX_OFFSET
        check_boxes_height = check_box_pitch * len(CHECK_BOX_NAMES) - CHECK_BOX_OFFSET
        button_pitch = OFFSET + BUTTON_HEIGHT
        search_button_top = check_boxes_top + check_boxes_height + OFFSET
        download_button_top = search_button_top + button_pitch
        status_top = search_button_top + button_pitch * 2
        window_height = (check_boxes_top + check_boxes_height
                         + (OFFSET * 2 + BUTTON_HEIGHT)) * 2

        search_window.setObjectName("SearchWindow")
        search_window.setWindowTitle("Tab Scraper " + VERSION)
        # Identical minimum and maximum size makes the window non-resizable.
        search_window.setMinimumSize(QtCore.QSize(TOTAL_WIDTH, window_height))
        search_window.setMaximumSize(QtCore.QSize(TOTAL_WIDTH, window_height))
        search_window.setFont(font)
        search_window.setTabShape(QtWidgets.QTabWidget.Rounded)

        self.central_widget = QtWidgets.QWidget(search_window)
        self.central_widget.setObjectName("centralwidget")

        self.check_boxes = []
        for i, name in enumerate(CHECK_BOX_NAMES):
            check_box = QtWidgets.QCheckBox(self.central_widget)
            check_box.setGeometry(QtCore.QRect(OFFSET,
                                               check_boxes_top + check_box_pitch * i,
                                               SEARCH_ELEMENT_WIDTH,
                                               CHECK_BOX_HEIGHT))
            check_box.setObjectName(name.format("checkBox"))
            # The visible label doubles as the key into TYPES_DICT.
            check_box.setText(name.format(""))
            self.check_boxes.append(check_box)

        self.search_button = QtWidgets.QPushButton(self.central_widget)
        self.search_button.setGeometry(QtCore.QRect(OFFSET, search_button_top,
                                                    BUTTON_WIDTH, BUTTON_HEIGHT))
        self.search_button.setObjectName("searchButton")
        self.search_button.setText("Search")
        self.search_button.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor))
        self.search_button.clicked.connect(self.search_tabs)

        # The Download button shares its row with the narrow "..." button.
        self.download_button = QtWidgets.QPushButton(self.central_widget)
        self.download_button.setGeometry(QtCore.QRect(OFFSET, download_button_top,
                                                      BUTTON_WIDTH - DIRECTORY_BUTTON_WIDTH,
                                                      BUTTON_HEIGHT))
        self.download_button.setObjectName("downloadButton")
        self.download_button.setText("Download")
        self.download_button.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor))
        self.download_button.clicked.connect(self.download_tab)

        self.set_directory_button = QtWidgets.QPushButton(self.central_widget)
        self.set_directory_button.setGeometry(QtCore.QRect(OFFSET + (BUTTON_WIDTH - DIRECTORY_BUTTON_WIDTH),
                                                           download_button_top,
                                                           DIRECTORY_BUTTON_WIDTH,
                                                           BUTTON_HEIGHT))
        # Previously reused "downloadButton", giving two widgets the same name.
        self.set_directory_button.setObjectName("setDirectoryButton")
        self.set_directory_button.setText("...")
        self.set_directory_button.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor))
        self.set_directory_button.clicked.connect(self.set_download_location)

        self.status_message = QtWidgets.QLabel(self.central_widget)
        self.status_message.setGeometry(QtCore.QRect(OFFSET, status_top,
                                                     BUTTON_WIDTH, BUTTON_HEIGHT))
        status_font = QtGui.QFont()
        status_font.setPointSize(10)
        self.status_message.setFont(status_font)
        self.status_message.setText(self.status)

        self.search_input = QtWidgets.QLineEdit(self.central_widget)
        self.search_input.setGeometry(QtCore.QRect(OFFSET, OFFSET, SEARCH_ELEMENT_WIDTH, TEXT_BOX_HEIGHT))
        self.search_input.setObjectName("lineEdit")
        # Pressing Enter in the search box behaves like clicking Search.
        self.search_input.returnPressed.connect(self.search_button.click)
        self.search_input.setPlaceholderText("Search query...")

        self.tableWidget = QtWidgets.QTableWidget(self.central_widget)
        self.tableWidget.setGeometry(QtCore.QRect(SEARCH_WIDTH, OFFSET, TOTAL_WIDTH - SEARCH_WIDTH - OFFSET,
                                                  window_height - OFFSET * 2))
        self.tableWidget.setSortingEnabled(False)
        self.tableWidget.setAlternatingRowColors(True)
        self.tableWidget.setSelectionMode(QtWidgets.QAbstractItemView.SingleSelection)
        self.tableWidget.setSelectionBehavior(QtWidgets.QAbstractItemView.SelectRows)
        self.tableWidget.setVerticalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel)
        self.tableWidget.setHorizontalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel)
        self.tableWidget.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)

        self.tableWidget.setShowGrid(False)
        self.tableWidget.setObjectName("tableWidget")
        self.tableWidget.setColumnCount(len(TABLE_COLUMNS))
        self.tableWidget.setRowCount(len(self.results))

        for i, title in enumerate(TABLE_COLUMNS):
            item = QtWidgets.QTableWidgetItem()
            item.setText(title)
            self.tableWidget.setHorizontalHeaderItem(i, item)

        # Only the "Title" column stretches; the others fit their contents.
        header = self.tableWidget.horizontalHeader()
        for i, title in enumerate(TABLE_COLUMNS):
            if title == "Title":
                header.setSectionResizeMode(i, QtWidgets.QHeaderView.Stretch)
            else:
                header.setSectionResizeMode(i, QtWidgets.QHeaderView.ResizeToContents)

        self.tableWidget.horizontalHeader().setDefaultSectionSize(100)
        self.tableWidget.horizontalHeader().setMinimumSectionSize(100)

        QtCore.QMetaObject.connectSlotsByName(search_window)
        search_window.setCentralWidget(self.central_widget)

    def search_tabs(self):
        """Search for the typed query, restricted to the ticked tab types."""
        types = [TYPES_DICT[check_box.text()]
                 for check_box in self.check_boxes
                 if check_box.isChecked()]
        # Whitespace runs collapse into a single "%20" for use in the URL.
        search_string = "%20".join(self.search_input.text().split())
        if search_string:
            self.results = utils.search_tabs(search_string, types)
            self.update_table()

    def update_table(self):
        """Show self.results in the table, one row per result."""
        self.tableWidget.setRowCount(len(self.results))
        for i, result in enumerate(self.results):
            # Results carry more fields than are displayed; only the first
            # len(TABLE_COLUMNS) of them are shown.
            for j in range(len(TABLE_COLUMNS)):
                item = QtWidgets.QTableWidgetItem()
                item.setText(result[j])
                self.tableWidget.setItem(i, j, item)

    def download_tab(self):
        """Download the selected result as a file or as a screenshot.

        "Pro" and "Power" results go through utils.download_file; every
        other type goes through utils.download_tab.
        """
        row_index = self.tableWidget.currentRow()
        # With no selection the index is negative, which would otherwise
        # silently pick the last result via Python's negative indexing.
        if not 0 <= row_index < len(self.results):
            self.status_message.setText('No tab selected')
            return

        self.status_message.setText('Downloading...')
        # Force a repaint now: the download below blocks the event loop.
        self.status_message.repaint()

        row = self.results[row_index]
        tab_type = row[0]
        artist = row[1].replace("/", "")
        url = row[-2]
        if tab_type in ("Pro", "Power"):
            utils.download_file(url, tab_type, artist)
        else:
            utils.download_tab(url, tab_type, artist, row[2], row[6])
        self.status_message.setText('Download finished')

    def set_download_location(self):
        """Ask for a folder and store it as destination_root in settings.cfg."""
        folder_path = QtWidgets.QFileDialog.getExistingDirectory(None, "Select Folder")
        if not folder_path:
            # Dialog cancelled: keep the current setting instead of blanking it.
            return

        if getattr(sys, 'frozen', False):
            # If the application is run as a bundle, the pyInstaller bootloader
            # extends the sys module by a flag frozen=True
            application_path = os.path.dirname(sys.executable)
        else:
            application_path = os.path.dirname(os.path.abspath(__file__))
        settings_file = os.path.join(application_path, "settings.cfg")

        config = ConfigParser()
        config.read(settings_file)
        # A missing or empty settings file has no [MAIN] section yet.
        if not config.has_section('MAIN'):
            config.add_section('MAIN')
        # "%" starts an interpolation in ConfigParser values and must be doubled.
        config.set('MAIN', 'destination_root', folder_path.replace('%', '%%'))
        with open(settings_file, 'w') as f:
            config.write(f)


if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    SearchWindow = QtWidgets.QMainWindow()
    ui = MainWindow()
    ui.setup_ui(SearchWindow)
    SearchWindow.show()
    sys.exit(app.exec_())
"""Search Ultimate Guitar and download tabs through a headless Firefox."""
import os
import re
import shutil
import sys
import json
import requests
from platform import system as opersyst
from time import sleep
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.common.exceptions import NoSuchElementException
from configparser import ConfigParser


SEARCH_URL = "https://www.ultimate-guitar.com/search.php?search_type=title&value={}"
# Raw strings: same pattern text as before, without invalid "\;" escapes.
# NOTE(review): these patterns (and the quote substitution in _parse_results)
# look like they lost an HTML-entity form of the double quote somewhere along
# the way - confirm against the repository and the live page.
RESULTS_PATTERN = r'"\;results"\;:(\[.*?\]),"\;pagination"\;'
#RESULTS_PATTERN = "\"results\":(\[.*?\]),\"pagination\""
RESULTS_COUNT_PATTERN = r'"\;tabs"\;,"\;results_count"\;:([0-9]+?),"\;results"\;'
#RESULTS_COUNT_PATTERN = "\"tabs\",\"results_count\":([0-9]+?),\"results\""
DOWNLOAD_TIMEOUT = 15
# Seconds to wait for a search response before giving up.
REQUEST_TIMEOUT = 15


def find(name, path):
    """Return the path of the first file named *name* under *path*, or None."""
    for root, _dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)
    return None


def root_path():
    """Return the root directory of the current drive/filesystem."""
    return os.path.abspath(os.sep)


def _locate_firefox():
    """Return the Firefox executable path, or None if it cannot be found."""
    executable = 'firefox.exe' if opersyst() == 'Windows' else 'firefox'
    # Check PATH first; walking the whole filesystem is the slow last resort.
    return shutil.which(executable) or find(executable, root_path())


location = _locate_firefox()


def _fetch_page(search_string, page):
    """Return the decoded body of search results page number *page*."""
    # Page 1 is requested exactly as before; later pages add a page parameter.
    # NOTE(review): assumes the search endpoint accepts "page" - confirm.
    params = {"page": page} if page > 1 else None
    response = requests.get(SEARCH_URL.format(search_string),
                            params=params, timeout=REQUEST_TIMEOUT)
    return response.content.decode()


def _parse_results(response_body):
    """Return the result dicts embedded in *response_body*, or []."""
    match = re.search(RESULTS_PATTERN, response_body)
    if match is None:
        return []
    results = re.sub(r'"', '"', match.group(1))
    try:
        return json.loads(results)
    except json.JSONDecodeError:
        return []


def search_tabs(search_string, types):
    """Return every search result whose type is listed in *types*.

    Args:
        search_string: query text, already escaped for use in the URL.
        types: collection of result "type" values to keep.

    Returns:
        A list of tuples (type, artist, title, rating, votes, tab_url,
        version); rating, votes and version are strings.
    """
    page = 1
    response_body = _fetch_page(search_string, page)
    # count is the number of results, used to know how many pages to search
    count_match = re.search(RESULTS_COUNT_PATTERN, response_body)
    count = int(count_match.group(1)) if count_match else 0
    response_data = _parse_results(response_body)

    ret = []
    # Stop on an empty page too, so a mismatch between the reported count and
    # the items actually served can never loop forever.
    while count > 0 and response_data:
        for item in response_data:
            count -= 1
            try:
                # Get every result that has a desired type
                if item["type"] in types:
                    ret.append((item["type"], item["artist_name"], item["song_name"],
                                str(round(float(item["rating"]), 1)), str(item["votes"]),
                                item["tab_url"], str(item["version"])))
            except KeyError:
                # key error on "official" tabs, not interested in these tabs
                continue
        if count > 0:
            page += 1
            response_data = _parse_results(_fetch_page(search_string, page))
    return ret


def _application_path():
    """Return the directory holding the executable or this source file."""
    if getattr(sys, 'frozen', False):
        # If the application is run as a bundle, the pyInstaller bootloader
        # extends the sys module by a flag frozen=True
        return os.path.dirname(sys.executable)
    return os.path.dirname(os.path.abspath(__file__))


def _destination_root():
    """Return destination_root from the [MAIN] section of settings.cfg."""
    config = ConfigParser()
    config.read(os.path.join(_application_path(), "settings.cfg"))
    return config['MAIN']['destination_root']


def _gecko_path():
    """Return the path of the geckodriver executable next to this file."""
    currentdir = os.path.dirname(os.path.realpath(__file__))
    parentdir = os.path.dirname(currentdir)
    # Kept from the original code, but no longer appended on every call.
    if parentdir not in sys.path:
        sys.path.append(parentdir)
    name = 'geckodriver.exe' if opersyst() == 'Windows' else 'geckodriver'
    return os.path.join(currentdir, name)


def _make_destination(tab_type, artist):
    """Create and return the <destination_root>/<tab_type>/<artist> folder."""
    destination = os.path.join(_destination_root(), tab_type, artist)
    os.makedirs(destination, exist_ok=True)
    return destination


def _headless_options():
    """Return Firefox options for a headless run of the located binary."""
    options = Options()
    options.headless = True
    options.binary_location = location
    return options


def _safe_filename(name):
    """Return *name* without characters that cannot appear in a file name."""
    forbidden = '\\/:*?"<>|' if opersyst() == 'Windows' else '/'
    return "".join(ch for ch in name if ch not in forbidden)


def _click_if_present(driver, xpath):
    """Click the element matching *xpath*; do nothing if there is none."""
    try:
        driver.find_element_by_xpath(xpath).click()
    except NoSuchElementException:
        pass


def download_tab(url, tab_type, artist, title, version):
    """Save a screenshot of the tab at *url* as a PNG file.

    The file is written to <destination_root>/<tab_type>/<artist>/ and named
    "<title> (Ver <version>).png".

    Raises:
        KeyError: if settings.cfg has no [MAIN] destination_root entry.
        NoSuchElementException: if the page contains no <pre> element.
    """
    destination = _make_destination(tab_type, artist)
    gecko_path = _gecko_path()

    driver = webdriver.Firefox(options=_headless_options(), executable_path=gecko_path)
    # try/finally so the browser process is shut down even when a step fails.
    try:
        driver.get(url)

        # clear the privacy policy message
        _click_if_present(driver, '//button[text()="Got it, thanks!"]')
        # clear the official tab ad
        _click_if_present(driver, '//div[contains(@class, "ai-ah")]//button')

        # hide the autoscroll tool
        try:
            autoscroll = driver.find_element_by_xpath('//span[text()="Autoscroll"]/parent::button/parent::div/parent::section')
            driver.execute_script("arguments[0].setAttribute('style', 'display: none')", autoscroll)
        except NoSuchElementException:
            pass

        tab = driver.find_element_by_tag_name("pre")
        # A "/" in the title would otherwise point into a missing sub-folder.
        filename = os.path.join(destination, (_safe_filename(title) + " (Ver " + version + ")" + ".png"))
        tab.screenshot(filename)
    finally:
        driver.quit()


def download_file(url, tab_type, artist):
    """Download the Guitar Pro / Power Tab file offered at *url*.

    The file is saved by Firefox into <destination_root>/<tab_type>/<artist>/.
    The function returns once the download has finished or DOWNLOAD_TIMEOUT
    seconds have passed.

    Raises:
        KeyError: if settings.cfg has no [MAIN] destination_root entry.
        NoSuchElementException: if the page has no download button.
    """
    destination = _make_destination(tab_type, artist)
    gecko_path = _gecko_path()

    # remember what is in the destination already
    existing = set(os.listdir(destination))

    profile = FirefoxProfile()
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    profile.set_preference("browser.download.dir", destination)
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream")

    driver = webdriver.Firefox(options=_headless_options(), firefox_profile=profile,
                               executable_path=gecko_path)
    # try/finally so the browser process is shut down even when a step fails.
    try:
        driver.get(url)
        button = driver.find_element_by_xpath('//button/span[text()="DOWNLOAD Guitar Pro TAB" '
                                              'or text()="DOWNLOAD Power TAB"]')
        driver.execute_script("arguments[0].click();", button)

        # kill firefox process after download completes or a timeout is reached
        timeout = DOWNLOAD_TIMEOUT
        while timeout > 0:
            sleep(0.5)
            timeout -= 0.5
            current = set(os.listdir(destination))
            # Firefox keeps a ".part" file while a download is in progress;
            # quitting as soon as any new file shows up would truncate it.
            in_progress = any(name.endswith('.part') for name in current)
            if current - existing and not in_progress:
                break
    finally:
        driver.quit()