From 30babf1908eedeb118e0b15eb7f9e17520b5ddcd Mon Sep 17 00:00:00 2001 From: Robert Bradley Date: Sun, 17 Dec 2023 07:50:03 +0000 Subject: [PATCH] fix: #493 Leeds issues --- .../councils/LeedsCityCouncil.py | 84 ++++++++++++------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py index 6a498ce554..88e231cffc 100644 --- a/uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py @@ -8,6 +8,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.common.keys import Keys import pandas as pd import urllib.request @@ -31,54 +32,82 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - page = f"https://www.leeds.gov.uk/residents/bins-and-recycling/check-your-bin-day" + page = ( + f"https://www.leeds.gov.uk/residents/bins-and-recycling/check-your-bin-day" + ) driver = create_webdriver(web_driver) driver.get(page) - # If you bang in the house number (or property name) and postcode in the box it should find your property - - #iframe_presense = WebDriverWait(driver, 30).until( - # EC.presence_of_element_located((By.ID, "fillform-frame-1")) - #) - - #driver.switch_to.frame(iframe_presense) wait = WebDriverWait(driver, 60) - postcode_box = wait.until( - EC.element_to_be_clickable((By.ID, 'ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_txtPostCode')) + EC.element_to_be_clickable( + ( + By.ID, + "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_txtPostCode", + ) + ) ) postcode_box.send_keys(user_postcode) - + postcode_btn_present = wait.until( + EC.presence_of_element_located( + ( + By.ID, + "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_btnSearchAddress", + ) + ) + ) postcode_btn = wait.until( - EC.element_to_be_clickable((By.ID, 'ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_btnSearchAddress')) + EC.element_to_be_clickable( + ( + By.XPATH, + '//*[@id="ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_btnSearchAddress"]', + ) + ) ) - postcode_btn.click() + postcode_btn.send_keys(Keys.ENTER) + + dropdown_present = wait.until( + EC.presence_of_element_located( + ( + By.XPATH, + '//*[@id="ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_ddlAddressList"]/option', + ) + ) + ) address_dropdown = wait.until( - EC.element_to_be_clickable((By.ID, 'ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_ddlAddressList')) + EC.element_to_be_clickable( + ( + By.ID, + "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_ddlAddressList", + ) + ) ) + + dropdown_present.click() + dropdownSelect = Select(address_dropdown) dropdownSelect.select_by_value(str(user_uprn)) results = wait.until( - EC.presence_of_element_located((By.ID, "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_BinResultsDetails")) + EC.presence_of_element_located( + ( + By.ID, + "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_BinResultsDetails", + ) + ) ) - data = {"bins": []} # dictionary for data - soup = BeautifulSoup(driver.page_source, 'html.parser') - + soup = BeautifulSoup(driver.page_source, "html.parser") - bin_types = soup.find_all('ul', class_='binCollectionTimesList') + bin_types = soup.find_all("ul", class_="binCollectionTimesList") for bin_collection_dates in bin_types: - bin_collection_list = bin_collection_dates.find_all('li', class_='') + bin_collection_list = bin_collection_dates.find_all("li", class_="") if bin_collection_list: - collection_dates = [ - date.text.strip() - for date in bin_collection_list - ] + collection_dates = [date.text.strip() for date in bin_collection_list] # Convert the collection dates to the desired format formatted_dates = [ @@ -87,14 +116,11 @@ def parse_data(self, page: str, **kwargs) -> dict: ] # Extract the type of bin from the header - bin_type = bin_collection_dates.find_previous('h3').text.split()[0] + bin_type = bin_collection_dates.find_previous("h3").text.split()[0] # Adding data to the 'bins' dictionary for each date for date in formatted_dates: - dict_data = { - "type": bin_type, - "collectionDate": date - } + dict_data = {"type": bin_type, "collectionDate": date} data["bins"].append(dict_data) return data