Skip to content

Commit

Permalink
fix: Kirklees Council
Browse files Browse the repository at this point in the history
fix: #1129 - Breaking Change. UPRN required
  • Loading branch information
m26dvd committed Jan 5, 2025
1 parent 5b9499a commit a3bae15
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 134 deletions.
6 changes: 2 additions & 4 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -936,13 +936,11 @@
"wiki_note": "Follow the instructions [here](https://waste-services.kingston.gov.uk/waste) until the \"Your bin days\" page, then copy the URL and replace the URL in the command."
},
"KirkleesCouncil": {
"house_number": "24",
"postcode": "HD7 5DX",
"uprn": "83002937",
"skip_get_url": true,
"url": "https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins",
"web_driver": "http://selenium:4444",
"wiki_name": "Kirklees Council",
"wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver."
"wiki_note": "Provide your UPRN. Find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"KnowsleyMBCouncil": {
"house_number": "22",
Expand Down
193 changes: 63 additions & 130 deletions uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py
Original file line number Diff line number Diff line change
@@ -1,143 +1,76 @@
import time
from datetime import datetime
from typing import Optional

from bs4 import BeautifulSoup
from selenium.common import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.drivers.chrome import ChromeDriver
import requests

from selenium import webdriver

from uk_bin_collection.uk_bin_collection.common import create_webdriver
from uk_bin_collection.uk_bin_collection.common import date_format
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


def wait_for_element(driver, element_type, element: str, timeout: int = 5):
    """
    Block until an element matching the given locator is present on the page.

    Args:
        driver: Selenium driver to poll.
        element_type: Locator strategy (a ``By`` constant such as ``By.ID``).
        element: Locator value interpreted according to ``element_type``.
        timeout: Maximum number of seconds to wait.

    Raises:
        TimeoutException: Propagated from ``wait_for_element_conditions`` when
            the element does not appear in time.
    """
    locator = (element_type, element)
    wait_for_element_conditions(
        driver,
        EC.presence_of_element_located(locator),
        timeout=timeout,
    )


def wait_for_element_conditions(driver, conditions, timeout: int = 5):
    """
    Wait until ``conditions`` is satisfied for ``driver``.

    Args:
        driver: Selenium driver to poll.
        conditions: Expected-condition callable handed to ``WebDriverWait.until``.
        timeout: Maximum number of seconds to wait.

    Raises:
        TimeoutException: Re-raised after printing a diagnostic message when the
            condition is not met within ``timeout`` seconds.
    """
    waiter = WebDriverWait(driver, timeout)
    try:
        waiter.until(conditions)
    except TimeoutException:
        # Leave a breadcrumb on stdout, then let the caller handle the failure.
        print("Timed out waiting for page to load")
        raise


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
"""
Concrete classes have to implement all abstract operations of the
base class. They can also override some operations with a default
implementation.
"""

def __init__(self):
    """Start with no browser session; ``_parse_data`` assigns one when it runs."""
    # Kept on the instance so ``parse_data`` can quit the driver afterwards.
    self._driver: Optional[WebDriver] = None

def parse_data(self, *args, **kwargs) -> dict:
    """
    Run ``_parse_data`` and always shut down the browser afterwards.

    All positional and keyword arguments are forwarded unchanged.

    Returns:
        Whatever ``_parse_data`` returns.
    """
    try:
        result = self._parse_data(*args, **kwargs)
    finally:
        # Quit the driver on success and on failure alike, if one was created.
        driver = self._driver
        if driver:
            driver.quit()
    return result

def _parse_data(self, page: str, **kwargs) -> dict:
    """
    Scrape bin collection dates from the Kirklees "Manage your bins" form.

    Steps:
        - Open the form in a Chrome session and switch into its iframe.
        - Type the postcode, then click the address option whose text
          contains the house number.
        - Click the "next" button if it is still displayed.
        - Read each ``radio-label`` label on the resulting page and split its
          text into a bin type and a collection date.

    Args:
        page: Unused by this method.
        **kwargs: Must contain ``paon`` (house number) and ``postcode``.

    Returns:
        ``{"bins": [...]}`` with one ``type``/``collectionDate`` entry per
        label, ordered by collection date.
    """
    user_paon = kwargs["paon"]
    user_postcode = kwargs["postcode"]

    # NOTE(review): a local Chrome instance is started directly here; the
    # ``create_webdriver`` helper and the ``web_driver``/``headless`` kwargs
    # are not used by this method.
    browser = webdriver.Chrome()
    self._driver = browser
    browser.implicitly_wait(1)

    browser.get(
        "https://my.kirklees.gov.uk/service/Bins_and_recycling___Manage_your_bins"
    )

    time.sleep(5)

    # The form is rendered inside an iframe, so move into it first.
    form_frame = browser.find_element(By.CSS_SELECTOR, "#fillform-frame-1")
    browser.switch_to.frame(form_frame)

    wait_for_element(browser, By.ID, "mandatory_Postcode", timeout=10)

    browser.find_element(By.ID, "Postcode").send_keys(user_postcode)

    wait_for_element(browser, By.ID, "List")
    time.sleep(2)

    # Select the address option whose text contains the house number.
    option_xpath = (
        "//select[@name='List']//option[contains(., '"
        + user_paon
        + "')]"
    )
    address_option = WebDriverWait(browser, 10).until(
        EC.element_to_be_clickable((By.XPATH, option_xpath))
    )
    address_option.click()

    time.sleep(10)

    # For whatever reason, the page sometimes automatically goes to the next step
    advance = browser.find_element(
        By.XPATH, '/html/body/div/div/section/form/div/nav/div[2]/button'
    )
    if advance.is_displayed():
        advance.click()

    time.sleep(5)

    soup = BeautifulSoup(self._driver.page_source, features="html.parser")
    soup.prettify()  # return value is discarded

    found = []
    for label in soup.find_all("label", {"class": "radio-label"}):
        # Take the text after the first "x " and split it around the
        # "collection date: " marker.
        after_count = label.text.split("x ")[1]
        pieces = after_count.lower().split("collection date: ")
        bin_type = pieces[0].split("(")[0].strip()
        date_text = pieces[1].strip().replace(")", "")
        if date_text == "today":
            bin_date = datetime.now()
        else:
            bin_date = datetime.strptime(date_text, "%A %d %B %Y")
        found.append((bin_type, bin_date))

    found.sort(key=lambda entry: entry[1])

    return {
        "bins": [
            {
                "type": bin_type.replace("standard ", "").capitalize(),
                "collectionDate": bin_date.strftime(date_format),
            }
            for bin_type, bin_date in found
        ]
    }
def parse_data(self, page: str, **kwargs) -> dict:
    """
    Fetch bin collection dates for a property from the Kirklees lookup API.

    An anonymous session id is obtained from the ``isauthenticated``
    endpoint, then the ``runLookup`` endpoint is queried with the UPRN.

    Args:
        page: Unused by this method.
        **kwargs: Must contain ``uprn``; it is passed to ``check_uprn``
            before any request is made.

    Returns:
        ``{"bins": [...]}`` with one ``type``/``collectionDate`` entry per row
        returned by the API. ``collectionDate`` is ``"Unknown"`` when the
        row's date is missing or cannot be parsed.

    Raises:
        requests.HTTPError: If either HTTP call returns an error status.
        ValueError: If the API's ``rows_data`` is not a mapping.
    """
    user_uprn = kwargs.get("uprn")
    check_uprn(user_uprn)
    bindata = {"bins": []}

    SESSION_URL = "https://my.kirklees.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmy.kirklees.gov.uk%252Fservice%252FBins_and_recycling___Manage_your_bins&hostname=my.kirklees.gov.uk&withCredentials=true"

    API_URL = "https://my.kirklees.gov.uk/apibroker/runLookup"

    # Seconds allowed per HTTP call; without a timeout requests can block
    # indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    payload = {
        "formValues": {"Search": {"validatedUPRN": {"value": user_uprn}}},
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "User-Agent": "Mozilla/5.0",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "https://my.kirklees.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
    }

    # The context manager closes the session's connections even when a
    # request fails.
    with requests.Session() as s:
        r = s.get(SESSION_URL, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        sid = r.json()["auth-session"]

        params = {
            "id": "65e08e60b299d",
            "repeat_against": "",
            "noRetry": "false",
            "getOnlyTokens": "undefined",
            "log_id": "",
            "app_name": "AF-Renderer::Self",
            # Current Unix time in milliseconds.
            "_": str(int(time.time() * 1000)),
            "sid": sid,
        }

        r = s.post(
            API_URL,
            json=payload,
            headers=headers,
            params=params,
            timeout=REQUEST_TIMEOUT,
        )
        r.raise_for_status()
        response_data = r.json()

    rows_data = response_data["integration"]["transformed"]["rows_data"]
    if not isinstance(rows_data, dict):
        raise ValueError("Invalid data returned from API")

    for bin_info in rows_data.values():
        label = bin_info.get("label", "Unknown")
        next_collection_date = bin_info.get("NextCollectionDate", "Unknown")
        # Convert the date string into a readable format. TypeError covers a
        # null/non-string date, which strptime rejects before parsing.
        try:
            formatted_date = datetime.strptime(
                next_collection_date, "%Y-%m-%dT%H:%M:%S"
            ).strftime(date_format)
        except (ValueError, TypeError):
            formatted_date = "Unknown"

        bindata["bins"].append({"type": label, "collectionDate": formatted_date})

    return bindata

0 comments on commit a3bae15

Please sign in to comment.