fix: Kirklees Council

fix: #1129 - Breaking Change. UPRN required
robbrad · Jan 5, 2025 · a3bae15 · a3bae15
1 parent 5b9499a
commit a3bae15
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 134 deletions.
diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
@@ -936,13 +936,11 @@
         "wiki_note": "Follow the instructions [here](https://waste-services.kingston.gov.uk/waste) until the \"Your bin days\" page, then copy the URL and replace the URL in the command."
     },
     "KirkleesCouncil": {
-        "house_number": "24",
-        "postcode": "HD7 5DX",
+        "uprn": "83002937",
         "skip_get_url": true,
         "url": "https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins",
-        "web_driver": "http://selenium:4444",
         "wiki_name": "Kirklees Council",
-        "wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver."
+        "wiki_note": "Provide your UPRN. Find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
     },
     "KnowsleyMBCouncil": {
         "house_number": "22",

diff --git a/uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py b/uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py
@@ -1,143 +1,76 @@
 import time
-from datetime import datetime
-from typing import Optional
 
-from bs4 import BeautifulSoup
-from selenium.common import TimeoutException
-from selenium.webdriver.common.by import By
-from selenium.webdriver.common.keys import Keys
-from selenium.webdriver.remote.webdriver import WebDriver
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.wait import WebDriverWait
-from webdriver_manager.drivers.chrome import ChromeDriver
+import requests
 
-from selenium import webdriver
-
-from uk_bin_collection.uk_bin_collection.common import create_webdriver
-from uk_bin_collection.uk_bin_collection.common import date_format
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-def wait_for_element(driver, element_type, element: str, timeout: int = 5):
-    element_present = EC.presence_of_element_located((element_type, element))
-    wait_for_element_conditions(driver, element_present, timeout=timeout)
-
-
-def wait_for_element_conditions(driver, conditions, timeout: int = 5):
-    try:
-        WebDriverWait(driver, timeout).until(conditions)
-    except TimeoutException:
-        print("Timed out waiting for page to load")
-        raise
-
-
+# Kirklees Council parser: looks up collection dates by UPRN through the council's JSON API using requests
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
     base class. They can also override some operations with a default
     implementation.
     """
 
-    def __init__(self):
-        self._driver: Optional[WebDriver] = None
-
-    def parse_data(self, *args, **kwargs) -> dict:
-        try:
-            return self._parse_data(*args, **kwargs)
-        finally:
-            if self._driver:
-                self._driver.quit()
-
-    def _parse_data(self, page: str, **kwargs) -> dict:
-        """
-        Process:
-
-        - Use a house number and postcode that is known to be domestic and resolves to a
-          single unique address. When the address search form is submitted with
-          those details, a session is created
-
-        - Now a session exists, navigate to the calendar URL, specifying the UPRN
-
-        - Extract info from the 'alt' attribute of the images on that page
-        """
-        data = {"bins": []}
-        collections = []
-
-        user_paon = kwargs["paon"]
-        user_postcode = kwargs["postcode"]
-
-        self._driver = driver = webdriver.Chrome()
-        # self._driver = driver = create_webdriver(
-        #     web_driver=kwargs["web_driver"],
-        #     headless=kwargs.get("headless", True),
-        #     session_name=__name__,
-        # )
-        driver.implicitly_wait(1)
-
-        driver.get(
-            "https://my.kirklees.gov.uk/service/Bins_and_recycling___Manage_your_bins"
-        )
-
-        time.sleep(5)
-
-        # Switch to iframe
-        iframe = driver.find_element(By.CSS_SELECTOR, "#fillform-frame-1")
-        driver.switch_to.frame(iframe)
-
-        wait_for_element(
-            driver, By.ID, "mandatory_Postcode", timeout=10
-        )
-
-        postcode_input = driver.find_element(
-            By.ID, "Postcode"
-        )
-        postcode_input.send_keys(user_postcode)
-
-        wait_for_element(driver, By.ID, "List")
-        time.sleep(2)
-
-        WebDriverWait(driver, 10).until(
-            EC.element_to_be_clickable(
-                (
-                    By.XPATH,
-                    "//select[@name='List']//option[contains(., '"
-                    + user_paon
-                    + "')]",
-                )
-            )
-        ).click()
-
-        time.sleep(10)
-
-        # For whatever reason, the page sometimes automatically goes to the next step
-        next_button = driver.find_element(By.XPATH, '/html/body/div/div/section/form/div/nav/div[2]/button')
-        if next_button.is_displayed():
-            next_button.click()
-
-
-        time.sleep(5)
-
-        soup = BeautifulSoup(self._driver.page_source, features="html.parser")
-        soup.prettify()
-
-        radio_button_text = soup.find_all("label", {"class": "radio-label"})
-        for label in radio_button_text:
-            parsed_text = label.text.split("x ")
-            row = parsed_text[1].lower().split("collection date: ")
-            bin_type = row[0].split("(")[0].strip()
-            date_text = row[1].strip().replace(")", "")
-            if date_text == "today":
-                bin_date = datetime.now()
-            else:
-                bin_date = datetime.strptime(date_text, "%A %d %B %Y")
-            collections.append((bin_type, bin_date))
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
-            dict_data = {
-                "type": item[0].replace("standard ", "").capitalize(),
-                "collectionDate": item[1].strftime(date_format),
-            }
-            data["bins"].append(dict_data)
-
-        return data
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Look up the next bin collections for a property via the Kirklees API.
+
+        Opens an HTTP session, reads a session id from the ``isauthenticated``
+        endpoint, posts the UPRN to the ``runLookup`` endpoint and converts
+        each returned row into a ``{"type", "collectionDate"}`` entry.
+
+        Args:
+            page: Not used by this parser; the data is fetched from the API.
+            **kwargs: Expected to contain ``uprn``; it is passed through
+                ``check_uprn`` before any request is made.
+
+        Returns:
+            ``{"bins": [{"type": <label>, "collectionDate": <date>}, ...]}``
+            with each date rendered using ``date_format``. Rows whose
+            ``NextCollectionDate`` is missing or not in the expected
+            ``%Y-%m-%dT%H:%M:%S`` form are left out.
+
+        Raises:
+            requests.HTTPError: If either request returns an error status.
+            requests.Timeout: If either request exceeds the timeout.
+            KeyError: If an expected key is missing from a JSON response.
+            ValueError: If ``rows_data`` in the response is not a dict.
+        """
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        SESSION_URL = "https://my.kirklees.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmy.kirklees.gov.uk%252Fservice%252FBins_and_recycling___Manage_your_bins&hostname=my.kirklees.gov.uk&withCredentials=true"
+
+        API_URL = "https://my.kirklees.gov.uk/apibroker/runLookup"
+
+        # Seconds to wait per request; without a timeout requests can block
+        # forever on an unresponsive server.
+        REQUEST_TIMEOUT = 30
+
+        payload = {
+            "formValues": {"Search": {"validatedUPRN": {"value": user_uprn}}},
+        }
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://my.kirklees.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+
+        # The context manager closes the session's connections on every path.
+        with requests.Session() as s:
+            r = s.get(SESSION_URL, timeout=REQUEST_TIMEOUT)
+            r.raise_for_status()
+            sid = r.json()["auth-session"]
+
+            params = {
+                # NOTE(review): presumably the identifier of the bin lookup on
+                # the council's form; verify if the API starts rejecting it.
+                "id": "65e08e60b299d",
+                "repeat_against": "",
+                "noRetry": "false",
+                "getOnlyTokens": "undefined",
+                "log_id": "",
+                "app_name": "AF-Renderer::Self",
+                # Current Unix time in milliseconds
+                "_": str(int(time.time() * 1000)),
+                "sid": sid,
+            }
+
+            r = s.post(
+                API_URL,
+                json=payload,
+                headers=headers,
+                params=params,
+                timeout=REQUEST_TIMEOUT,
+            )
+            r.raise_for_status()
+            response_data = r.json()
+
+        rows_data = response_data["integration"]["transformed"]["rows_data"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        for bin_info in rows_data.values():
+            label = bin_info.get("label", "Unknown")
+            next_collection_date = bin_info.get("NextCollectionDate")
+            try:
+                formatted_date = datetime.strptime(
+                    next_collection_date, "%Y-%m-%dT%H:%M:%S"
+                ).strftime(date_format)
+            except (TypeError, ValueError):
+                # A missing (None) or malformed date cannot be expressed in
+                # date_format; skip the row instead of emitting a non-date
+                # placeholder as a collection date.
+                continue
+
+            bindata["bins"].append({"type": label, "collectionDate": formatted_date})
+
+        return bindata