
Merge pull request #23 from metaodi/develop
Release 0.3.0
metaodi committed Aug 31, 2023
2 parents 31419d0 + 1a37906 commit 0af02e4
Showing 25 changed files with 6,891 additions and 80 deletions.
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
repos:
- repo: https://github.com/psf/black
rev: 23.7.0
hooks:
- id: black
language_version: python3.8
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
13 changes: 12 additions & 1 deletion CHANGELOG.md
@@ -4,6 +4,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p

## [Unreleased]

## [0.3.0] - 2023-08-31
### Added
- Support for server-side pagination (transparent to the user)
- `ResultVeryLargeWarning` to warn developers about very large queries
- Logging to help with debugging
- Pre-commit configuration

### Changed
- Use `black` code style

## [0.2.1] - 2022-01-31
### Fixed
- In order to fix issue #17, a bug in pyodata had to be fixed. pyodata 1.9.0 contains the bugfix and is now specified as the minimum version.
@@ -53,7 +63,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p
- `Fixed` for any bug fixes.
- `Security` to invite users to upgrade in case of vulnerabilities.

[Unreleased]: https://github.com/metaodi/swissparlpy/compare/v0.2.1...HEAD
[Unreleased]: https://github.com/metaodi/swissparlpy/compare/v0.3.0...HEAD
[0.3.0]: https://github.com/metaodi/swissparlpy/compare/v0.2.1...v0.3.0
[0.2.1]: https://github.com/metaodi/swissparlpy/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/metaodi/swissparlpy/compare/v0.1.1...v0.2.0
[0.1.1]: https://github.com/metaodi/swissparlpy/compare/v0.1.0...v0.1.1
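The `ResultVeryLargeWarning` introduced in 0.3.0 can be handled with Python's standard `warnings` machinery. The sketch below is illustrative only: it defines a local stand-in for the warning class and a hypothetical `fetch_all` helper, since neither the warning's import path nor its exact trigger is shown in this diff.

```python
import warnings


# Stand-in for swissparlpy's ResultVeryLargeWarning; the real class lives
# inside the library, so a placeholder is defined here for illustration.
class ResultVeryLargeWarning(UserWarning):
    pass


def fetch_all(count):
    """Hypothetical query helper that warns when the result set is very large."""
    if count > 10_000:
        warnings.warn(
            f"Query returns {count} rows; consider batching.",
            ResultVeryLargeWarning,
        )
    return list(range(count))


# A caller can escalate the warning to an error to catch runaway queries early:
with warnings.catch_warnings():
    warnings.simplefilter("error", ResultVeryLargeWarning)
    try:
        fetch_all(50_000)
    except ResultVeryLargeWarning as w:
        print(f"refused: {w}")
```

Leaving the default filter in place instead would log the warning once and still return the full result set.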
16 changes: 16 additions & 0 deletions README.md
@@ -1,5 +1,7 @@
[![PyPI Version][pypi-image]][pypi-url]
[![Build Status][build-image]][build-url]
[![Code style: black][black-image]][black-url]
[![pre-commit][pre-commit-image]][pre-commit-url]


swissparlpy
@@ -18,6 +20,7 @@ This module provides easy access to the data of the [OData webservice](https://w
* [Date ranges](#date-ranges)
* [Advanced filter](#advanced-filter)
* [Large queries](#large-queries)
* [API documentation](#documentation)
* [Credits](#credits)
* [Development](#development)
* [Release](#release)
@@ -221,6 +224,15 @@ for session in sessions50:
df_voting50 = pd.concat([pd.read_pickle(os.path.join(path, x)) for x in os.listdir(path)])
```

### Documentation

A reference table of the API is available [here](docs/swissparAPY_diagram.pdf). It contains the dependency diagram between all of the tables, exhaustive descriptions, and the code needed to generate this interactive documentation.
The documentation can be recreated using [dbdiagram.io](https://dbdiagram.io/home).

Below is a first look at the dependencies between the tables contained in the API:

![db diagram of swiss parliament API](/docs/swissparAPY_diagram.png "db diagram of swiss parliament API")

## Credits

This library is inspired by the R package [swissparl](https://github.com/zumbov2/swissparl) of [David Zumbach](https://github.com/zumbov2).
@@ -251,3 +263,7 @@ To create a new release, follow these steps (please respect [Semantic Versioning
[pypi-url]: https://pypi.org/project/swissparlpy/
[build-image]: https://github.com/metaodi/swissparlpy/actions/workflows/build.yml/badge.svg
[build-url]: https://github.com/metaodi/swissparlpy/actions/workflows/build.yml
[black-image]: https://img.shields.io/badge/code%20style-black-000000.svg
[black-url]: https://github.com/psf/black
[pre-commit-image]: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit
[pre-commit-url]: https://github.com/pre-commit/pre-commit
1 change: 1 addition & 0 deletions dev_setup.sh
@@ -5,4 +5,5 @@ source pyenv/bin/activate

pip install --upgrade pip
pip install flit
pre-commit install
flit install -s
Binary file added docs/swissparAPY_diagram.pdf
Binary file added docs/swissparAPY_diagram.png
125 changes: 116 additions & 9 deletions examples/Swiss Parliament API.ipynb
@@ -15,12 +15,12 @@
"metadata": {},
"outputs": [],
"source": [
"import swissparlpy\n",
"import swissparlpy as spp\n",
"import requests\n",
"import pandas as pd\n",
"import os\n",
"import urllib3\n",
"from datetime import datetime"
"from datetime import datetime, timezone"
]
},
{
@@ -54,7 +54,7 @@
"source": [
"session = requests.Session()\n",
"session.verify = False # disable SSL verification\n",
"client = swissparlpy.SwissParlClient(session=session)"
"client = spp.SwissParlClient(session=session)"
]
},
{
@@ -74,8 +74,6 @@
"metadata": {},
"outputs": [],
"source": [
"import swissparlpy as spp\n",
"\n",
"tables = spp.get_tables()\n",
"glimpse_df = pd.DataFrame(spp.get_glimpse(tables[0]))\n",
"glimpse_df"
@@ -196,11 +194,13 @@
"metadata": {},
"outputs": [],
"source": [
"utc_start_date = datetime.fromisoformat('2019-10-01').astimezone(timezone.utc)\n",
"utc_end_date = datetime.fromisoformat('2019-10-31').astimezone(timezone.utc)\n",
"business_oct19 = client.get_data(\n",
" \"Business\",\n",
" Language=\"DE\",\n",
" SubmissionDate__gte=datetime.fromisoformat('2019-10-01'),\n",
" SubmissionDate__lt=datetime.fromisoformat('2019-10-31')\n",
" SubmissionDate__gte=utc_start_date,\n",
" SubmissionDate__lt=utc_end_date\n",
")\n",
"business_oct19.count"
]
@@ -295,11 +295,118 @@
"df_voting50 = pd.concat([pd.read_pickle(os.path.join(path, x)) for x in os.listdir(path)])\n",
"df_voting50"
]
},
{
"cell_type": "markdown",
"id": "9dc1854c-9f97-4bd4-bc21-50b361efa54b",
"metadata": {},
"source": [
"## Queries with lots of results (server-side pagination)\n",
"\n",
"The server returns at most 1000 items per request.\n",
"swissparlpy handles this server-side pagination transparently, so a user of the library does not need to worry about it."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "408f6b47-c352-49ac-ac0a-b4fbaa00b7cc",
"metadata": {},
"outputs": [],
"source": [
"business = client.get_data(\"Business\", Language=\"DE\")\n",
"business.count"
]
},
{
"cell_type": "markdown",
"id": "b6b842d0-ebd5-44a2-9169-0d91128f3da3",
"metadata": {},
"source": [
"As we can see, there are over 50k results.\n",
"Initially only the first 1000 are loaded:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b463504-3a2e-417d-b302-47adb70d23a9",
"metadata": {},
"outputs": [],
"source": [
"len(business.data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02717c13-01a6-4772-b492-c56afdddac85",
"metadata": {},
"outputs": [],
"source": [
"business[1]"
]
},
{
"cell_type": "markdown",
"id": "49fd8ec7-e5d8-4b10-ba03-1543bf52b81e",
"metadata": {},
"source": [
"But as soon as an element beyond the loaded range is accessed, new data is lazily loaded:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fafc6266-cc43-4975-9e39-2866f29ef713",
"metadata": {},
"outputs": [],
"source": [
"business[1001]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54556cc8-e076-4ea7-bb10-63aaf9f80e10",
"metadata": {},
"outputs": [],
"source": [
"len(business.data)"
]
},
{
"cell_type": "markdown",
"id": "19628322-4a7f-4720-a910-e2d72c1d744c",
"metadata": {},
"source": [
"If the last element is needed, all the data is loaded (note: this uses quite a lot of memory):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcdb29ce-30ba-428c-9162-b2c843020227",
"metadata": {},
"outputs": [],
"source": [
"business[-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79adb078-dffc-46a2-8746-2b54238b3718",
"metadata": {},
"outputs": [],
"source": [
"len(business.data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -313,7 +420,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.8.10"
}
},
"nbformat": 4,
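The lazy pagination behaviour walked through in the notebook above can be sketched independently of swissparlpy. The `PaginatedResult` class and `fake_fetch` helper below are hypothetical illustrations of the pattern — fetch pages of 1000 rows on demand whenever an index beyond the cached data is accessed — not the library's actual implementation.

```python
class PaginatedResult:
    """Lazily materialises a large result set, one page at a time."""

    PAGE_SIZE = 1000  # mirrors the server-side limit of 1000 items

    def __init__(self, fetch_page, count):
        self._fetch_page = fetch_page  # callable: skip -> list of rows
        self.count = count             # total rows reported by the server
        self.data = []                 # rows cached so far

    def _load_until(self, index):
        # Translate negative indices, so [-1] forces a full load.
        if index < 0:
            index += self.count
        while len(self.data) <= index and len(self.data) < self.count:
            self.data.extend(self._fetch_page(skip=len(self.data)))

    def __getitem__(self, index):
        self._load_until(index)
        return self.data[index if index >= 0 else index + self.count]


# Fake server: 2500 rows, served in pages of at most 1000.
def fake_fetch(skip):
    end = min(skip + PaginatedResult.PAGE_SIZE, 2500)
    return [{"ID": i} for i in range(skip, end)]


business = PaginatedResult(fake_fetch, count=2500)
business[1]     # first page fetched: len(business.data) == 1000
business[1001]  # second page fetched on demand: len(business.data) == 2000
business[-1]    # forces loading all 2500 rows
```

Accessing `business[-1]` forces a full load, which matches the notebook's note that fetching the last element uses quite a lot of memory.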
8 changes: 5 additions & 3 deletions examples/download_votes_in_batches.py
@@ -14,7 +14,7 @@ def save_votes_of_session(id, path):
data = spp.get_data("Voting", Language="DE", IdSession=id)
print(f"{data.count} rows loaded.")
df = pd.DataFrame(data)
pickle_path = os.path.join(path, f'{id}.pks')
pickle_path = os.path.join(path, f"{id}.pks")
df.to_pickle(pickle_path)
print(f"Saved pickle at {pickle_path}")

@@ -25,7 +25,9 @@ def save_votes_of_session(id, path):

for session in sessions50:
print(f"Loading session {session['ID']}")
save_votes_of_session(session['ID'], path)
save_votes_of_session(session["ID"], path)

# Combine to one dataframe
df_voting50 = pd.concat([pd.read_pickle(os.path.join(path, x)) for x in os.listdir(path)])
df_voting50 = pd.concat(
[pd.read_pickle(os.path.join(path, x)) for x in os.listdir(path)]
)
4 changes: 1 addition & 3 deletions examples/filter.py
@@ -2,9 +2,7 @@
from pprint import pprint

subjects = spp.get_data(
table="SubjectBusiness",
BusinessShortNumber="05.057",
Language="DE"
table="SubjectBusiness", BusinessShortNumber="05.057", Language="DE"
)

print(f"Total rows: {len(subjects)}")
9 changes: 3 additions & 6 deletions examples/filter_advanced.py
@@ -3,13 +3,10 @@


def name_filter(e):
return spp.filter.or_(
e.FirstName == 'Stefan',
e.LastName == 'Seiler'
)
return spp.filter.or_(e.FirstName == "Stefan", e.LastName == "Seiler")


persons = spp.get_data("Person", filter=name_filter, Language='DE')
persons = spp.get_data("Person", filter=name_filter, Language="DE")

df = pd.DataFrame(persons)
print(df[['FirstName', 'LastName']])
print(df[["FirstName", "LastName"]])
4 changes: 2 additions & 2 deletions examples/filter_query.py
@@ -3,8 +3,8 @@

persons = spp.get_data(
table="Person",
filter="(FirstName eq 'Stefan' or LastName eq 'Seiler') and Language eq 'DE'"
filter="(FirstName eq 'Stefan' or LastName eq 'Seiler') and Language eq 'DE'",
)

df = pd.DataFrame(persons)
print(df[['FirstName', 'LastName']])
print(df[["FirstName", "LastName"]])
4 changes: 2 additions & 2 deletions examples/list_all_tables_and_properties.py
@@ -6,5 +6,5 @@
for table, props in overview.items():
print(table)
for prop in props:
print(f' + {prop}')
print('')
print(f" + {prop}")
print("")
23 changes: 23 additions & 0 deletions examples/pagination.py
@@ -0,0 +1,23 @@
import logging
import swissparlpy
from pprint import pprint


# setup logger to see debug messages from swissparlpy
spp_logger = logging.getLogger("swissparlpy.client")
spp_logger.setLevel(logging.DEBUG)

logging.basicConfig(
format="%(asctime)s %(levelname)-10s %(name)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logging.captureWarnings(True)


business = swissparlpy.get_data("Business", Language="DE")
print(f"Count: {business.count}")
print(f"Internal data: {len(business.data)}")

pprint(business)
pprint(business[1])
pprint(business[1001])
6 changes: 2 additions & 4 deletions examples/slice.py
@@ -1,15 +1,13 @@
import swissparlpy as spp
from pprint import pprint

sessions = spp.get_data(
table="Session"
)
sessions = spp.get_data(table="Session")

print(f"Total rows: {len(sessions)}")
for session in sessions[5:10]:
pprint(session)


# print any element
print('')
print("")
pprint(sessions[587])
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -11,7 +11,7 @@ classifiers = ["License :: OSI Approved :: MIT License"]
dynamic = ["version", "description"]
dependencies = [
"requests",
"pyodata>=1.9.0",
"pyodata>=1.10.0",
]

[project.optional-dependencies]
@@ -26,6 +26,8 @@ dev = [
"flit",
"jupyter",
"pandas",
"black",
"pre-commit",
]


