Skip to content

Commit

Permalink
Refactor to better support usage as module (#61)
Browse files Browse the repository at this point in the history
* Refactor to better support usage as module

* Bump version

* Ensure only 1 connection during lambda execution

* Fix typing

* Rework to lazy-app to make for easier testing

* Update path

* Cleanup helper for flexibility

* Use context manager to close connections

* Add tests

* Pre-commit

* Rework settings

* Simplify db setup

* Add missing quote and change steps to load data

* Fix settings import

* Update for fast reloading and how to run API locally

* Update README.md

Co-authored-by: Anthony Lukach <anthonylukach@gmail.com>

---------

Co-authored-by: Zachary Deziel <zachary.deziel@gmail.com>
  • Loading branch information
alukach and zacdezgeo authored Sep 20, 2024
1 parent f2fe8e2 commit 085a7fa
Show file tree
Hide file tree
Showing 19 changed files with 355 additions and 241 deletions.
62 changes: 57 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# Space2Stats

Consistent, comparable, authoritative data describing sub-national variation is a constant point of complication for World Bank teams, our development partners, and client countries when assessing and investigating economic issues and national policy. This project will focus on creating and disseminating such data through aggregation of geospatial information at standard administrative divisions, and through the attribution of household survey data with foundational geospatial variables.
Consistent, comparable, authoritative data describing sub-national variation is a constant point of complication for World Bank teams, our development partners, and client countries when assessing and investigating economic issues and national policy. This project will focus on creating and disseminating such data through aggregation of geospatial information at standard administrative divisions, and through the attribution of household survey data with foundational geospatial variables.

## Getting Started Locally

- Setup the database:
- Setup the database:

```
docker-compose up -d
```

- Create a `db.env` file:

```.env
PGHOST=localhost
PGPORT=5439
Expand All @@ -20,15 +22,65 @@ PGTABLENAME=space2stats
```

- Load our dataset into the database

```
./postgres/download_parquet.sh
python postgres/chunk_parquet.py
./postgres/load_parquet_chunks.sh
./load_to_prod.sh
```

> You can get started with a subset of data for NYC with `./load_nyc_sample.sh` which requires changing your `db.env` value for `PGTABLENAME` to `space2stats_nyc_sample`.
- Access your data using the Space2statS API! See the [example notebook](notebooks/space2stats_api_demo.ipynb).
- Access your data using the Space2stats API! See the [example notebook](notebooks/space2stats_api_demo.ipynb).

## Usage as an API

The API can be run with:

```
python -m space2stats
```

## Usage as a module

The module can be installed via `pip` directly from GitHub:

```
pip install "git+https://github.com/worldbank/DECAT_Space2Stats.git#subdirectory=space2stats_api/src"
```

It can then be used within Python as such:

```py
from space2stats import StatsTable

with StatsTable.connect() as stats_table:
    ...
```

Connection parameters may be explicitly provided. Otherwise, connection parameters are expected to be available via the standard [PostgreSQL Environment Variables](https://www.postgresql.org/docs/current/libpq-envars.html#LIBPQ-ENVARS).

```py
from space2stats import StatsTable

with StatsTable.connect(
    PGHOST="localhost",
    PGPORT="5432",
    PGUSER="postgres",
    PGPASSWORD="changeme",
    PGDATABASE="postgis",
    PGTABLENAME="space2stats",
) as stats_table:
    ...

# alternatively:
# settings = Settings(
#     PGHOST="localhost",
#     PGPORT="5432",
#     PGUSER="postgres",
#     PGPASSWORD="changeme",
#     PGDATABASE="postgis",
#     PGTABLENAME="space2stats",
# )
# with StatsTable.connect(settings):
#     ...
```
2 changes: 1 addition & 1 deletion space2stats_api/cdk/aws_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
"Space2StatsFunction",
entry="../src",
runtime=_lambda.Runtime.PYTHON_3_11,
index="space2stats/handler.py",
index="space2stats/api/handler.py",
timeout=Duration.seconds(120),
handler="handler",
environment={
Expand Down
6 changes: 5 additions & 1 deletion space2stats_api/src/space2stats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""space2stats."""

__version__ = "0.1.0"
from .lib import StatsTable
from .settings import Settings

__all__ = ["StatsTable", "Settings"]
__version__ = "1.0.0"
8 changes: 4 additions & 4 deletions space2stats_api/src/space2stats/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os

from .app import app

try:
import uvicorn # noqa

Expand All @@ -15,9 +13,11 @@
), "uvicorn must be installed: `python -m pip install 'space2stats[server]'`"

uvicorn.run(
app=app,
app="space2stats.api.app:build_app",
host=os.getenv("UVICORN_HOST", "127.0.0.1"),
port=os.getenv("UVICORN_PORT", "8000"),
port=int(os.getenv("UVICORN_PORT", "8000")),
root_path=os.getenv("UVICORN_ROOT_PATH", ""),
log_level="info",
factory=True,
reload=True,
)
3 changes: 3 additions & 0 deletions space2stats_api/src/space2stats/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .app import build_app

__all__ = ["build_app"]
77 changes: 77 additions & 0 deletions space2stats_api/src/space2stats/api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from contextlib import asynccontextmanager
from typing import Any, Dict, List, Optional

import boto3
from asgi_s3_response_middleware import S3ResponseMiddleware
from fastapi import Depends, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import ORJSONResponse
from starlette.requests import Request
from starlette_cramjam.middleware import CompressionMiddleware

from ..lib import StatsTable
from .db import close_db_connection, connect_to_db
from .errors import add_exception_handlers
from .schemas import SummaryRequest
from .settings import Settings

s3_client = boto3.client("s3")


def build_app(settings: Optional[Settings] = None) -> FastAPI:
    """Build and configure the FastAPI application.

    Args:
        settings: Settings used for the database connection, the table name
            and the S3 bucket. When omitted (or falsy), a fresh
            ``Settings()`` instance is created.

    Returns:
        A ``FastAPI`` instance with the lifespan handler, middleware,
        exception handlers and routes registered.
    """
    settings = settings or Settings()

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        """Connect to the database on startup and disconnect on shutdown."""
        await connect_to_db(app, settings=settings)
        try:
            yield
        finally:
            # Always release the database resources, even when an exception
            # or cancellation is thrown into the context manager at `yield`.
            await close_db_connection(app)

    app = FastAPI(
        default_response_class=ORJSONResponse,
        lifespan=lifespan,
    )

    # CORS is fully open: any origin, method and header, with credentials.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    app.add_middleware(CompressionMiddleware)
    # Uses the module-level S3 client and the bucket named in the settings.
    app.add_middleware(
        S3ResponseMiddleware,
        s3_bucket_name=settings.S3_BUCKET_NAME,
        s3_client=s3_client,
    )

    add_exception_handlers(app)

    def stats_table(request: Request):
        """Dependency to generate a per-request connection to stats table"""
        # The connection is taken from the pool stored on the app state and is
        # handed back when the `with` block exits after the request completes.
        with request.app.state.pool.connection() as conn:
            yield StatsTable(conn=conn, table_name=settings.PGTABLENAME)

    # NOTE: route handlers are documented with comments rather than docstrings
    # because FastAPI publishes handler docstrings in the OpenAPI schema.

    # Delegates to StatsTable.summaries with the fields of the request body.
    @app.post("/summary", response_model=List[Dict[str, Any]])
    def get_summary(body: SummaryRequest, table: StatsTable = Depends(stats_table)):
        return table.summaries(
            body.aoi,
            body.spatial_join_method,
            body.fields,
            body.geometry,
        )

    # Returns whatever StatsTable.fields() reports, as a list of strings.
    @app.get("/fields", response_model=List[str])
    def fields(table: StatsTable = Depends(stats_table)):
        return table.fields()

    # Static welcome message.
    @app.get("/")
    def read_root():
        return {"message": "Welcome to Space2Stats!"}

    # Static health response; it does not touch the database.
    @app.get("/health")
    def health():
        return {"status": "ok"}

    return app
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@

async def connect_to_db(
app: FastAPI,
settings: Optional[Settings] = None,
settings: Settings,
pool_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
"""Connect to Database."""
if not settings:
settings = Settings()

pool_kwargs = pool_kwargs or {}

app.state.pool = ConnectionPool(
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@

from mangum import Mangum

from .app import app
from .app import build_app
from .db import connect_to_db
from .settings import Settings

settings = Settings(DB_MAX_CONN_SIZE=1) # disable connection pooling
app = build_app(settings)


@app.on_event("startup")
async def startup_event() -> None:
"""Connect to database on startup."""
await connect_to_db(app)
await connect_to_db(app, settings=settings)


handler = Mangum(app, lifespan="off")
Expand Down
12 changes: 12 additions & 0 deletions space2stats_api/src/space2stats/api/schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import List, Literal, Optional

from pydantic import BaseModel

from ..types import AoiModel


class SummaryRequest(BaseModel):
    # Request body model with four fields. Documented with comments rather
    # than a docstring so the generated JSON schema is left unchanged.

    # Validated as AoiModel (imported from ..types); presumably the
    # area of interest to summarise — confirm against AoiModel.
    aoi: AoiModel
    # Must be exactly one of these three strings.
    spatial_join_method: Literal["touches", "centroid", "within"]
    # List of strings; presumably the names of the fields to return —
    # TODO confirm against the consumer of this model.
    fields: List[str]
    # Optional; when given it must be "polygon" or "point". Defaults to None.
    geometry: Optional[Literal["polygon", "point"]] = None
6 changes: 6 additions & 0 deletions space2stats_api/src/space2stats/api/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ..settings import Settings as DbSettings


class Settings(DbSettings):
    # Extends the package-level settings (imported here as DbSettings) with
    # the additional field below.

    # Bucket for large responses
    # No default is declared here, so a value has to be supplied.
    S3_BUCKET_NAME: str
78 changes: 0 additions & 78 deletions space2stats_api/src/space2stats/app.py

This file was deleted.

Loading

0 comments on commit 085a7fa

Please sign in to comment.