Skip to content

Commit

Permalink
Merge pull request #61 from oslokommune/streamline
Browse files Browse the repository at this point in the history
Bring up to date
  • Loading branch information
simenheg authored Jun 6, 2024
2 parents caa34e7 + 481ce48 commit 1be8e03
Show file tree
Hide file tree
Showing 23 changed files with 8,580 additions and 4,276 deletions.
30 changes: 17 additions & 13 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ name: "CodeQL"

on:
push:
branches: [ master ]
branches: [main]
paths-ignore:
- '**/*.md'
pull_request:
branches: [ master ]
branches: [main]
paths-ignore:
- '**/*.md'
schedule:
- cron: '0 6 * * 1'
- cron: "0 6 * * 1"
workflow_dispatch:

jobs:
Expand All @@ -23,17 +27,17 @@ jobs:
strategy:
fail-fast: false
matrix:
language: [ 'python' ]
language: ["python"]

steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Checkout repository
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
queries: +security-and-quality
- name: Initialize CodeQL
uses: github/codeql-action/init@d13ca047ae14d02299ae648f2ba451622ecb7270 # v3.23.1
with:
languages: ${{ matrix.language }}
queries: +security-and-quality

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@d13ca047ae14d02299ae648f2ba451622ecb7270 # v3.23.1
41 changes: 22 additions & 19 deletions .github/workflows/deploy-dev.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
name: Deploy Dev

on:
push:
branches:
- main
paths-ignore:
- '**/*.md'
workflow_dispatch:

jobs:
deploy-dev:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install Python dependencies
run: pip install tox
- name: Test with tox
run: tox -e py,flake8,black
- uses: actions/setup-node@v1
with:
node-version: '12.x'
- name: Install NPM dependencies
run: npm install
- name: Deploy Lambda functions
run: |
AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_DEV }} \
AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY_DEV }} \
./node_modules/.bin/sls deploy -s dev
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Set up Python 3.11
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: '3.11'
- name: Install Python dependencies
run: pip install tox
- name: Test with tox
run: tox -e py,flake8,black
- uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4.0.1
with:
node-version: '18.x'
- name: Install NPM dependencies
run: npm install
- name: Deploy Lambda functions
  # Hand the AWS credentials to the step through `env` instead of expanding
  # the secrets into the shell script text, where any shell-special
  # character in a secret value would be interpreted by the shell.
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_DEV }}
  run: ./node_modules/.bin/sls deploy -s dev
39 changes: 20 additions & 19 deletions .github/workflows/deploy-prod.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: Deploy Prod

on:
push:
branches:
Expand All @@ -9,22 +10,22 @@ jobs:
deploy-prod:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install Python dependencies
run: pip install tox
- name: Test with tox
run: tox -e py,flake8,black
- uses: actions/setup-node@v1
with:
node-version: '12.x'
- name: Install NPM dependencies
run: npm install
- name: Deploy Lambda functions
run: |
AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_PROD }} \
AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY_PROD }} \
./node_modules/.bin/sls deploy -s prod
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Set up Python 3.11
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: '3.11'
- name: Install Python dependencies
run: pip install tox
- name: Test with tox
run: tox -e py,flake8,black
- uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4.0.1
with:
node-version: '18.x'
- name: Install NPM dependencies
run: npm install
- name: Deploy Lambda functions
  # Hand the AWS credentials to the step through `env` instead of expanding
  # the secrets into the shell script text, where any shell-special
  # character in a secret value would be interpreted by the shell.
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_PROD }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_PROD }}
  run: ./node_modules/.bin/sls deploy -s prod
7 changes: 4 additions & 3 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: "Pull Request Labeler"

on:
pull_request:

Expand All @@ -13,6 +14,6 @@ jobs:
pull-requests: write

steps:
- uses: actions/labeler@v3
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
- uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
21 changes: 12 additions & 9 deletions .github/workflows/pr-tests.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
name: PR Tests

on:
pull_request:
paths-ignore:
- '**/*.md'

jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
run: pip install tox
- name: Test with tox
run: tox -e py,flake8,black
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Set up Python 3.11
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: '3.11'
- name: Install dependencies
run: pip install tox
- name: Test with tox
run: tox -e py,flake8,black
Empty file added bydelsfakta_api/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions bydelsfakta_api/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class IllegalFormatError(Exception):
    """Signals that a version or edition entry from the metadata API lacks an ``Id`` field."""


class S3FileNotFoundError(Exception):
    """Signals that the data file(s) expected in S3 for a dataset edition are missing."""
95 changes: 95 additions & 0 deletions bydelsfakta_api/handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import json
import logging
import os

import requests
from aws_xray_sdk.core import patch, xray_recorder

from bydelsfakta_api.service import get_latest_edition, get_latest_version, get_objects
from bydelsfakta_api.exceptions import IllegalFormatError, S3FileNotFoundError

# Instrument the `requests` library so outgoing HTTP calls are traced by X-Ray.
patch(["requests"])

# Base URL of the metadata API. Read once at import time; a missing
# METADATA_API_URL environment variable raises KeyError on import.
metadata_api = os.environ["METADATA_API_URL"]


# Configure the root logger to emit INFO-level messages.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Translates a dataset's "accessRights" value into the confidentiality
# segment used when building the S3 key prefix in _handle_event.
CONFIDENTIALITY_MAP = {
    "public": "green",
    "restricted": "yellow",
    "non-public": "red",
}


def handler(event, context):
    """Entry point: reject every caller except the Bydelsfakta frontend.

    Args:
        event: Request event; must contain
            ``requestContext.authorizer.principalId``.
        context: Runtime context object (unused).

    Returns:
        A 403 response dict for any other principal, otherwise whatever
        ``_handle_event`` returns for the event.
    """
    principal_id = event["requestContext"]["authorizer"]["principalId"]
    if principal_id == "service-account-bydelsfakta-frontend":
        return _handle_event(event)

    return {
        "statusCode": 403,
        "body": "Forbidden: Only the Bydelsfakta frontend is allowed to use this API",
    }


@xray_recorder.capture("handle_event")
def _handle_event(event):
    """Look up a dataset's latest edition and return its data files.

    The dataset id is taken from ``event["pathParameters"]["dataset"]`` and an
    optional ``geography`` query string parameter is forwarded to
    ``get_objects`` to select specific files.

    Returns:
        A response dict built by ``_response``:
        404 when the metadata API does not know the dataset,
        400 when a version or edition entry has an illegal format,
        422 when the expected files are missing from S3,
        200 with the list of objects otherwise.
    """
    dataset_id = event["pathParameters"]["dataset"]
    logger.info(f"Fetching Bydelsfakta data for {dataset_id}")

    metadata_reply = requests.get(f"{metadata_api}/datasets/{dataset_id}")
    if metadata_reply.status_code == 404:
        return _response(404, f"No dataset with id {dataset_id}")

    dataset = json.loads(metadata_reply.text)
    stage = dataset.get("processing_stage", "processed")
    # "accessRights" takes precedence; otherwise fall back to the
    # "confidentiality" field, which is used as-is.
    if "accessRights" in dataset:
        confidentiality = CONFIDENTIALITY_MAP[dataset["accessRights"]]
    else:
        confidentiality = dataset["confidentiality"]
    parent_id = dataset.get("parent_id", None)

    params = event["queryStringParameters"]
    if params and "geography" in params:
        query = params["geography"]
    else:
        query = []

    try:
        version = get_latest_version(dataset_id)
    except IllegalFormatError:
        return _response(400, "One or more versions have illegal format")

    try:
        edition = get_latest_edition(dataset_id, version)
    except IllegalFormatError:
        return _response(400, "One or more editions have illegal format")

    # Only the last path segment of the edition's Id is used in the key.
    edition_id = edition["Id"].split("/")[-1]

    key_parts = [stage, confidentiality]
    if parent_id:
        key_parts.append(parent_id)
    # The trailing empty string makes the joined prefix end with "/".
    key_parts += [dataset_id, f"version={version}", f"edition={edition_id}", ""]
    base_key = "/".join(key_parts)

    try:
        objects = get_objects(base_key, query)
    except S3FileNotFoundError as e:
        return _response(422, str(e))
    return _response(200, objects)


def _response(status, body):
return {
"statusCode": status,
"headers": {"Content-Type": "application/json"},
"body": json.dumps(body, ensure_ascii=False),
}
81 changes: 81 additions & 0 deletions bydelsfakta_api/service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import json
import logging
import os
import re

import boto3
import botocore
import requests
from aws_xray_sdk.core import patch, xray_recorder

from bydelsfakta_api.exceptions import IllegalFormatError, S3FileNotFoundError

# Instrument boto3 and requests so AWS and HTTP calls are traced by X-Ray.
patch(["boto3"])
patch(["requests"])

# Base URL of the metadata API. Read once at import time; a missing
# METADATA_API_URL environment variable raises KeyError on import.
metadata_api = os.environ["METADATA_API_URL"]

# Configure the root logger to emit INFO-level messages.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Module-level S3 client, created once at import and shared by all calls.
session = boto3.Session()
s3 = session.client("s3")

# Bucket name is derived from the STAGE environment variable
# (KeyError on import if it is unset).
bucket = "ok-origo-dataplatform-{}".format(os.environ["STAGE"])


@xray_recorder.capture("get_objects")
def get_objects(base_key, query):
    """Fetch and parse the JSON files stored under ``base_key``.

    Args:
        base_key: S3 key prefix (ending in "/") of the dataset edition.
        query: Falsy to fetch every object under the prefix; otherwise a
            string from which each two-digit group is taken as a geography
            id and mapped to ``<base_key><id>.json``.

    Returns:
        List with the parsed JSON content of each file, in key order.

    Raises:
        S3FileNotFoundError: If no keys were found/derived, or a key does
            not exist in the bucket.
        botocore.exceptions.ClientError: For any other S3 error.
    """
    logger.info(f"Fetching data from {base_key}")

    if not query:
        listing = s3.list_objects_v2(Bucket=bucket, Prefix=base_key)
        # S3 omits "Contents" altogether when nothing matches the prefix;
        # default to an empty list so the "no files" error below is raised
        # instead of a KeyError.
        # NOTE(review): a single list_objects_v2 call returns at most 1000
        # keys; paginate if an edition can ever hold more files than that.
        keys = [obj["Key"] for obj in listing.get("Contents", [])]
    else:
        pattern = re.compile(r"(\d\d)")
        numbers = pattern.findall(query)
        keys = [f"{base_key}{geography}.json" for geography in numbers]

    if not keys:
        raise S3FileNotFoundError(
            "Even though an edition exists, no files were found for the dataset"
        )

    objects = []
    for key in keys:
        try:
            obj = s3.get_object(Bucket=bucket, Key=key)["Body"].read().decode("utf-8")
        except botocore.exceptions.ClientError as e:
            if e.response.get("Error", {}).get("Code") == "NoSuchKey":
                # Keep the original S3 error attached as the cause.
                raise S3FileNotFoundError(f"File {key} could not be found") from e
            raise
        objects.append(json.loads(obj))

    return objects


@xray_recorder.capture("get_latest_version")
def get_latest_version(dataset_id):
    """Return the highest ``version`` value registered for a dataset.

    Args:
        dataset_id: Id of the dataset to look up in the metadata API.

    Returns:
        The ``version`` field of the entry with the greatest version.

    Raises:
        IllegalFormatError: If any version entry lacks an ``Id`` field.
    """
    reply = requests.get(f"{metadata_api}/datasets/{dataset_id}/versions")
    versions = json.loads(reply.text)

    malformed = [version for version in versions if "Id" not in version]
    if malformed:
        logger.info("Versions with bad format was found:")
        logger.info(malformed)
        raise IllegalFormatError("Wrong format")

    def version_rank(entry):
        # Entries without a "version" field rank as -1.
        if "version" in entry:
            return entry["version"]
        return -1

    newest = max(versions, key=version_rank)
    return newest["version"]


@xray_recorder.capture("get_latest_edition")
def get_latest_edition(dataset_id, version):
    """Return the edition entry with the greatest ``Id`` for a dataset version.

    Args:
        dataset_id: Id of the dataset to look up in the metadata API.
        version: Version of the dataset whose editions are listed.

    Returns:
        The full edition entry (not just its id) whose ``Id`` compares highest.

    Raises:
        IllegalFormatError: If any edition entry lacks an ``Id`` field.
    """
    url = f"{metadata_api}/datasets/{dataset_id}/versions/{version}/editions"
    reply = requests.get(url)
    editions = json.loads(reply.text)

    malformed = [edition for edition in editions if "Id" not in edition]
    if malformed:
        logger.info("Editions with bad format was found:")
        logger.info(malformed)
        raise IllegalFormatError("Wrong format")

    def edition_rank(entry):
        # Entries without an "Id" field rank as -1.
        if "Id" in entry:
            return entry["Id"]
        return -1

    return max(editions, key=edition_rank)
Loading

0 comments on commit 1be8e03

Please sign in to comment.