Skip to content

Commit

Permalink
Merge pull request #14 from mjanez/latest
Browse files Browse the repository at this point in the history
Fix cronjobs
  • Loading branch information
mjanez authored Apr 22, 2023
2 parents fc4ee51 + 2c48eef commit 6266abc
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 45 deletions.
11 changes: 6 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ ENV PYCSW_CRON_DAYS_INTERVAL=2

RUN apt-get -q -y update && \
apt-get install -y wget && \
DEBIAN_FRONTEND=noninteractive apt-get -yq install gettext-base
ADD https://raw.githubusercontent.com/eficode/wait-for/v2.2.3/wait-for /wait-for
RUN chmod +x /wait-for && \
DEBIAN_FRONTEND=noninteractive apt-get -yq install gettext-base && \
wget -O /wait-for https://raw.githubusercontent.com/eficode/wait-for/v2.2.3/wait-for && \
chmod +x /wait-for && \
python3 -m pip install pdm

WORKDIR ${APP_DIR}
Expand All @@ -28,6 +28,7 @@ RUN pdm install --no-self --group prod
COPY pycsw/conf/pycsw.conf.template pycsw/entrypoint.sh .
COPY ckan2pycsw ckan2pycsw

EXPOSE 8080/TCP
EXPOSE ${PYCSW_PORT}/TCP

ENTRYPOINT ["/bin/bash", "./entrypoint.sh"]
CMD ["pdm", "run", "python3", "-m", "gunicorn", "pycsw.wsgi:application", "-b", "0.0.0.0:${PYCSW_PORT}"]
CMD ["tail", "-f", "/dev/null"]
10 changes: 5 additions & 5 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ ENV DEV_MODE=True
ENV TIMEOUT=300

RUN apt-get -q -y update && \
apt-get install -y wget && \
DEBIAN_FRONTEND=noninteractive apt-get -yq install gettext-base
ADD https://raw.githubusercontent.com/eficode/wait-for/v2.2.3/wait-for /wait-for
RUN chmod +x /wait-for && \
apt-get install -y wget procps && \
DEBIAN_FRONTEND=noninteractive apt-get -yq install gettext-base && \
wget -O /wait-for https://raw.githubusercontent.com/eficode/wait-for/v2.2.3/wait-for && \
chmod +x /wait-for && \
python3 -m pip install pdm debugpy

WORKDIR ${APP_DIR}
Expand All @@ -27,7 +27,7 @@ RUN pdm install --no-self --group prod
COPY pycsw/conf/pycsw.conf.template pycsw/entrypoint_dev.sh .
COPY ckan2pycsw ckan2pycsw

EXPOSE 8080/TCP
EXPOSE ${PYCSW_PORT}/TCP
EXPOSE 5678/TCP

# Set entrypoint with debugpy
Expand Down
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ docker compose up -d --build
```

>**Note**:<br>
> Deploy the dev (local build) `docker compose_dev.yaml` with:
> Deploy the dev (local build) `docker-compose.dev.yml` with:
>
>```bash
> docker compose -f docker compose_dev.yml up --build
> docker compose -f docker-compose.dev.yml up --build
>```
Expand Down Expand Up @@ -205,14 +205,17 @@ New metadata schemas can be extended or added to convert elements extracted from
```

## Test
Perform a Perform a GetRecords request:
Perform a `GetRecords` request and return all:

{PYCSW_URL}?request=GetRecords&service=CSW&version=2.0.2&typeNames=gmd:MD_Metadata&outputSchema=http://www.isotc211.org/2005/gmd&elementSetName=full
{PYCSW_URL}?request=GetRecords&service=CSW&version=3.0.0&typeNames=gmd:MD_Metadata&outputSchema=http://www.isotc211.org/2005/gmd&elementSetName=full


- The `ckan-pycsw` logs will be created in the [`/log`](/log/) folder.
- Metadata records in `XML` format ([ISO 19139](https://www.iso.org/standard/67253.html)) are stored in the [`/metadata`](/metadata/) folder.

>**Note**
> The `GetRecords` operation allows clients to discover resources (datasets). The response is an `XML` document and the output schema can be specified.

## Debug
### VSCode
1. Build and run container.
Expand Down
38 changes: 29 additions & 9 deletions ckan2pycsw/ckan2pycsw.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from urllib.parse import urljoin
import os
from datetime import datetime, timedelta
import subprocess
import time

# third-party libraries
import psutil
import requests
import pycsw.core.config
from pycsw.core import admin, metadata, repository, util
Expand All @@ -29,10 +32,11 @@
try:
PYCSW_CRON_DAYS_INTERVAL = int(os.environ["PYCSW_CRON_DAYS_INTERVAL"])
except (KeyError, ValueError):
PYCSW_CRON_DAYS_INTERVAL = 2
PYCSW_CRON_DAYS_INTERVAL = 3
method = "nightly"
URL = os.environ["CKAN_URL"]
PYCSW_URL = os.environ["PYCSW_URL"]
PYCSW_PORT = os.environ["PYCSW_PORT"]
APP_DIR = os.environ.get("APP_DIR", "/app")
CKAN_API = "api/3/action/package_search"
PYCSW_CKAN_SCHEMA = os.environ.get("PYCSW_CKAN_SCHEMA", "iso19139_geodcatap")
Expand Down Expand Up @@ -131,13 +135,31 @@ def main():
xml_dirpath=APP_DIR + "/metadata/")


def run_at_specific_time(start_date):
def run_scheduler():
scheduler = BlockingScheduler(timezone=TZ)
scheduler.add_job(main, "interval", hours=PYCSW_CRON_DAYS_INTERVAL, start_date=start_date)
scheduler.add_job(run_tasks, "interval", days=PYCSW_CRON_DAYS_INTERVAL, next_run_time=datetime.now())
scheduler.start()
cron_datetime = datetime.now().strftime("%Y-%m-%d %H:%M")
print(f"{log_module}:ckan2pycsw | Schedule App: {cron_datetime}")
logging.info(f"{log_module}:ckan2pycsw | Schedule App: {cron_datetime}")

def run_tasks():
    """
    Stop gunicorn, run the main function, then start gunicorn again.

    Any process whose name contains "gunicorn" or whose command line contains
    "pycsw.wsgi:application" is killed before main() runs. Gunicorn is started
    again afterwards, even when main() raises, so that a failed run does not
    leave the server stopped until the next scheduled execution. An exception
    raised by main() still propagates to the caller after the restart attempt.
    """
    log_file(APP_DIR + "/log")

    stopped = []
    for proc in psutil.process_iter(["pid", "name", "cmdline"]):
        # process_iter() sets an attribute to None when it cannot be read
        # (access denied, zombie process), so normalise before matching.
        name = proc.info["name"] or ""
        cmdline = " ".join(proc.info["cmdline"] or [])
        if "gunicorn" not in name and "pycsw.wsgi:application" not in cmdline:
            continue

        print(f"Stopping gunicorn process with PID {proc.info['pid']}...")
        try:
            proc.kill()
        except psutil.NoSuchProcess:
            # Already gone, e.g. a worker that exited together with its master.
            continue
        except psutil.AccessDenied as e:
            logging.error(f"{log_module}:ckan2pycsw | Error stopping gunicorn process with PID {proc.info['pid']}: {e}")
            continue
        stopped.append(proc)

    if stopped:
        # Wait (up to 5 seconds) for the killed processes to actually exit
        # instead of sleeping a fixed amount of time per process.
        _, alive = psutil.wait_procs(stopped, timeout=5)
        for proc in alive:
            logging.warning(f"{log_module}:ckan2pycsw | gunicorn process with PID {proc.pid} is still running")

    try:
        # Execute the main function
        main()
    finally:
        # Restart gunicorn after the main function finishes (or fails)
        try:
            subprocess.Popen(["pdm", "run", "python3", "-m", "gunicorn", "pycsw.wsgi:application", "-b", f"0.0.0.0:{PYCSW_PORT}"])
        except Exception as e:
            logging.error(f"{log_module}:ckan2pycsw | Error starting gunicorn: {e}")

if __name__ == "__main__":
if DEV_MODE == True or DEV_MODE == "True":
Expand All @@ -148,7 +170,5 @@ def run_at_specific_time(start_date):
ptvsd.wait_for_attach()
main()
else:
start_date = datetime.now()
main()
run_at_specific_time(start_date)
run_scheduler()

20 changes: 8 additions & 12 deletions docker-compose_dev.yml → docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,15 @@ services:
dockerfile: Dockerfile.dev
env_file:
- .env
healthcheck:
test:
- CMD
- wget
- -qO
- /dev/null
- ${PYCSW_URL}
networks:
default: null
logging:
driver: "json-file"
options:
max-size: "100m"
max-file: "10"
ports:
- "0.0.0.0:${PYCSW_PORT}:${PYCSW_PORT}"
# debugpy
- "5678:5678"
- "0.0.0.0:${PYCSW_PORT}:${PYCSW_PORT}"
# debugpy
- "5678:5678"
volumes:
- ./log:${APP_DIR}/log
- ./metadata:${APP_DIR}/metadata
Expand Down
17 changes: 8 additions & 9 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,19 @@ services:
image: ghcr.io/mjanez/ckan-pycsw:latest
env_file:
- .env
healthcheck:
test:
- CMD
- wget
- -qO
- /dev/null
- ${PYCSW_URL}
networks:
default: null
logging:
driver: "json-file"
options:
max-size: "100m"
max-file: "10"
ports:
- "0.0.0.0:${PYCSW_PORT}:${PYCSW_PORT}"
volumes:
- ./log:${APP_DIR}/log
- ./metadata:${APP_DIR}/metadata
restart: on-failure:3
healthcheck:
test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:${PYCSW_PORT}"]
networks:
default:
name: ckan2pycsw_default
24 changes: 23 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies = [
"debugpy>=1.6.6",
"simplejson>=3.19.1",
"apscheduler>=3.10.1",
"psutil>=5.9.5",
]
requires-python = ">=3.8"
license = {text = "GPLv3"}
Expand Down

0 comments on commit 6266abc

Please sign in to comment.