Merge pull request #31 from beeldengeluid/fix-integration-test
Fix integration test
Veldhoen authored Apr 29, 2024
2 parents 41b72a9 + d8faea0 commit bee53d1
Showing 6 changed files with 64 additions and 44 deletions.
16 changes: 5 additions & 11 deletions .github/workflows/_test.yml
@@ -22,22 +22,16 @@ jobs:
       - name: "Install dev environment"
         run: poetry install --no-interaction --no-ansi
 
-      - name: install libgl1
-        run: sudo apt-get install -y libgl1
+      - name: install ffmpeg
+        run: sudo apt-get update && sudo apt-get install -y ffmpeg --fix-missing
 
       - name: "pytest"
         run: |
-          cp config/config.yml config.yml
+          cp config/config-test.yml config.yml
           poetry run pytest
-      - name: "flake8"
-        run: "poetry run flake8"
+      #- name: "flake8"
+      #run: "poetry run flake8"
 
       - name: "black"
         run: "poetry run black --check ."
 
-      - name: "mypy"
-        run: "poetry run mypy ."
+      #- name: "mypy"
+      #run: "poetry run mypy ."
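
Note on the pytest step: a DANE worker reads a local config.yml from the working directory, so copying config/config-test.yml into place is what points the test run at the test settings (the load order is documented in config-test.yml below). A minimal sketch of how the worker then sees those values, assuming DANE's cfg object exposes the YAML keys as attributes (it is imported this way in main_data_processor.py):

    # illustrative only -- values come from config/config-test.yml added below
    from dane.config import cfg  # the same import the worker itself uses

    print(cfg.FILE_SYSTEM.BASE_MOUNT)  # "data"
    print(cfg.INPUT.S3_BUCKET)         # "example-input"
    print(cfg.DANE_DEPENDENCIES)       # ["input-generating-worker"]
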
29 changes: 0 additions & 29 deletions .github/workflows/test.yml

This file was deleted.

47 changes: 47 additions & 0 deletions config/config-test.yml
@@ -0,0 +1,47 @@
+# Important for understanding DANE configs: https://github.com/CLARIAH/DANE/blob/main/DANE/config.py
+# To read more about the configuration: https://github.com/beeldengeluid/dane-example-worker/wiki/Config
+
+# Important note:
+# FIRST the home dir config is applied (~/.DANE/config.yml),
+# THEN the base_config.yml will overwrite anything,
+# THEN the local config.yml
+
+# Note: For local testing, copy this file to config.yml (in main dir of this repo)
+# Or export DANE_HOME=./config to point DANE to this file
+
+RABBITMQ:
+    HOST: dane-rabbitmq-api.default.svc.cluster.local
+    PORT: 5672
+    EXCHANGE: DANE-exchange
+    RESPONSE_QUEUE: DANE-response-queue
+    USER: guest # change this for production mode
+    PASSWORD: guest # change this for production mode
+ELASTICSEARCH:
+    HOST:
+        - elasticsearch
+    PORT: 9200
+    USER: '' # change this for production mode
+    PASSWORD: '' # change this for production mode
+    SCHEME: http
+    INDEX: dane-index-k8s
+FILE_SYSTEM:
+    BASE_MOUNT: data # data when running locally, /data when running in container
+    INPUT_DIR: input-files
+    OUTPUT_DIR: output-files
+INPUT:
+    TEST_INPUT_PATH: testsource__testcarrier/inputfile.txt
+    S3_ENDPOINT_URL: https://s3-host
+    S3_BUCKET: example-input
+    S3_FOLDER_IN_BUCKET: assets # folder within the bucket
+    MODEL: s3://bucket/model
+    S3_BUCKET_MODEL: example-model
+    DELETE_ON_COMPLETION: True
+OUTPUT:
+    DELETE_ON_COMPLETION: True
+    TRANSFER_ON_COMPLETION: True
+    S3_ENDPOINT_URL: https://s3-host
+    S3_BUCKET: bucket-name # bucket reserved for 1 type of output
+    S3_FOLDER_IN_BUCKET: folder # folder within the bucket
+WORKER_SETTINGS:
+    SETTING_0: foo
+DANE_DEPENDENCIES:
+    - input-generating-worker
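
For orientation, the FILE_SYSTEM and INPUT settings above combine into the local path the worker reads its test input from. A hypothetical helper illustrating that resolution (io_util.py has its own logic; this is only a sketch):

    import os

    from dane.config import cfg

    def resolve_test_input_path() -> str:
        # "data" locally / "/data" in a container, per the comment above
        return os.path.join(
            cfg.FILE_SYSTEM.BASE_MOUNT,
            cfg.FILE_SYSTEM.INPUT_DIR,   # "input-files"
            cfg.INPUT.TEST_INPUT_PATH,   # "testsource__testcarrier/inputfile.txt"
        )

    # -> "data/input-files/testsource__testcarrier/inputfile.txt"
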
4 changes: 3 additions & 1 deletion io_util.py
@@ -295,7 +295,8 @@ def obtain_input_file(s3_uri: str) -> ThisWorkerInput:
     )
     success = s3.download_file(bucket, object_name, output_folder)
     if success:
-        # TODO uncompress the <input_base>.tar.gz
+        if input_file_path.find(".tar.gz") != -1:
+            input_file_path = untar_input_file(input_file_path)
 
     provenance = Provenance(
         activity_name="download",
@@ -333,3 +334,4 @@ def untar_input_file(tar_file_path: str):
     path = str(Path(tar_file_path).parent)
     with tarfile.open(tar_file_path) as tar:
         tar.extractall(path=path, filter="data")  # type: ignore
+    return path
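
Together, these two io_util.py changes make obtain_input_file() transparently unpack compressed inputs: the download step untars any *.tar.gz it fetched, and untar_input_file() now returns the directory it extracted into (the archive's parent directory) instead of returning nothing. A sketch of the resulting flow, with a hypothetical path and untar_input_file() from io_util.py above:

    # hypothetical downloaded archive, for illustration only
    input_file_path = "data/input-files/testsource__testcarrier.tar.gz"

    if input_file_path.find(".tar.gz") != -1:
        # extracts next to the archive; extractall(filter="data") rejects
        # unsafe members and needs Python 3.12 or a recent security backport
        input_file_path = untar_input_file(input_file_path)

    # input_file_path is now the directory "data/input-files"
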
7 changes: 6 additions & 1 deletion main_data_processor.py
@@ -1,6 +1,7 @@
 import logging
 from typing import Tuple, Optional
 import time
+import os
 from dane.config import cfg
 from dane.s3_util import validate_s3_uri
 from io_util import (
@@ -126,7 +127,11 @@ def apply_model(
 ) -> ThisWorkerOutput:
     logger.info("Starting model application")
     start = time.time() * 1000  # convert to ms
-    with open(feature_extraction_input.input_file_path, "r") as f:
+    file_to_read = os.path.join(
+        feature_extraction_input.input_file_path,
+        feature_extraction_input.source_id + ".input",
+    )
+    with open(file_to_read, "r") as f:
         cnt = len(f.readline().split())
     destination = get_output_file_path(
         feature_extraction_input.source_id, OutputType.FOOBAR
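
This follows from the io_util.py change above: input_file_path now names the directory the archive was extracted into, so apply_model() joins it with "<source_id>.input" to reach the actual file. With hypothetical values:

    import os

    input_file_path = "data/input-files"   # directory returned by untar_input_file()
    source_id = "testsource__testcarrier"  # hypothetical source id
    file_to_read = os.path.join(input_file_path, source_id + ".input")
    # -> "data/input-files/testsource__testcarrier.input"
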
5 changes: 3 additions & 2 deletions tests/integration/S3_integration_test.py
@@ -62,8 +62,9 @@ def create_and_fill_buckets(aws, create_sample_input):
         cfg.INPUT.S3_BUCKET_MODEL,
     ]:
         client.create_bucket(Bucket=bucket)
-    client.put_object(
-        Body=fn_tar_in,
+
+    client.upload_file(
+        Filename=fn_tar_in,
         Bucket=cfg.INPUT.S3_BUCKET,
         Key=key_in,
     )
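
This is the fix the PR title refers to: boto3's put_object() expects Body to be bytes or a file-like object, so passing the tarball's filename string uploaded the literal path text instead of the archive; upload_file() takes a Filename and streams the file's contents. A minimal comparison, with hypothetical names:

    import boto3

    s3 = boto3.client("s3")

    # before (broken): uploads the *string* "input.tar.gz" as the object body
    # s3.put_object(Body="input.tar.gz", Bucket="example-input", Key="assets/input.tar.gz")

    # after (fixed): opens the local file and uploads its contents
    s3.upload_file(
        Filename="input.tar.gz",    # hypothetical local path
        Bucket="example-input",     # matches cfg.INPUT.S3_BUCKET in config-test.yml
        Key="assets/input.tar.gz",  # hypothetical object key
    )
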
