Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Work preparing for CSV Parser. #201

Merged
merged 7 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions bicep/modules/blade_common.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,22 @@ module manifestDagShareUpload './script-share-upload/main.bicep' = {
}
}

// module csvDagShareUpload './script-share-upload/main.bicep' = {
// name: '${bladeConfig.sectionName}-storage-dag-upload-csv'
// params: {
// storageAccountName: configStorage.outputs.name
// location: location
// shareName: 'airflow-dags'
// filename: 'airflowdags'
// compress: true
// fileurl: 'https://community.opengroup.org/osdu/platform/data-flow/ingestion/csv-parser/csv-parser/-/archive/master/csv-parser-master.tar.gz'
// useExistingManagedIdentity: true
// managedIdentityName: deploymentScriptIdentity
// existingManagedIdentitySubId: subscription().subscriptionId
// existingManagedIdentityResourceGroupName:resourceGroup().name
// }
// }

/*
_______ .______ ___ .______ __ __
/ _____|| _ \ / \ | _ \ | | | |
Expand Down
24 changes: 18 additions & 6 deletions bicep/modules/script-share-upload/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ echo "Waiting on Identity RBAC replication (${initialDelay})"
sleep ${initialDelay}

# Installing required packages
apk add --no-cache curl
apk add --no-cache curl zip

# Derive the filename from the URL
url_basename=$(basename ${URL})
Expand All @@ -26,13 +26,25 @@ if [[ ${URL} == *.tar.gz ]]; then
tar -xzf ${url_basename} --strip-components=1 -C extracted_files

if [[ ${compress} == "True" ]]; then
echo "Creating tar.gz of contents of ${FILE} and uploading it compressed up to file share ${SHARE}"
echo "Creating zip of contents of ${FILE} and uploading it compressed up to file share ${SHARE}"
# Remove the original downloaded tar file
rm ${url_basename}
# Create a new tar file with the same name
tar -czf ${url_basename} -C extracted_files/${FILE} .
az storage file upload -s ${SHARE} --source ./${url_basename} -onone
echo "Tar.gz file ${url_basename} uploaded to file share ${SHARE}."
# Name the archive after the download, swapping the .tar.gz suffix for .zip
zip_filename="${url_basename%.tar.gz}.zip"

# Absolute destination, so the archive lands in the current directory even
# though zip itself runs from inside the extracted tree
original_dir=$(pwd)

# Build the archive in a subshell: the cd cannot leak into the rest of the
# script, and a failed cd stops the run instead of zipping the wrong directory.
# Archiving "." stores paths relative to extracted_files/${FILE} (the directory
# prefix itself is not included) and keeps dotfiles, which a bare * would skip.
(
  cd "extracted_files/${FILE}" || exit 1
  zip -r "${original_dir}/${zip_filename}" .
) || exit 1

# Upload the zip file to the file share
az storage file upload -s "${SHARE}" --source "./${zip_filename}" -onone
echo "Zip file ${zip_filename} uploaded to file share ${SHARE}."
else
# Batch upload the extracted files to the file share using the specified pattern
echo "Uploading extracted files to file share ${SHARE} with pattern ${FILE}/**"
Expand Down
116 changes: 50 additions & 66 deletions charts/osdu-developer-init/templates/workflow-init.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ spec:
secretKeyRef:
name: {{ .Values.clientSecret.name | quote }}
key: {{ .Values.clientSecret.key | quote }}
- name: WORKFLOWS
value: {{ .Values.workflows | toJson | quote }}
containers:
- name: sleep
image: istio/base
Expand All @@ -54,8 +56,9 @@ metadata:
namespace: {{ $namespace }}
data:
init.sh: |
#!/bin/sh
set -eu
#!/usr/bin/env sh
set -euo pipefail
set -o nounset

apk add --no-cache curl jq

Expand All @@ -82,77 +85,58 @@ data:

# jq -r prints the decoded string itself, so no manual quote stripping is
# needed (tr -d '"' would also delete quotes that belong to the value)
TOKEN=$(printf '%s' "$BODY" | jq -r '.access_token')

# First workflow
WORKFLOW_NAME="Osdu_ingest"
WORKFLOW_DESCRIPTION="Manifest Ingest workflow for OSDU"
# Log the WORKFLOWS variable to check its format
echo "WORKFLOWS: $WORKFLOWS"

echo "Registering workflow: $WORKFLOW_NAME"
OUTPUT=$(curl -s -w "%{http_code}" --request POST \
--url http://workflow.osdu-core/api/workflow/v1/workflow \
--header "Host: workflow.osdu-core" \
--header "accept: application/json" \
--header "content-type: application/json" \
--header "authorization: Bearer $TOKEN" \
--header "data-partition-id: ${PARTITION}" \
--data "{
\"workflowName\": \"$WORKFLOW_NAME\",
\"description\": \"$WORKFLOW_DESCRIPTION\",
\"registrationInstructions\": {
\"active\": true,
\"dagName\": \"$WORKFLOW_NAME\",
\"concurrentWorkflowRun\": 5,
\"concurrentTaskRun\": 5,
\"workflowDetailContent\": \"\",
\"etc\": \"autotest\"
}
}")

HTTP_STATUS_CODE=$(echo $OUTPUT | grep -oE '[0-9]{3}$')
BODY=${OUTPUT%???}

if [ "$HTTP_STATUS_CODE" = "200" ]; then
echo "Success: $(echo "$BODY" | jq .)"
else
echo "Error: Unexpected HTTP status code $HTTP_STATUS_CODE"
echo "Response body: $BODY"
# WORKFLOWS must parse as JSON and be an array, because it is iterated with
# `.[]` afterwards. `jq empty` alone would accept e.g. `null` and let the
# failure surface later as a jq iteration error. -e turns a false/null result
# into a non-zero exit status; the boolean jq prints is discarded.
if ! printf '%s' "$WORKFLOWS" | jq -e 'type == "array"' >/dev/null; then
  echo "Error: WORKFLOWS is not a valid JSON array" >&2
  exit 1
fi

# Second workflow
WORKFLOW_NAME="Osdu_ingest_by_reference"
WORKFLOW_DESCRIPTION="ManifestIngest by reference workflow for OSDU"
# Iterate over each workflow in the WORKFLOWS array
echo "$WORKFLOWS" | jq -c '.[]' | while read -r WORKFLOW; do
# Debugging: Log the current workflow being processed
echo "Processing workflow: $WORKFLOW"

echo "Registering workflow: $WORKFLOW_NAME"
OUTPUT=$(curl -s -w "%{http_code}" --request POST \
--url http://workflow.osdu-core/api/workflow/v1/workflow \
--header "Host: workflow.osdu-core" \
--header "accept: application/json" \
--header "content-type: application/json" \
--header "authorization: Bearer $TOKEN" \
--header "data-partition-id: ${PARTITION}" \
--data "{
\"workflowName\": \"$WORKFLOW_NAME\",
\"description\": \"$WORKFLOW_DESCRIPTION\",
\"registrationInstructions\": {
\"active\": true,
\"dagName\": \"$WORKFLOW_NAME\",
\"concurrentWorkflowRun\": 5,
\"concurrentTaskRun\": 5,
\"workflowDetailContent\": \"\",
\"etc\": \"autotest\"
}
}")
# Quote the expansion so the JSON reaches jq intact: an unquoted $WORKFLOW is
# word-split and glob-expanded, which collapses runs of whitespace and can
# expand a "*" inside a description into file names.
WORKFLOW_NAME=$(printf '%s' "$WORKFLOW" | jq -r '.name')
WORKFLOW_DESCRIPTION=$(printf '%s' "$WORKFLOW" | jq -r '.description')

HTTP_STATUS_CODE=$(echo $OUTPUT | grep -oE '[0-9]{3}$')
BODY=${OUTPUT%???}
echo "Registering workflow: $WORKFLOW_NAME"

# Let jq assemble the request body: --arg passes the values as JSON strings,
# so quotes or backslashes in a name or description are escaped rather than
# breaking (or injecting into) the payload.
PAYLOAD=$(jq -cn \
  --arg name "$WORKFLOW_NAME" \
  --arg description "$WORKFLOW_DESCRIPTION" \
  '{
    workflowName: $name,
    description: $description,
    registrationInstructions: {
      active: true,
      dagName: $name,
      concurrentWorkflowRun: 5,
      concurrentTaskRun: 5,
      workflowDetailContent: "",
      etc: "autotest"
    }
  }')

# -w appends the 3-digit HTTP status code directly after the response body
OUTPUT=$(curl -s -w "%{http_code}" --request POST \
  --url http://workflow.osdu-core/api/workflow/v1/workflow/system \
  --header "Host: workflow.osdu-core" \
  --header "accept: application/json" \
  --header "content-type: application/json" \
  --header "authorization: Bearer $TOKEN" \
  --header "data-partition-id: ${PARTITION}" \
  --data "$PAYLOAD")

if [ "$HTTP_STATUS_CODE" = "200" ]; then
echo "Success: $(echo "$BODY" | jq .)"
else
echo "Error: Unexpected HTTP status code $HTTP_STATUS_CODE"
echo "Response body: $BODY"
exit 1
fi
# OUTPUT is the response body followed by the 3-digit status code. Split the
# two with parameter expansion instead of `echo $OUTPUT | grep`: no word
# splitting or globbing of the body, and no silent abort under `set -e` when
# nothing matches (an unexpected OUTPUT reaches the error branch instead).
BODY=${OUTPUT%???}
HTTP_STATUS_CODE=${OUTPUT#"$BODY"}

case "$HTTP_STATUS_CODE" in
  200)
    echo "Info: Workflow created. HTTP status code $HTTP_STATUS_CODE"
    ;;
  409)
    # An already-registered workflow is reported but does not fail the run
    echo "Info: Workflow already exists. HTTP status code $HTTP_STATUS_CODE"
    echo "Response body: $BODY"
    ;;
  *)
    echo "Error: Unexpected HTTP status code $HTTP_STATUS_CODE"
    echo "Response body: $BODY"
    exit 1
    ;;
esac
done

exit 0
{{- end }}
7 changes: 7 additions & 0 deletions software/applications/osdu-core/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,13 @@ spec:
installationType: osduCore
jobs:
workflowInit: true
workflows:
- name: "Osdu_ingest"
description: "Manifest Ingest workflow for OSDU"
- name: "Osdu_ingest_by_reference"
description: "Manifest Ingest by reference workflow for OSDU"
# - name: 'csv-parser'
# description: 'CSV Parser workflow for OSDU'
partition: opendes
clientSecret:
name: active-directory
Expand Down