
Commit

Changed CSV stuff.
danielscholl committed Sep 24, 2024
1 parent 68ca924 commit 061f407
Showing 4 changed files with 91 additions and 29 deletions.
2 changes: 1 addition & 1 deletion bicep/modules/blade_common.bicep
@@ -445,7 +445,7 @@ module csvDagShareUpload './script-share-csvdag/main.bicep' = {
location: location
shareName: 'airflow-dags'
filename: 'airflowdags'
fileurl: 'https://raw.githubusercontent.com/Azure/osdu-developer/refs/heads/csv/dags/csv_parser.zip'
fileurl: 'https://community.opengroup.org/osdu/platform/data-flow/ingestion/csv-parser/csv-parser/-/archive/master/csv-parser-master.tar.gz'
useExistingManagedIdentity: true
managedIdentityName: deploymentScriptIdentity
existingManagedIdentitySubId: subscription().subscriptionId
15 changes: 14 additions & 1 deletion bicep/modules/script-share-csvdag/main.bicep
@@ -14,7 +14,7 @@ param shareName string = 'sample-share'
param filename string = 'sample.json'

@description('Name of the file as it is stored in the share')
param fileurl string = 'https://raw.githubusercontent.com/Azure/osdu-developer/refs/heads/main/dags/csv_parser.zip'
param fileurl string = 'https://community.opengroup.org/osdu/platform/data-flow/ingestion/csv-parser/csv-parser/-/archive/master/csv-parser-master.tar.gz'

@description('The location of the Storage Account and where to deploy the module resources to')
param location string = resourceGroup().location
@@ -69,6 +69,18 @@ resource rbac 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(
}
}

var searchAndReplace = [
{
find: '{| DAG_NAME |}'
replace: 'csv-parser'
}
{
find: '{| DOCKER_IMAGE |}'
replace: 'msosdu.azurecr.io/csv-parser-msi:v5'
}
]
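// Note: the deployment script below receives this array via string(searchAndReplace),
// which renders it as JSON, e.g.
// [{"find":"{| DAG_NAME |}","replace":"csv-parser"},{"find":"{| DOCKER_IMAGE |}","replace":"msosdu.azurecr.io/csv-parser-msi:v5"}]
// This is the shape script.sh parses from its SEARCH_AND_REPLACE environment variable.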

resource uploadFile 'Microsoft.Resources/deploymentScripts@2023-08-01' = {
name: 'script-${storageAccount.name}-${replace(replace(filename, ':', ''), '/', '-')}'
location: location
@@ -90,6 +102,7 @@ resource uploadFile 'Microsoft.Resources/deploymentScripts@2023-08-01' = {
{ name: 'URL', value: fileurl }
{ name: 'SHARE', value: shareName }
{ name: 'initialDelay', value: initialScriptDelay }
{ name: 'SEARCH_AND_REPLACE', value: string(searchAndReplace) }
]
scriptContent: loadTextContent('script.sh')
cleanupPreference: cleanupPreference
102 changes: 76 additions & 26 deletions bicep/modules/script-share-csvdag/script.sh
@@ -1,36 +1,86 @@
#!/bin/bash
set -e

# This script performs the following tasks:
# 1. Waits for Identity RBAC replication.
# 2. Installs required packages.
# 3. Downloads a tar.gz file from a specified URL and extracts its contents.
# 4. Processes a specific Python file within the extracted contents, performing complex find/replace operations based on a provided JSON configuration.
# 5. Compresses the DAG and uploads it to a file share.
#
# The SEARCH_AND_REPLACE variable is a JSON array of find/replace pairs, for example:
# [
# {
# "find": "{| DAG_NAME |}",
# "replace": "csv-parser"
# },
# {
# "find": "{| DOCKER_IMAGE |}",
# "replace": "msosdu.azurecr.io/csv-parser-msi:v5"
# }
# ]
#
# The SEARCH_AND_REPLACE variable is required for the script to perform the find/replace operations.
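#
# For a local dry run (a hypothetical example; the deployment script normally injects
# these values), the same environment could be approximated with:
#   export SEARCH_AND_REPLACE='[{"find":"{| DAG_NAME |}","replace":"csv-parser"}]'
#   export URL='https://community.opengroup.org/osdu/platform/data-flow/ingestion/csv-parser/csv-parser/-/archive/master/csv-parser-master.tar.gz'
#   export SHARE='airflow-dags' FILE='airflowdags' initialDelay='0'
# (The final az storage upload step additionally expects storage account credentials
# in the environment.)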



echo "Waiting on Identity RBAC replication (${initialDelay})"
sleep "${initialDelay}"

# Install required packages
apk add --no-cache curl zip jq

# Download and extract the file
url_basename=$(basename "${URL}")
echo "Downloading and extracting file from ${URL}"
curl -sL "${URL}" -o temp.zip && unzip temp.zip -d extracted_files && rm temp.zip

# Process csv-parser.py file if it exists
if [ -f "extracted_files/csv-parser.py" ]; then
echo "Processing csv-parser.py file"
sed -i \
-e "s/__KEYVAULT_URI__/${KEYVAULT_URI}/g" \
-e "s/__APPINSIGHTS_KEY__/${APPINSIGHTS_KEY}/g" \
-e "s/__AZURE_ENABLE_MSI__/${AZURE_ENABLE_MSI}/g" \
-e "s/__AZURE_TENANT_ID__/${AZURE_TENANT_ID}/g" \
-e "s/__AZURE_CLIENT_ID__/${AZURE_CLIENT_ID}/g" \
-e "s/__AZURE_CLIENT_SECRET__/${AZURE_CLIENT_SECRET}/g" \
-e "s/__AAD_CLIENT_ID__/${AAD_CLIENT_ID}/g" \
extracted_files/csv-parser.py
# Download the source code and extract it.
url_basename=$(basename "${URL}")
echo "Derived filename from URL: ${url_basename}"
echo "Downloading file from ${URL} to ${url_basename}"
curl -so "${url_basename}" "${URL}"
echo "Extracting tar.gz archive..."
mkdir -p extracted_files
tar -xzf "${url_basename}" --strip-components=1 -C extracted_files
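# Note: --strip-components=1 drops the archive's single top-level directory, so the
# repository contents (including the DAG folder referenced via ${FILE} below) sit
# directly under extracted_files/.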


# Find and Replace.
csv_file="extracted_files/${FILE}/csv_ingestion_all_steps.py"
if [ -f "${csv_file}" ]; then
echo "Processing ${csv_file} file"

# Escape patterns for sed
escape_sed_pattern() {
printf '%s' "$1" | sed 's/[\/&]/\\&/g; s/[][$.*^]/\\&/g'
}
escape_sed_replacement() {
printf '%s' "$1" | sed 's/[\/&]/\\&/g'
}
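# Example (illustrative): escape_sed_replacement turns 'msosdu.azurecr.io/csv-parser-msi:v5'
# into 'msosdu.azurecr.io\/csv-parser-msi:v5', so the '/' cannot terminate the
# s/find/replace/g expression assembled below.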

# Create sed script from search and replace JSON
sed_script_file="sed_script.sed"

echo "${SEARCH_AND_REPLACE}" | jq -c '.[]' | while IFS= read -r item; do
find=$(echo "$item" | jq -r '.find')
replace=$(echo "$item" | jq -r '.replace')

find_escaped=$(escape_sed_pattern "$find")
replace_escaped=$(escape_sed_replacement "$replace")

echo "find: ${find_escaped}"
echo "replace: ${replace_escaped}"

echo "s/${find_escaped}/${replace_escaped}/g" >> "$sed_script_file"
done

echo "Running sed script:"
cat "$sed_script_file"
sed -f "$sed_script_file" "$csv_file" > "extracted_files/${FILE}/csv-parser.py"
rm "$sed_script_file"
rm "$csv_file"
fi

# Create and upload zip file
echo "Creating zip of contents and uploading to file share ${SHARE}"
zip_filename="${url_basename}"
(cd extracted_files && zip -r "../${zip_filename}" .)
rm -rf extracted_files
az storage file upload -s "${SHARE}" --source "./${zip_filename}" -o none
echo "Zip file ${zip_filename} uploaded to file share ${SHARE}."
# Compress the DAG folder and upload it to a file share.
rm "${url_basename}"
zip_filename="${url_basename%.tar.gz}.zip"
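# Zipping from inside extracted_files/${FILE} keeps the DAG files at the root of
# ${zip_filename} (e.g. csv-parser-master.zip) rather than nested under a folder.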
current_dir=$(pwd)
cd "extracted_files/${FILE}" || exit 1
zip -r "${current_dir}/${zip_filename}" .
cd - || exit 1

az storage file upload -s "${SHARE}" --source "${zip_filename}" -o none
echo "Zip file ${zip_filename} uploaded to file share ${SHARE}."
1 change: 0 additions & 1 deletion charts/osdu-developer-init/templates/workflow-init.yaml
@@ -109,7 +109,6 @@ data:
--header "accept: application/json" \
--header "content-type: application/json" \
--header "authorization: Bearer $TOKEN" \
--header "data-partition-id: ${PARTITION}" \
--data "{
\"workflowName\": \"$WORKFLOW_NAME\",
\"description\": \"$WORKFLOW_DESCRIPTION\",
