Skip to content

Removed Dupes

Removed Dupes #30

Workflow file for this run

# .github/workflows/deploy.yml
name: Task1 QC and Deploy
on:
push:
branches:
- main
permissions:
contents: write
packages: write
issues: write
id-token: write
pages: write
jobs:
process_raw:
name: Process Raw CSV Files
runs-on: self-hosted
outputs:
data: ${{ steps.set_vars.outputs.json }}
steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Get Changed CSV Files
run: |
#!/bin/bash
# Get the list of CSV files changed in the last 24 hours
data=$(git log --since="24 hours ago" --name-only --diff-filter=A --pretty=format: -- '*.csv' | sort | uniq)
# Convert the list into a multi-line format
formatted_data=$(echo "$data" | tr ' ' '\n')
# Export the formatted data variable to the environment
echo "data<<EOF" >> $GITHUB_ENV
echo "$formatted_data" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
# Print the changed CSV files
echo "Changed CSV files in the last 24 hours:"
echo "$formatted_data"
- name: Parse Raw CSV Files
id: set_vars
run: |
# Initialize an empty JSON array
json_array="["
first_item=true
# Loop through each CSV file in $data
for file in $data; do
# Extract the directory and filename
dir=$(dirname "$file")
filename=$(basename "$file")
# Extract the run-* part from the directory
run_part=$(basename "$dir")
# Split the filename into sub, task, and version
IFS='_' read -r sub task version <<< "$filename"
version="${version%.csv}" # Remove the .csv extension from version
# Escape variables for JSON
sub_escaped=$(printf '%s' "$sub" | sed 's/"/\\"/g')
task_escaped=$(printf '%s' "$task" | sed 's/"/\\"/g')
version_escaped=$(printf '%s' "$version" | sed 's/"/\\"/g')
run_part_escaped=$(printf '%s' "$run_part" | sed 's/"/\\"/g')
# Build JSON object
json_object="{\"sub\":\"$sub_escaped\",\"task\":\"$task_escaped\",\"version\":\"$version_escaped\",\"run_part\":\"$run_part_escaped\"}"
# Append to JSON array
if [ "$first_item" = true ]; then
json_array="$json_array$json_object"
first_item=false
else
json_array="$json_array,$json_object"
fi
# Print the extracted values
echo "Run Part: $run_part"
echo "Subject: $sub"
echo "Task: $task"
echo "Version: $version"
done
json_array="$json_array]"
# Output the JSON array
echo "json=$json_array" >> $GITHUB_OUTPUT
run_qc:
name: Run Quality Control
runs-on: self-hosted
needs: process_raw
strategy:
matrix:
config: ${{ fromJson(needs.process_raw.outputs.data) }}
steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Debug Environment Variables
run: |
echo "Subject: ${{ matrix.config.sub }}"
echo "Task: ${{ matrix.config.task }}"
echo "Version: ${{ matrix.config.version }}"
echo "Run Part: ${{ matrix.config.run_part }}"
- name: Install Python Dependencies
run: |
python -m pip install --upgrade pip
- name: Run Quality Control Script
env:
TEASE: ${{ secrets.TEASE }}
run: |
sub=${{ matrix.config.sub }}
task=${{ matrix.config.task }}
vers=${{ matrix.config.version }}
run_part=${{ matrix.config.run_part }}
echo "Processing subject: $sub"
echo "Processing task: $task"
echo "Processing version: $vers"
csv_file="./data/${sub}/processed/${run_part}/${sub}_${task}_${vers}.csv"
mkdir -p "./data/${sub}/${run_part}"
log_file="./data/${sub}/${run_part}/qc_${task}_${vers}.log"
echo "CSV file: $csv_file"
echo "Log file: $log_file"
if [ -f "$csv_file" ]; then
python ./code/ATSqC.py -s "$csv_file" -o "./data/${sub}/${run_part}" -sub "$sub" | tee "$log_file"
echo "QC for ${sub}_${task}_${vers} completed"
else
echo "CSV file $csv_file does not exist"
fi
- name: Upload QC Results
uses: actions/upload-artifact@v3
with:
name: qc-results-${{ matrix.config.sub }}-${{ matrix.config.task }}-${{ matrix.config.version }}-${{ matrix.config.run_part }}
path: |
./data/${{ matrix.config.sub }}/${{ matrix.config.run_part }}/*.png
./data/${{ matrix.config.sub }}/${{ matrix.config.run_part }}/*.log
commit_results:
name: Commit and Push QC Results
runs-on: self-hosted
needs: run_qc
steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Download all artifacts
uses: actions/download-artifact@v3
with:
path: ./
- name: Move QC Results
run: |
echo "Moving QC results to data directory..."
for dir in qc-results-*; do
echo "Processing artifact directory: $dir"
# Extract sub, task, version, and run_part from the directory name
IFS='-' read -r _ _ sub task version run_part <<< "$dir"
echo "Subject: $sub, Task: $task, Version: $version, Run Part: $run_part"
# Define the target directories
run_part_dir="./data/${sub}/${run_part}"
# Create the target directories if they don't exist
mkdir -p "$run_part_dir"
# Move .png files to the processed directory
mv "$dir"/*.png "$run_part_dir/" 2>/dev/null || :
# Move .log files to the run_part directory
mv "$dir"/*.log "$run_part_dir/" 2>/dev/null || :
done
- name: Commit and Push QC Results
run: |
git config --global user.name "miloswrath"
git config --global user.email "miloswrath@users.noreply.github.com"
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
git add ./data/*/*/*.png ./data/*/*/*.log
git commit -m "Add QC results for subjects $(date +%Y-%m-%d)"
git push origin main
- name: List Directory After QC
run: |
echo "Listing directory after running QC:"
find ./data -type d
find ./data -type f
add:
name: Generate Jekyll Posts and Deploy
runs-on: ubuntu-latest
needs: [process_raw, run_qc, commit_results]
steps:
# 1. Checkout the Repository
- name: Checkout Repository
uses: actions/checkout@v4
with:
persist-credentials: false # Recommended for security
fetch-depth: 0
ref: main
- name: Pull Latest Changes
run: git pull origin main
# 4. Set Up Ruby Environment
- name: Setup Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: '3.1' # Specify your Ruby version
bundler-cache: true # Caches installed gems automatically
cache-version: 1 # Increment if you need to reset the cache
# 5. Install Ruby Dependencies
- name: Install Dependencies
run: bundle install
# 6. Generate Jekyll Posts from PNGs
- name: Generate Jekyll Posts
run: |
set -e # Exit immediately if a command exits with a non-zero status
set -x # Print commands and their arguments as they are executed
run_part=${{ needs.process_raw.outputs.run_part }}
POSTS_DIR="_posts"
mkdir -p "$POSTS_DIR" # Ensure the _posts directory exists
# Initialize an associative array to group images by subject
declare -A subjects
# Get the list of PNG files added in the latest commit
images=$(git diff --name-only HEAD~1 HEAD | grep '\.png$')
echo "Images: $images"
# Iterate over each PNG file and group them by subject number
for file in $images; do
# Check if the file exists to avoid errors
if [ ! -f "$file" ]; then
continue
fi
# Extract the subject number from the filename (assuming it's the first part before '_')
filename=$(basename "$file")
subject=$(echo "$filename" | awk -F_ '{print $1}')
# Append the filename to the subject's array
subjects["$subject"]+="$file "
done
# Generate Jekyll posts for each subject
for subject in "${!subjects[@]}"; do
# Define the post filename with current date and subject number
timestamp=$(date +%H%M%S)
post_filename="$POSTS_DIR/$(date +%Y-%m-%d)-subject-$subject-$timestamp.md"
# Create the Jekyll post
{
echo "---"
echo "layout: post"
echo "title: Subject $subject"
echo "date: $(date +%Y-%m-%d)"
echo "categories: subjects"
echo "---"
echo ""
# Add images to the post
for image in ${subjects["$subject"]}; do
# Adjust the image path based on your site structure
# Assuming images are served from the root
echo "![]($image)"
done
} > "$post_filename"
echo "Created post: $post_filename"
done
# 7. List _posts Directory for Verification (Optional)
- name: List _posts Directory
run: |
echo "Listing _posts directory:"
ls -la _posts
# 8. Commit and Push Generated Posts
- name: Commit and Push Posts
run: |
git config --global user.name "miloswrath"
git config --global user.email "miloswrath@users.noreply.github.com"
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
# Add new posts to git
git add _posts/*.md
# Commit changes if there are any
if ! git diff --cached --exit-code > /dev/null; then
git commit -m "Add new posts for subjects $(date +%Y-%m-%d)"
git push origin main # Replace 'main' with your default branch if different
else
echo "No changes to commit."
fi
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# 9. Build the Jekyll Site
- name: Build with Jekyll
run: bundle exec jekyll build --verbose --baseurl "${{ github.event.inputs.base_path || '' }}"
env:
JEKYLL_ENV: production
# 10. Deploy to GitHub Pages
# Using GitHub's built-in Pages action
- name: Configure GitHub Pages
uses: actions/configure-pages@v5
- name: Upload Pages Artifact
uses: actions/upload-pages-artifact@v1
with:
path: ./_site # Ensure this matches your Jekyll build output
- name: List _site Directory
run: |
echo "Listing _site directory:"
ls -la _site
- name: Deploy to GitHub Pages
uses: actions/deploy-pages@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}