
Commit

Merge pull request #325 from AutomatingSciencePipeline/development
Change the storage bucket to MongoDB, add default experiments to the database, other changes
rhit-windsors authored Oct 21, 2024
2 parents b4309f8 + 34cdb01 commit c27f260
Showing 12 changed files with 192 additions and 29 deletions.
15 changes: 14 additions & 1 deletion apps/backend/app.py
@@ -1,12 +1,13 @@
"""Module that uses flask to host endpoints for the backend"""
import threading
import base64
from concurrent.futures import ProcessPoolExecutor
import os
import bson
from flask import Flask, Response, request, jsonify
from kubernetes import client, config
import pymongo
from modules.mongo import upload_experiment_aggregated_results, upload_experiment_zip, upload_log_file, verify_mongo_connection
from modules.mongo import upload_experiment_aggregated_results, upload_experiment_zip, upload_log_file, verify_mongo_connection, check_insert_default_experiments, download_experiment_file

from spawn_runner import create_job, create_job_object
flaskApp = Flask(__name__)
@@ -29,6 +30,10 @@
)
# connect to the glados database
gladosDB = mongoClient["gladosdb"]
# call the function to check if the documents for default experiments exist
# start that in a different thread so that it can do its thing in peace
addDefaultExpsThread = threading.Thread(target=check_insert_default_experiments, args={mongoClient})
addDefaultExpsThread.start()

# setup the mongo collections
experimentsCollection = gladosDB.experiments
@@ -91,6 +96,14 @@ def check_mongo():
return Response(status=200)
except Exception:
return Response(status=500)

@flaskApp.get("/downloadExpFile")
def download_exp_file():
try:
experiment_id = request.args.get('expId', default='', type=str)
return {'contents': download_experiment_file(experiment_id, mongoClient)}
except Exception:
return Response(status=500)

if __name__ == '__main__':
flaskApp.run()
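
For context, the seeding call added above runs on its own thread so that Flask start-up is not blocked while the default-experiment documents are checked. A minimal, self-contained sketch of the same pattern (the connection string is hypothetical; args is written as the conventional one-element tuple) might look like:

import threading
import pymongo

def check_insert_default_experiments(mongo_client: pymongo.MongoClient) -> None:
    # Placeholder body: insert any default experiment documents that are missing.
    pass

# Hypothetical local connection string; the real backend builds its client from env vars.
mongoClient = pymongo.MongoClient("mongodb://localhost:27017")

# daemon=True keeps the seeding thread from blocking interpreter shutdown.
addDefaultExpsThread = threading.Thread(
    target=check_insert_default_experiments,
    args=(mongoClient,),
    daemon=True,
)
addDefaultExpsThread.start()
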
2 changes: 1 addition & 1 deletion apps/backend/job-runner.yaml
@@ -10,7 +10,7 @@ spec:
spec:
containers:
- name: runner
image: gladospipeline/glados-runner:main
image: gladospipeline/glados-runner:development
imagePullPolicy: Always
command: []
env:
59 changes: 54 additions & 5 deletions apps/backend/modules/mongo.py
@@ -2,15 +2,16 @@
import pymongo
from pymongo.errors import ConnectionFailure
from bson import Binary
from gridfs import GridFSBucket

def verify_mongo_connection(mongoClient):
def verify_mongo_connection(mongoClient: pymongo.MongoClient):
try:
mongoClient.admin.command('ping')
except ConnectionFailure as err:
# just use a generic exception
raise Exception("MongoDB server not available/unreachable") from err

def upload_experiment_aggregated_results(experimentId: str, results: str, mongoClient):
def upload_experiment_aggregated_results(experimentId: str, results: str, mongoClient: pymongo.MongoClient):
experimentResultEntry = {"_id": experimentId, "resultContent": results}
# Get the results connection
resultsCollection = mongoClient["gladosdb"].results
@@ -23,7 +24,7 @@ def upload_experiment_aggregated_results(experimentId: str, results: str, mongoC
# Change to generic exception
raise Exception("Encountered error while storing aggregated results in MongoDB") from err

def upload_experiment_zip(experimentId: str, encoded: Binary, mongoClient):
def upload_experiment_zip(experimentId: str, encoded: Binary, mongoClient: pymongo.MongoClient):
experimentZipEntry = {"_id": experimentId, "fileContent": encoded}
zipCollection = mongoClient["gladosdb"].zips
try:
@@ -33,11 +34,59 @@ def upload_experiment_zip(experimentId: str, encoded: Binary, mongoClient):
except Exception as err:
raise Exception("Encountered error while storing results zip in MongoDB") from err

def upload_log_file(experimentId: str, contents: str, mongoClient):
def upload_log_file(experimentId: str, contents: str, mongoClient: pymongo.MongoClient):
logFileEntry = {"_id": experimentId, "fileContent": contents}
logCollection = mongoClient["gladosdb"].logs
try:
resultId = logCollection.insert_one(logFileEntry).inserted_id
return resultId
except Exception as err:
raise Exception("Encountered error while storing log file in MongoDB") from err
raise Exception("Encountered error while storing log file in MongoDB") from err

def check_insert_default_experiments(mongoClient: pymongo.MongoClient):
# this gets run on its own thread, so let it try to enter the default experiments
def insertExperiments():
defaultExperimentCollection = mongoClient["gladosdb"].defaultExperiments
experiments = [
# python experiments
{"name": "addNums.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/addNums.py"},
{"name": "addNumsFailsOnXis1Yis5.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/addNumsFailsOnXis1Yis5.py"},
{"name": "addNumsTimeOutOnXis1Yis5.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/addNumsTimeOutOnXis1Yis5.py"},
{"name": "addNumsTimed.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/addNumsTimed.py"},
{"name": "addNumsWithConstants.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/addNumsWithConstants.py"},
{"name": "alwaysFail.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/alwaysFail.py"},
{"name": "genetic_algorithm.py", "type": "python", "url": "https://raw.githubusercontent.com/AutomatingSciencePipeline/Monorepo/refs/heads/main/example_experiments/python/genetic_algorithm.py"}
# C experiments
# Java experiments
]

for exp in experiments:
count = defaultExperimentCollection.count_documents({"name": exp["name"]})
if count == 0:
defaultExperimentCollection.insert_one(exp)

try:
insertExperiments()
except:
# keep trying
check_insert_default_experiments(mongoClient)

def download_experiment_file(expId: str, mongoClient: pymongo.MongoClient):
# we are going to have to get the binary data from mongo here
# setup the bucket
db = mongoClient["gladosdb"]
bucket = GridFSBucket(db, bucket_name='fileBucket')
files = bucket.find({"metadata.expId": expId}).to_list()
num_files = 0
file_name = ""
for file in files:
num_files += 1
if num_files > 1:
raise Exception("There are more than 1 file for a single experiment!")
file_name = file.filename
if file_name == "":
raise Exception("No file found!")
file = bucket.open_download_stream_by_name(file_name)
contents = file.read()
return contents.decode("utf-8")
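
The new download_experiment_file helper reads the uploaded experiment file back out of the fileBucket GridFS bucket by matching metadata.expId and then streaming the single matching file. A small round-trip sketch of the same bucket API (assuming a local MongoDB; the file name and contents are illustrative only):

import pymongo
from gridfs import GridFSBucket

client = pymongo.MongoClient("mongodb://localhost:27017")  # hypothetical connection string
bucket = GridFSBucket(client["gladosdb"], bucket_name="fileBucket")

# Store an illustrative file tagged with an experiment id, mirroring the frontend upload route.
upload = bucket.open_upload_stream("experimentFileDEMO", metadata={"expId": "DEMO"})
upload.write(b"print('hello from an experiment file')")
upload.close()

# Read it back the way download_experiment_file does: find by metadata, then stream by name.
grid_out = next(bucket.find({"metadata.expId": "DEMO"}))
contents = bucket.open_download_stream_by_name(grid_out.filename).read()
print(contents.decode("utf-8"))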

@@ -26,7 +26,15 @@ export const DispatchStep = ({ id, form, ...props }) => {
console.log('Submitting Experiment');
submitExperiment(form.values, userId as string).then(async (expId) => {
console.log(`Uploading file for ${expId}:`, files);
const uploadResponse = await uploadExec(expId, files[0]);
const uploadResponse = await fetch('/api/files/uploadFile', {
method: 'POST',
headers: new Headers({ 'Content-Type': 'application/json' }),
credentials: 'same-origin',
body: JSON.stringify({
"fileToUpload": arrayBufferToBase64(await files[0].arrayBuffer()),
"experimentId": expId
})
});
if (uploadResponse) {
console.log(`Handing experiment ${expId} to the backend`);
const response = await fetch(`/api/experiments/${expId}`, {
@@ -100,3 +108,13 @@ export const DispatchStep = ({ id, form, ...props }) => {
</Dropzone>
);
};

function arrayBufferToBase64(buffer) {
let binary = '';
const bytes = new Uint8Array(buffer);
for (let i = 0; i < bytes.byteLength; i++) {
binary += String.fromCharCode(bytes[i]);
}
return Buffer.from(binary).toString("base64");
}
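
As the diff reads, the dropzone now base64-encodes the chosen file in the browser and POSTs it to /api/files/uploadFile, so GridFS ends up holding the base64 text rather than the raw bytes; the runner reverses that encoding after fetching the file back through the backend. A minimal sketch of that round trip (contents are illustrative):

import base64

# What the frontend derives from the file's ArrayBuffer and sends to the upload route.
original = b"x = 1\ny = 2\nprint(x + y)\n"
wire_payload = base64.b64encode(original).decode("utf-8")

# What the runner does with the string it receives back from the backend.
recovered = base64.b64decode(wire_payload)
assert recovered == original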

48 changes: 47 additions & 1 deletion apps/frontend/frontend.Dockerfile
@@ -10,4 +10,50 @@ RUN npm run build

EXPOSE $FRONTEND_WEBSERVER_PORT

CMD ["npm", "start"]
CMD ["npm", "start"]

# FROM node:20-alpine AS base

# FROM base AS deps

# RUN apk add --no-cache libc6-compat
# WORKDIR /app

# COPY package.json ./

# RUN npm update && npm install

# # Install this to optimize images
# RUN npm i sharp

# # If you want yarn update and install uncomment the bellow

# # RUN yarn install && yarn upgrade

# FROM base AS builder
# WORKDIR /app
# COPY --from=deps /app/node_modules ./node_modules
# COPY . .

# RUN npm run build

# FROM base AS runner
# WORKDIR /app

# ENV NODE_ENV=production
# RUN addgroup --system --gid 1001 nodejs
# RUN adduser --system --uid 1001 nextjs

# COPY --from=builder /app/public ./public

# RUN mkdir .next
# RUN chown nextjs:nodejs .next

# COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
# COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

# USER nextjs

# EXPOSE $FRONTEND_WEBSERVER_PORT

# CMD ["node", "server.js"]
1 change: 1 addition & 0 deletions apps/frontend/lib/mongodb.ts
@@ -15,6 +15,7 @@ export const DB_NAME = 'gladosdb';
export const COLLECTION_LOGS = 'logs';
export const COLLECTION_ZIPS = 'zips';
export const COLLECTION_RESULTS_CSVS = 'results';
export const COLLECTION_EXPERIMENT_FILES = 'files';

let client: MongoClient;
let clientPromise: Promise<MongoClient> = new Promise((success) => {
41 changes: 41 additions & 0 deletions apps/frontend/pages/api/files/uploadFile.tsx
@@ -0,0 +1,41 @@
import clientPromise, { DB_NAME, COLLECTION_RESULTS_CSVS, COLLECTION_EXPERIMENT_FILES } from '../../../lib/mongodb';
import { NextApiHandler } from 'next';
import { GridFSBucket } from 'mongodb';
import { Readable } from 'stream';

const mongoFileUploader: NextApiHandler<string> = async (req, res) => {
if (req.method === 'POST') {
const { fileToUpload, experimentId } = req.body;

if (!fileToUpload || !experimentId) {
return res.status(400).json({ response: "Not enough arguments!" } as any);
}

try {
const client = await clientPromise;
const db = client.db(DB_NAME);
const bucket = new GridFSBucket(db, { bucketName: 'fileBucket' });

const readableStream = new Readable();
readableStream.push(fileToUpload);
readableStream.push(null);

readableStream.
pipe(bucket.openUploadStream(`experimentFile${experimentId}`, {
chunkSizeBytes: 1048576,
metadata: { expId: experimentId }
}) as any);


res.status(200).json({ response: 'Successfully wrote file!' } as any);
return;
}
catch (error) {
const message = "Failed to upload experiment file!";
console.error("Error writing experiment file.");
res.status(500).json({ response: message } as any);
}
}
}

export default mongoFileUploader;
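
The new API route accepts a JSON body carrying the base64-encoded file plus the experiment id and streams it into the fileBucket GridFS bucket. A hedged sketch of exercising the route from a script (the URL, file name, and experiment id are hypothetical; inside the cluster the route is served by the frontend deployment):

import base64
import requests

url = "http://localhost:3000/api/files/uploadFile"  # hypothetical local Next.js dev server

with open("addNums.py", "rb") as handle:  # any local experiment file
    payload = {
        "fileToUpload": base64.b64encode(handle.read()).decode("utf-8"),
        "experimentId": "demo-experiment-id",  # hypothetical id
    }

response = requests.post(url, json=payload, timeout=30)
print(response.status_code, response.json())
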
2 changes: 1 addition & 1 deletion apps/runner/modules/utils.py
@@ -52,7 +52,7 @@ def upload_experiment_zip(experiment: ExperimentData, encoded: Binary):
"experimentId": experiment.expId,
"encoded": base64.b64encode(encoded).decode("utf-8")
}
_callBackend(url, payload, "inserted result csv into mongodb with id")
_callBackend(url, payload, "inserted zip into mongodb with id")

def upload_experiment_log(experimentId: DocumentId):
filePath = get_filepath_for_experiment_log(experimentId)
27 changes: 11 additions & 16 deletions apps/runner/runner.py
@@ -6,6 +6,9 @@
import json
import time
import typing
import base64

import requests
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore, storage
@@ -39,7 +42,6 @@
firebaseCredentials = credentials.Certificate(json.loads(_get_env(ENV_FIREBASE_CREDENTIALS)))
firebaseApp = firebase_admin.initialize_app(firebaseCredentials)
firebaseDb = firestore.client()
firebaseBucket = storage.bucket("gladosbase.appspot.com")

syslogger.info("GLADOS Runner Started")

@@ -193,8 +195,14 @@ def download_experiment_files(experiment: ExperimentData):
experiment.file = filepath
explogger.info(f"Downloading {filepath} to ExperimentFiles/{experiment.expId}/{filepath}")
try:
filedata = firebaseBucket.blob(filepath)
filedata.download_to_filename(filepath)
# try to call the backend to download
url = f'http://glados-service-backend:{os.getenv("BACKEND_PORT")}/downloadExpFile?expId={experiment.expId}'
response = requests.get(url, timeout=60)
file_contents = base64.b64decode(response.json()["contents"]).decode()
# write the file contents to file path
with open(filepath, "x") as file:
file.write(file_contents)

except Exception as err:
explogger.error(f"Error {err} occurred while trying to download experiment file")
raise GladosInternalError('Failed to download experiment files') from err
@@ -220,12 +228,6 @@ def remove_downloaded_directory(experimentId: DocumentId):
def upload_experiment_results(experiment: ExperimentData):
explogger.info('Uploading Experiment Results...')

try:
uploadBlob = firebaseBucket.blob(f"results/result{experiment.expId}.csv")
uploadBlob.upload_from_filename('results.csv')
except Exception as err:
raise DatabaseConnectionError("Error uploading aggregated experiment results to firebase") from err

explogger.info('Uploading to MongoDB')
verify_mongo_connection()

@@ -249,13 +251,6 @@ def upload_experiment_results(experiment: ExperimentData):
except Exception as err:
raise GladosInternalError("Error preparing experiment results zip") from err

# TODO remove firebase usage once transition to mongo file storage is complete
try:
uploadBlob = firebaseBucket.blob(f"results/result{experiment.expId}.zip")
uploadBlob.upload_from_filename('ResultCsvs.zip')
except Exception as err:
raise DatabaseConnectionError("Error uploading experiment results zip to firebase") from err

upload_experiment_zip(experiment, encoded)

def post_process_experiment(experiment: ExperimentData):
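
Taken together, the runner changes replace the Firebase storage calls with a single HTTP round trip to the backend. A condensed sketch of the new download path (the service name and port come from the diff; the destination path and experiment id are illustrative):

import base64
import os
import requests

def fetch_experiment_file(exp_id: str, dest_path: str) -> None:
    # Ask the backend for the stored file instead of pulling it from a Firebase bucket.
    url = f'http://glados-service-backend:{os.getenv("BACKEND_PORT")}/downloadExpFile?expId={exp_id}'
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # The endpoint returns the base64 text the frontend stored in GridFS; decode it back to source.
    contents = base64.b64decode(response.json()["contents"]).decode()
    with open(dest_path, "x", encoding="utf-8") as file:  # "x": fail rather than overwrite
        file.write(contents)
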
2 changes: 1 addition & 1 deletion development_scripts/Dev_pull_repo.sh
@@ -13,4 +13,4 @@ git checkout development
git pull origin development

sed -i 's/glados-backend:main/glados-backend:development/g' ./kubernetes_init/backend/deployment-backend.yaml && echo 'Sucessfully renamed backend image!'
sed -i 's/glados-frontend:mainglados-frontend:development/g' ./kubernetes_init/frontend/deployment-frontend.yaml && echo 'Sucessfully renamed frontend image!'
sed -i 's/glados-frontend:main/glados-frontend:development/g' ./kubernetes_init/frontend/deployment-frontend.yaml && echo 'Sucessfully renamed frontend image!'
2 changes: 1 addition & 1 deletion installation_scripts/Deploy_Cluster.sh
@@ -3,7 +3,7 @@
# This script is meant to be run on the server to update the code and restart the docker containers

echo "Restarting cubed"
sudo kubeadm reset
sudo kubeadm reset --cri-socket unix:///var/run/containerd/containerd.sock
rm -f $HOME/.kube/config

sudo kubeadm init --config ../kube_config/kubeadm-config.yaml
2 changes: 1 addition & 1 deletion kubernetes_init/frontend/deployment-frontend.yaml
@@ -58,4 +58,4 @@ spec:
secretKeyRef:
name: secret-env
key: MONGODB_PASSWORD

