Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/Hexy00123/Sum2024MLOps into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Brvcket committed Jul 22, 2024
2 parents 166f95c + 6837291 commit e832330
Show file tree
Hide file tree
Showing 11 changed files with 303 additions and 57 deletions.
56 changes: 56 additions & 0 deletions .github/workflows/test-code.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,62 @@ jobs:
python3.11 -m pip install --upgrade pip
pip3.11 install -r requirements.txt
- name: create-json
id: create-json
uses: jsdaniell/create-json@1.1.2
with:
name: "gdrive-credentials.json"
json: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}

- name: Configure DVC remote with service account
run: |
dvc remote modify storage gdrive_use_service_account true
dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-credentials.json
- name: Pull DVC data
run: |
dvc fetch
dvc pull
- name: Test git config
run: |
git status
git pull
git status
# - name: Git checkout
# run: |
# git checkout main

- name: Test tags version
run: |
git tag
- name: Test git checkout
run: |
git checkout v1.0 data/samples.dvc
dvc pull
git checkout v2.0 data/samples.dvc
dvc pull
git checkout v3.0 data/samples.dvc
dvc pull
git checkout v4.0 data/samples.dvc
dvc pull
git checkout v5.0 data/samples.dvc
dvc pull
- name: Test dvc checkout
run: |
dvc checkout data/samples.dvc
- name: Test checkout
run: |
git checkout HEAD data/samples.dvc
dvc checkout data/samples.dvc
- name: Run your app
run: python3.11 src/app.py &

Expand Down
123 changes: 123 additions & 0 deletions .github/workflows/test-dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# .github/workflows/test-dvc.yaml

# Name of the workflow
name: Test dvc

# Trigger when?
on:
push: # triggers the workflow/pipeline only on pushes to the `main` or `dev` branches
branches:
- main
- dev
paths: # the push must change at least one file matching the path patterns listed below; otherwise the workflow is not triggered
- 'src/**'
- 'scripts/**'
- 'services/airflow/dags/**'
- 'tests/**'
- 'configs/**'
- '.github/**'

# Allows to only read the contents of the repository
# `contents: read` permits an action to list the commits
# `contents: write` allows the action to create a release
permissions:
contents: read

# Declare environment variables to be used in this workflow file
env:
message: "Testing dvc!"

# Tasks
jobs:
# Task name
test-code:
# OS to run the task
runs-on: ubuntu-latest # The ubuntu-latest label currently uses the Ubuntu 22.04 runner image

defaults: # Set working directory of the job
run:
shell: bash # Set the default shell
working-directory: .

# The steps of the task/job
steps:


- name: Checking out our code
uses: actions/checkout@v4

- name: setup python
uses: actions/setup-python@v5
with:
python-version: '3.11.0'
cache: 'pip' # caching pip dependencies


- name: install python packages
run: |
python3.11 -m pip install --upgrade pip
pip3.11 install dvc
pip3.11 install dvc-gdrive
- name: create-json
id: create-json
uses: jsdaniell/create-json@1.1.2
with:
name: "gdrive-credentials.json"
json: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}

- name: Configure DVC remote with service account
run: |
dvc remote modify storage gdrive_use_service_account true
dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-credentials.json
- name: Pull DVC data
run: |
dvc fetch
dvc pull
- name: Test git config
run: |
git status
git pull
git status
# - name: Git checkout
# run: |
# git checkout main

- name: Test tags version
run: |
git tag
- name: Test git checkout
run: |
git checkout v1.0 data/samples.dvc
dvc pull
dvc checkout data/samples.dvc
git checkout v2.0 data/samples.dvc
dvc pull
dvc checkout data/samples.dvc
git checkout v3.0 data/samples.dvc
dvc pull
dvc checkout data/samples.dvc
git checkout v4.0 data/samples.dvc
dvc pull
dvc checkout data/samples.dvc
git checkout v5.0 data/samples.dvc
dvc pull
dvc checkout data/samples.dvc
- name: Test dvc checkout
run: |
dvc checkout data/samples.dvc
- name: Test checkout
run: |
git checkout HEAD data/samples.dvc
dvc checkout data/samples.dvc
83 changes: 83 additions & 0 deletions .github/workflows/validate-model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# .github/workflows/validate-model.yaml

# Name of the workflow
name: Test code

# Trigger when?
on:
push: # triggers the workflow/pipeline only on pushes to the `main` or `dev` branches
branches:
- main
- dev
paths: # the push must change at least one file matching the path patterns listed below; otherwise the workflow is not triggered
- 'src/**'
- 'scripts/**'
- 'services/airflow/dags/**'
- 'tests/**'
- 'configs/**'
- '.github/**'

# Allows to only read the contents of the repository
# `contents: read` permits an action to list the commits
# `contents: write` allows the action to create a release
permissions:
contents: read

# Declare environment variables to be used in this workflow file
env:
message: "Testing code!"

# Tasks
jobs:
# Task name
test-code:
# OS to run the task
runs-on: ubuntu-latest # The ubuntu-latest label currently uses the Ubuntu 22.04 runner image

defaults: # Set working directory of the job
run:
shell: bash # Set the default shell
working-directory: .

# The steps of the task/job
steps:
- name: Checking out our code
uses: actions/checkout@v4

- name: setup python
uses: actions/setup-python@v5
with:
python-version: '3.11.0'
cache: 'pip' # caching pip dependencies

- name: install python packages
run: |
python3.11 -m pip install --upgrade pip
pip3.11 install -r requirements.txt
- name: Export variables
  run: |
    echo "$PWD"
    # `export` only affects the shell of this single step. Appending
    # NAME=value lines to the file named by $GITHUB_ENV makes the variables
    # available to every subsequent step of this job.
    {
      echo "ZENML_CONFIG_PATH=$PWD/services/zenml"
      echo "PROJECTPATH=$PWD"
      echo "AIRFLOW_HOME=$PWD/services/airflow"
      echo "PYTHONPATH=$PWD/src"
    } >> "$GITHUB_ENV"
- name: Run ZenML server
run: zenml down && zenml up

- name: Run your app
run: python3.11 src/validate.py

# Another job
print_info:
  runs-on: ubuntu-latest
  needs: test-code
  steps:
    # Secrets must never be echoed or interpolated directly into the script
    # text: the value is passed through the step environment instead, and the
    # step reports only whether the secret is configured.
    - name: print my password
      env:
        PASSWORD: ${{ secrets.PASSWORD }}
        NAME: ${{ vars.NAME }}
      run: |
        if [ -n "$PASSWORD" ]; then
          echo "My password is set (value hidden)"
        else
          echo "My password is not set"
        fi
        echo "My name is $NAME"
    - name: print message
      # Quoted so the message is printed verbatim (no word splitting or globbing).
      run: echo "$message"
Binary file modified .gitignore
Binary file not shown.
4 changes: 2 additions & 2 deletions services/gx/expectations/first_phase_expectation_suite.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"expectation_type": "expect_column_median_to_be_between",
"kwargs": {
"column": "price",
"max_value": 15000000.0,
"max_value": 17000000.0,
"min_value": 120000.0
},
"meta": {
Expand Down Expand Up @@ -241,6 +241,6 @@
],
"ge_cloud_id": null,
"meta": {
"great_expectations_version": "0.18.18"
"great_expectations_version": "0.18.19"
}
}
2 changes: 1 addition & 1 deletion services/gx/great_expectations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ fluent_datasources:
assets:
asset01:
type: csv
filepath_or_buffer: data\samples\sample.csv
filepath_or_buffer: data/samples/sample.csv
my_pandas_ds:
type: pandas
assets:
Expand Down
4 changes: 2 additions & 2 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ def predict(
gr.Text(label="province_name"),
gr.Number(label="latitude"),
gr.Number(label="longitude"),
gr.Number(label="baths"),
gr.Number(label="baths"), # slider
gr.Text(label="area"), # Marla or Kanal + size
gr.Dropdown(label="purpose", choices=["For Sale", "For Rent"]),
gr.Number(label="bedrooms"),
gr.Number(label="bedrooms"), #slider
gr.Textbox(label="date_added"), # TODO: How to add datetime?
gr.Text(label="agency"),
gr.Text(label="agent"),
Expand Down
4 changes: 3 additions & 1 deletion src/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,11 @@ def read_datastore():

version = cfg.test_data_version if cfg.test else cfg.index
try:
subprocess.run(["dvc", "fetch"])
subprocess.run(["dvc", "fetch"], check=True)
subprocess.run(["dvc", "pull"], check=True)
subprocess.run(
["git", "checkout", f"v{version}.0", f"{cfg.dvc_file_path}"], check=True)
subprocess.run(["dvc", "pull"], check=True)
subprocess.run(["dvc", "checkout", f"{cfg.dvc_file_path}"], check=True)

sample_path = cfg.sample_path
Expand All @@ -112,6 +113,7 @@ def read_datastore():
# Return to the HEAD state
subprocess.run(["git", "checkout", "HEAD",
f"{cfg.dvc_file_path}"], check=True)
subprocess.run(["dvc", "pull"], check=True)
subprocess.run(["dvc", "checkout", f"{cfg.dvc_file_path}"], check=True)


Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Tuple

import pandas as pd
import pytest
from hydra import initialize, compose
from omegaconf import DictConfig

from src.data import (
read_datastore,
preprocess_data,
)


@pytest.fixture
def cfg() -> DictConfig:
    """
    Provide the Hydra configuration composed from ``test_config``.

    Hydra is initialized with ``config_path="../configs"`` only for the
    duration of the composition.
    """
    with initialize(config_path="../configs", version_base=None):
        return compose(config_name="test_config")


@pytest.fixture
def raw_sample() -> pd.DataFrame:
    """Provide the object returned by ``read_datastore()`` as raw test data."""
    return read_datastore()


@pytest.fixture
def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
    """
    Provide the pair produced by ``preprocess_data`` for the raw sample.

    The result is unpacked into exactly two values and returned as a tuple.
    """
    features, target = preprocess_data(raw_sample)
    return features, target
22 changes: 0 additions & 22 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,6 @@
read_datastore, preprocess_data, load_features


@pytest.fixture
def cfg() -> DictConfig:
"""
Load the test_config.yaml configuration file
"""
with initialize(config_path="../configs", version_base=None):
cfg = compose(config_name="test_config")
return cfg


@pytest.fixture
def raw_sample(cfg) -> pd.DataFrame:
df = read_datastore(cfg)
return df


@pytest.fixture
def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
X, y = preprocess_data(raw_sample)
return X, y


def sample_data_stage(cfg: DictConfig, index: int, sample_file: str):
"""
Helper function to sample data for a specific project stage
Expand Down
Loading

0 comments on commit e832330

Please sign in to comment.