Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/Hexy00123/Sum2024MLOps into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Hexy00123 committed Jul 22, 2024
2 parents 26469a0 + 666134c commit 916d3c2
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 62 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/test-code.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@ jobs:
run: |
python3.11 -m pip install --upgrade pip
pip3.11 install -r requirements.txt
- name: create-json
id: create-json
uses: jsdaniell/create-json@1.1.2
with:
name: "gdrive-credentials.json"
json: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}

- name: Configure DVC remote with service account
run: |
dvc remote modify storage gdrive_use_service_account true
dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-credentials.json
- name: Pull DVC data
run: |
dvc fetch
dvc pull
- name: Run your app
run: python3.11 src/app.py &
Expand Down
Empty file.
Binary file modified .gitignore
Binary file not shown.
4 changes: 2 additions & 2 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ def predict(
gr.Text(label="province_name"),
gr.Number(label="latitude"),
gr.Number(label="longitude"),
gr.Number(label="baths"),
gr.Number(label="baths"), # slider
gr.Text(label="area"), # Marla or Kanal + size
gr.Dropdown(label="purpose", choices=["For Sale", "For Rent"]),
gr.Number(label="bedrooms"),
gr.Number(label="bedrooms"), #slider
gr.Textbox(label="date_added"), # TODO: How to add datetime?
gr.Text(label="agency"),
gr.Text(label="agent"),
Expand Down
14 changes: 5 additions & 9 deletions src/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,13 @@ def format_area(value):
print(f"Refactored data saved to {data_path}")


def read_datastore():
with initialize(config_path="../configs", version_base=None):
cfg = compose(config_name="main")

@hydra.main(config_path="../configs", config_name="main", version_base=None)
def read_datastore(cfg: DictConfig):
version = cfg.test_data_version if cfg.test else cfg.index
try:
subprocess.run(["dvc", "fetch"])
subprocess.run(["dvc", "fetch", f"{cfg.dvc_file_path}"])
subprocess.run(["dvc", "pull"], check=True)
subprocess.run(
["git", "checkout", f"v{version}.0", f"{cfg.dvc_file_path}"], check=True)
subprocess.run(["git", "checkout", f"v{version}.0", f"{cfg.dvc_file_path}"], check=True)
subprocess.run(["dvc", "checkout", f"{cfg.dvc_file_path}"], check=True)

sample_path = cfg.sample_path
Expand All @@ -110,8 +107,7 @@ def read_datastore():
return sample
finally:
# Return to the HEAD state
subprocess.run(["git", "checkout", "HEAD",
f"{cfg.dvc_file_path}"], check=True)
subprocess.run(["git", "checkout", "HEAD", f"{cfg.dvc_file_path}"], check=True)
subprocess.run(["dvc", "checkout", f"{cfg.dvc_file_path}"], check=True)


Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Tuple

import pandas as pd
import pytest
from hydra import initialize, compose
from omegaconf import DictConfig

from src.data import (
read_datastore,
preprocess_data,
)


@pytest.fixture
def cfg() -> DictConfig:
    """
    Compose and return the Hydra configuration named ``test_config``.

    Hydra is initialized with ``config_path="../configs"`` for the duration
    of the composition only.
    """
    with initialize(config_path="../configs", version_base=None):
        return compose(config_name="test_config")


@pytest.fixture
def raw_sample(cfg) -> pd.DataFrame:
    """
    Return whatever ``read_datastore`` yields for the ``cfg`` fixture.
    """
    return read_datastore(cfg)


@pytest.fixture
def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
    """
    Run ``preprocess_data`` on the ``raw_sample`` fixture.

    The result is unpacked into exactly two values and returned as a pair.
    """
    features, target = preprocess_data(raw_sample)
    return features, target
22 changes: 0 additions & 22 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,6 @@
read_datastore, preprocess_data, load_features


@pytest.fixture
def cfg() -> DictConfig:
"""
Load the test_config.yaml configuration file
"""
with initialize(config_path="../configs", version_base=None):
cfg = compose(config_name="test_config")
return cfg


@pytest.fixture
def raw_sample(cfg) -> pd.DataFrame:
df = read_datastore(cfg)
return df


@pytest.fixture
def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
X, y = preprocess_data(raw_sample)
return X, y


def sample_data_stage(cfg: DictConfig, index: int, sample_file: str):
"""
Helper function to sample data for a specific project stage
Expand Down
29 changes: 0 additions & 29 deletions tests/test_data_expectations.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,6 @@
import pandas as pd
import pytest
from hydra import compose, initialize
from omegaconf import DictConfig
from typing import Tuple


from src.data import read_datastore, preprocess_data
from src.data_expectations import validate_features, validate_initial_data


@pytest.fixture
def cfg() -> DictConfig:
"""
Load the test_config.yaml configuration file
"""
with initialize(config_path="../configs", version_base=None):
cfg = compose(config_name="test_config")
return cfg


@pytest.fixture
def raw_sample(cfg) -> pd.DataFrame:
df = read_datastore(cfg)
return df

@pytest.fixture
def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
X, y = preprocess_data(raw_sample)
return X, y


def test_validate_initial_data():
try:
validate_initial_data()
Expand Down

0 comments on commit 916d3c2

Please sign in to comment.