diff --git a/.github/workflows/test-code.yaml b/.github/workflows/test-code.yaml
index f4d0c32..f9562b3 100644
--- a/.github/workflows/test-code.yaml
+++ b/.github/workflows/test-code.yaml
@@ -55,6 +55,62 @@ jobs:
           python3.11 -m pip install --upgrade pip
           pip3.11 install -r requirements.txt
 
+      - name: create-json
+        id: create-json
+        uses: jsdaniell/create-json@1.1.2
+        with:
+          name: "gdrive-credentials.json"
+          json: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
+
+      - name: Configure DVC remote with service account
+        run: |
+          dvc remote modify storage gdrive_use_service_account true
+          dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-credentials.json
+
+      - name: Pull DVC data
+        run: |
+          dvc fetch
+          dvc pull
+
+      - name: Test git config
+        run: |
+          git status
+          git pull
+          git status
+
+      # - name: Git checkout
+      #   run: |
+      #     git checkout main
+
+      - name: Test tags version
+        run: |
+          git tag
+
+      - name: Test git checkout
+        run: |
+          git checkout v1.0 data/samples.dvc
+          dvc pull
+          git checkout v2.0 data/samples.dvc
+          dvc pull
+          git checkout v3.0 data/samples.dvc
+          dvc pull
+          git checkout v4.0 data/samples.dvc
+          dvc pull
+          git checkout v5.0 data/samples.dvc
+          dvc pull
+
+      - name: Test dvc checkout
+        run: |
+          dvc checkout data/samples.dvc
+
+      - name: Test checkout
+        run: |
+          git checkout HEAD data/samples.dvc
+          dvc checkout data/samples.dvc
+
       - name: Run your app
         run: python3.11 src/app.py &
diff --git a/.github/workflows/test-dvc.yaml b/.github/workflows/test-dvc.yaml
new file mode 100644
index 0000000..8cd7f2d
--- /dev/null
+++ b/.github/workflows/test-dvc.yaml
@@ -0,0 +1,123 @@
+# .github/workflows/test-dvc.yaml
+
+# Name of the workflow
+name: Test dvc
+
+# Trigger when?
+on:
+  push: # trigger the workflow only on pushes to the `main` or `dev` branch
+    branches:
+      - main
+      - dev
+    paths: # the push must touch at least one of these paths, otherwise the workflow is not triggered
+      - 'src/**'
+      - 'scripts/**'
+      - 'services/airflow/dags/**'
+      - 'tests/**'
+      - 'configs/**'
+      - '.github/**'
+
+# Allow only read access to the repository contents
+# `contents: read` permits an action to list the commits
+# `contents: write` allows the action to create a release
+permissions:
+  contents: read
+
+# Declare environment variables to be used in this workflow file
+env:
+  message: "Testing dvc!"
+
+# Tasks
+jobs:
+  # Task name
+  test-code:
+    # OS to run the task
+    runs-on: ubuntu-latest # The ubuntu-latest label currently uses the Ubuntu 22.04 runner image
+
+    defaults: # Set working directory of the job
+      run:
+        shell: bash # Set the default shell
+        working-directory: .
+
+    # The steps of the task/job
+    steps:
+      - name: Checking out our code
+        uses: actions/checkout@v4
+
+      - name: setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.0'
+          cache: 'pip' # caching pip dependencies
+
+      - name: install python packages
+        run: |
+          python3.11 -m pip install --upgrade pip
+          pip3.11 install dvc
+          pip3.11 install dvc-gdrive
+
+      - name: create-json
+        id: create-json
+        uses: jsdaniell/create-json@1.1.2
+        with:
+          name: "gdrive-credentials.json"
+          json: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
+
+      - name: Configure DVC remote with service account
+        run: |
+          dvc remote modify storage gdrive_use_service_account true
+          dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-credentials.json
+
+      - name: Pull DVC data
+        run: |
+          dvc fetch
+          dvc pull
+
+      - name: Test git config
+        run: |
+          git status
+          git pull
+          git status
+
+      # - name: Git checkout
+      #   run: |
+      #     git checkout main
+
+      - name: Test tags version
+        run: |
+          git tag
+
+      - name: Test git checkout
+        run: |
+          git checkout v1.0 data/samples.dvc
+          dvc pull
+          dvc checkout data/samples.dvc
+          git checkout v2.0 data/samples.dvc
+          dvc pull
+          dvc checkout data/samples.dvc
+          git checkout v3.0 data/samples.dvc
+          dvc pull
+          dvc checkout data/samples.dvc
+          git checkout v4.0 data/samples.dvc
+          dvc pull
+          dvc checkout data/samples.dvc
+          git checkout v5.0 data/samples.dvc
+          dvc pull
+          dvc checkout data/samples.dvc
+
+      - name: Test dvc checkout
+        run: |
+          dvc checkout data/samples.dvc
+
+      - name: Test checkout
+        run: |
+          git checkout HEAD data/samples.dvc
+          dvc checkout data/samples.dvc
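The DVC workflows above step through the dataset tags by hand: check out the tagged data/samples.dvc pointer, then dvc pull and dvc checkout. The sketch below mirrors that "Test git checkout" step in Python; the helper name pull_data_version and the hard-coded tag range are illustrative assumptions, not code taken from the repository.

import subprocess

DVC_FILE = "data/samples.dvc"  # pointer file used by the workflows above


def pull_data_version(tag: str, dvc_file: str = DVC_FILE) -> None:
    # Point the .dvc pointer file at the tagged revision of the dataset
    subprocess.run(["git", "checkout", tag, dvc_file], check=True)
    # Download the matching objects from the remote and materialize them
    subprocess.run(["dvc", "pull"], check=True)
    subprocess.run(["dvc", "checkout", dvc_file], check=True)


if __name__ == "__main__":
    for version in range(1, 6):  # v1.0 .. v5.0, as in the workflow step
        pull_data_version(f"v{version}.0")
    # Return the pointer file and the workspace data to HEAD afterwards
    subprocess.run(["git", "checkout", "HEAD", DVC_FILE], check=True)
    subprocess.run(["dvc", "checkout", DVC_FILE], check=True)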
diff --git a/.github/workflows/validate-model.yaml b/.github/workflows/validate-model.yaml
new file mode 100644
index 0000000..7674f1c
--- /dev/null
+++ b/.github/workflows/validate-model.yaml
@@ -0,0 +1,83 @@
+# .github/workflows/validate-model.yaml
+
+# Name of the workflow
+name: Validate model
+
+# Trigger when?
+on:
+  push: # trigger the workflow only on pushes to the `main` or `dev` branch
+    branches:
+      - main
+      - dev
+    paths: # the push must touch at least one of these paths, otherwise the workflow is not triggered
+      - 'src/**'
+      - 'scripts/**'
+      - 'services/airflow/dags/**'
+      - 'tests/**'
+      - 'configs/**'
+      - '.github/**'
+
+# Allow only read access to the repository contents
+# `contents: read` permits an action to list the commits
+# `contents: write` allows the action to create a release
+permissions:
+  contents: read
+
+# Declare environment variables to be used in this workflow file
+env:
+  message: "Validating model!"
+
+# Tasks
+jobs:
+  # Task name
+  test-code:
+    # OS to run the task
+    runs-on: ubuntu-latest # The ubuntu-latest label currently uses the Ubuntu 22.04 runner image
+
+    defaults: # Set working directory of the job
+      run:
+        shell: bash # Set the default shell
+        working-directory: .
+
+    # The steps of the task/job
+    steps:
+      - name: Checking out our code
+        uses: actions/checkout@v4
+
+      - name: setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.0'
+          cache: 'pip' # caching pip dependencies
+
+      - name: install python packages
+        run: |
+          python3.11 -m pip install --upgrade pip
+          pip3.11 install -r requirements.txt
+
+      - name: Export variables
+        run: |
+          echo $PWD
+          # write to $GITHUB_ENV so the variables persist into later steps
+          echo "ZENML_CONFIG_PATH=$PWD/services/zenml" >> "$GITHUB_ENV"
+          echo "PROJECTPATH=$PWD" >> "$GITHUB_ENV"
+          echo "AIRFLOW_HOME=$PWD/services/airflow" >> "$GITHUB_ENV"
+          echo "PYTHONPATH=$PWD/src" >> "$GITHUB_ENV"
+
+      - name: Run ZenML server
+        run: zenml down && zenml up
+
+      - name: Run your app
+        run: python3.11 src/validate.py
+
+  # Another job
+  print_info:
+    runs-on: ubuntu-latest
+    needs: test-code
+    steps:
+      - name: print my password
+        run: |
+          echo My password is ${{ secrets.PASSWORD }}
+          echo My name is '${{ vars.NAME }}'
+
+      - name: print message
+        run: echo $message
diff --git a/.gitignore b/.gitignore
index 839b567..140ae40 100644
Binary files a/.gitignore and b/.gitignore differ
diff --git a/services/gx/expectations/first_phase_expectation_suite.json b/services/gx/expectations/first_phase_expectation_suite.json
index 860e5a4..0e92f84 100644
--- a/services/gx/expectations/first_phase_expectation_suite.json
+++ b/services/gx/expectations/first_phase_expectation_suite.json
@@ -43,7 +43,7 @@
       "expectation_type": "expect_column_median_to_be_between",
       "kwargs": {
         "column": "price",
-        "max_value": 15000000.0,
+        "max_value": 17000000.0,
         "min_value": 120000.0
       },
       "meta": {
@@ -241,6 +241,6 @@
   ],
   "ge_cloud_id": null,
   "meta": {
-    "great_expectations_version": "0.18.18"
+    "great_expectations_version": "0.18.19"
   }
 }
\ No newline at end of file
diff --git a/services/gx/great_expectations.yml b/services/gx/great_expectations.yml
index c519afb..32705c2 100644
--- a/services/gx/great_expectations.yml
+++ b/services/gx/great_expectations.yml
@@ -102,7 +102,7 @@ fluent_datasources:
     assets:
       asset01:
         type: csv
-        filepath_or_buffer: data\samples\sample.csv
+        filepath_or_buffer: data/samples/sample.csv
   my_pandas_ds:
     type: pandas
     assets:
diff --git a/src/app.py b/src/app.py
index feb7ea5..b15430b 100644
--- a/src/app.py
+++ b/src/app.py
@@ -116,10 +116,10 @@ def predict(
         gr.Text(label="province_name"),
         gr.Number(label="latitude"),
         gr.Number(label="longitude"),
-        gr.Number(label="baths"),
+        gr.Number(label="baths"),  # slider
         gr.Text(label="area"),  # Marla or Kanal + size
         gr.Dropdown(label="purpose", choices=["For Sale", "For Rent"]),
-        gr.Number(label="bedrooms"),
+        gr.Number(label="bedrooms"),  # slider
         gr.Textbox(label="date_added"),  # TODO: How to add datetime?
         gr.Text(label="agency"),
         gr.Text(label="agent"),
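The "# slider" notes in the src/app.py hunk above suggest replacing the plain numeric inputs with bounded sliders. A minimal Gradio sketch of that idea follows; the minimum/maximum/step values are guesses for illustration, not values taken from the dataset or the project config.

import gradio as gr

# Hypothetical replacements for the gr.Number inputs marked "# slider" above.
inputs = [
    gr.Slider(minimum=0, maximum=10, step=1, label="baths"),
    gr.Slider(minimum=0, maximum=10, step=1, label="bedrooms"),
]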
diff --git a/src/data.py b/src/data.py
index 208d1df..96fa647 100644
--- a/src/data.py
+++ b/src/data.py
@@ -97,10 +97,11 @@ def read_datastore():
     version = cfg.test_data_version if cfg.test else cfg.index
 
     try:
-        subprocess.run(["dvc", "fetch"])
+        subprocess.run(["dvc", "fetch"], check=True)
         subprocess.run(["dvc", "pull"], check=True)
         subprocess.run(
             ["git", "checkout", f"v{version}.0", f"{cfg.dvc_file_path}"], check=True)
+        subprocess.run(["dvc", "pull"], check=True)
         subprocess.run(["dvc", "checkout", f"{cfg.dvc_file_path}"], check=True)
 
         sample_path = cfg.sample_path
@@ -112,6 +113,7 @@ def read_datastore():
 
     # Return to the HEAD state
     subprocess.run(["git", "checkout", "HEAD", f"{cfg.dvc_file_path}"], check=True)
+    subprocess.run(["dvc", "pull"], check=True)
     subprocess.run(["dvc", "checkout", f"{cfg.dvc_file_path}"], check=True)
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..18d0702
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,33 @@
+from typing import Tuple
+
+import pandas as pd
+import pytest
+from hydra import initialize, compose
+from omegaconf import DictConfig
+
+from src.data import (
+    read_datastore,
+    preprocess_data,
+)
+
+
+@pytest.fixture
+def cfg() -> DictConfig:
+    """
+    Load the test_config.yaml configuration file
+    """
+    with initialize(config_path="../configs", version_base=None):
+        cfg = compose(config_name="test_config")
+    return cfg
+
+
+@pytest.fixture
+def raw_sample() -> pd.DataFrame:
+    df = read_datastore()
+    return df
+
+
+@pytest.fixture
+def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
+    X, y = preprocess_data(raw_sample)
+    return X, y
diff --git a/tests/test_data.py b/tests/test_data.py
index fc2e69f..9a204f0 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -12,28 +12,6 @@
     read_datastore,
     preprocess_data,
     load_features
-@pytest.fixture
-def cfg() -> DictConfig:
-    """
-    Load the test_config.yaml configuration file
-    """
-    with initialize(config_path="../configs", version_base=None):
-        cfg = compose(config_name="test_config")
-    return cfg
-
-
-@pytest.fixture
-def raw_sample(cfg) -> pd.DataFrame:
-    df = read_datastore(cfg)
-    return df
-
-
-@pytest.fixture
-def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
-    X, y = preprocess_data(raw_sample)
-    return X, y
-
-
 def sample_data_stage(cfg: DictConfig, index: int, sample_file: str):
     """
     Helper function to sample data for a specific project stage
diff --git a/tests/test_data_expectations.py b/tests/test_data_expectations.py
index 436e5f1..08ecee0 100644
--- a/tests/test_data_expectations.py
+++ b/tests/test_data_expectations.py
@@ -1,35 +1,6 @@
-import pandas as pd
-import pytest
-from hydra import compose, initialize
-from omegaconf import DictConfig
-from typing import Tuple
-
-
-from src.data import read_datastore, preprocess_data
 from src.data_expectations import validate_features, validate_initial_data
-
-
-@pytest.fixture
-def cfg() -> DictConfig:
-    """
-    Load the test_config.yaml configuration file
-    """
-    with initialize(config_path="../configs", version_base=None):
-        cfg = compose(config_name="test_config")
-    return cfg
-
-
-@pytest.fixture
-def raw_sample(cfg) -> pd.DataFrame:
-    df = read_datastore(cfg)
-    return df
-
-@pytest.fixture
-def preprocessed_sample(raw_sample) -> Tuple[pd.DataFrame, pd.Series]:
-    X, y = preprocess_data(raw_sample)
-    return X, y
-
-
 def test_validate_initial_data():
     try:
         validate_initial_data()
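The read_datastore() change in src/data.py temporarily rewinds data/samples.dvc to a tagged version and restores HEAD at the end. Below is a minimal sketch of the same pattern wrapped in a context manager so the restore runs even if reading fails; data_version is a hypothetical helper, not part of src/data.py, and the sample path in the usage comment is an assumption.

import subprocess
from contextlib import contextmanager


@contextmanager
def data_version(tag: str, dvc_file: str = "data/samples.dvc"):
    """Temporarily check out the dataset referenced by a git tag."""
    subprocess.run(["git", "checkout", tag, dvc_file], check=True)
    try:
        subprocess.run(["dvc", "pull"], check=True)
        subprocess.run(["dvc", "checkout", dvc_file], check=True)
        yield
    finally:
        # Always return the pointer file and the workspace data to HEAD
        subprocess.run(["git", "checkout", "HEAD", dvc_file], check=True)
        subprocess.run(["dvc", "pull"], check=True)
        subprocess.run(["dvc", "checkout", dvc_file], check=True)


# Example usage (assumed sample location):
# with data_version("v3.0"):
#     df = pd.read_csv("data/samples/sample.csv")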