From 4b07ce4e4b99bd5448baf88ab654b81ac46ec661 Mon Sep 17 00:00:00 2001 From: Sanjay Dasgupta Date: Sun, 24 Mar 2024 10:37:22 +0530 Subject: [PATCH 1/2] Fix for 'upload_to_hf_hub()' path mismatch with 'save()' (#3977) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- ludwig/api.py | 40 +++++++++++++------ ludwig/automl/automl.py | 6 +-- ludwig/benchmarking/artifacts.py | 11 +++-- ludwig/benchmarking/summary_dataclasses.py | 4 +- ludwig/benchmarking/utils.py | 8 ++-- ludwig/contribs/mlflow/__init__.py | 4 +- ludwig/contribs/mlflow/model.py | 8 ++-- ludwig/globals.py | 1 + ludwig/hyperopt/execution.py | 7 ++-- ludwig/trainers/base.py | 3 +- ludwig/trainers/trainer.py | 3 +- ludwig/trainers/trainer_lightgbm.py | 9 ++++- ludwig/trainers/trainer_llm.py | 3 +- ludwig/upload.py | 25 +++++++++--- ludwig/utils/checkpoint_utils.py | 7 ++-- ludwig/utils/misc_utils.py | 4 +- ludwig/utils/upload_utils.py | 30 ++++++++------ .../scripts/run_train_horovod.py | 3 +- tests/integration_tests/test_api.py | 4 +- tests/integration_tests/test_cli.py | 13 +++--- tests/integration_tests/test_collect.py | 7 ++-- tests/integration_tests/test_experiment.py | 11 +++-- tests/integration_tests/test_gbm.py | 3 +- tests/integration_tests/test_hyperopt.py | 11 +++-- tests/integration_tests/test_hyperopt_ray.py | 6 +-- tests/integration_tests/test_llm.py | 15 ++++--- .../test_missing_value_strategy.py | 3 +- tests/integration_tests/test_mlflow.py | 5 ++- .../test_model_save_and_load.py | 11 ++--- .../test_model_training_options.py | 8 ++-- .../integration_tests/test_postprocessing.py | 3 +- tests/integration_tests/test_ray.py | 3 +- tests/integration_tests/test_remote.py | 6 +-- tests/integration_tests/test_trainer.py | 3 +- tests/integration_tests/utils.py | 8 ++-- tests/ludwig/encoders/test_text_encoders.py | 6 +-- tests/ludwig/utils/test_upload_utils.py | 8 ++-- .../regression_tests/model/test_old_models.py | 3 +- 38 files changed, 192 insertions(+), 121 deletions(-) diff --git a/ludwig/api.py b/ludwig/api.py index 540c5797d6d..691219e201a 100644 --- a/ludwig/api.py +++ b/ludwig/api.py @@ -64,7 +64,9 @@ from ludwig.features.feature_registries import update_config_with_metadata, update_config_with_model from ludwig.globals import ( LUDWIG_VERSION, + MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME, set_disable_progressbar, TRAIN_SET_METADATA_FILE_NAME, TRAINING_CHECKPOINTS_DIR_PATH, @@ -110,6 +112,7 @@ from ludwig.utils.torch_utils import DEVICE from ludwig.utils.trainer_utils import get_training_report from ludwig.utils.types import DataFrame, TorchDevice +from ludwig.utils.upload_utils import HuggingFaceHub logger = logging.getLogger(__name__) @@ -1938,35 +1941,48 @@ def upload_to_hf_hub( # Inputs - :param repo_id (`str`): + :param repo_id: (`str`) A namespace (user or an organization) and a repo name separated by a `/`. - :param model_path (`str`): - The path of the saved model. This is the top level directory where - the models weights as well as other associated training artifacts - are saved. - :param private (`bool`, *optional*, defaults to `False`): + :param model_path: (`str`) + The path of the saved model. This is either (a) the folder where + the 'model_weights' folder and the 'model_hyperparameters.json' file + are stored, or (b) the parent of that folder. + :param private: (`bool`, *optional*, defaults to `False`) Whether the model repo should be private. - :param repo_type (`str`, *optional*): + :param repo_type: (`str`, *optional*) Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to a model. Default is `None`. - :param commit_message (`str`, *optional*): + :param commit_message: (`str`, *optional*) The summary / title / first line of the generated commit. Defaults to: `f"Upload {path_in_repo} with huggingface_hub"` - :param commit_description (`str` *optional*): + :param commit_description: (`str` *optional*) The description of the generated commit # Returns :return: (bool) True for success, False for failure. """ + if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists( + os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME) + ): + experiment_path = model_path + elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists( + os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME) + ): + experiment_path = os.path.dirname(model_path) + else: + raise ValueError( + f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at " + f"'{model_path}' or at '{model_path}/model'" + ) model_service = get_upload_registry()["hf_hub"] - hub = model_service() + hub: HuggingFaceHub = model_service() hub.login() - upload_status = hub.upload( + upload_status: bool = hub.upload( repo_id=repo_id, - model_path=model_path, + model_path=experiment_path, repo_type=repo_type, private=private, commit_message=commit_message, diff --git a/ludwig/automl/automl.py b/ludwig/automl/automl.py index 8ce1be61f9f..3b0c878c073 100644 --- a/ludwig/automl/automl.py +++ b/ludwig/automl/automl.py @@ -50,7 +50,7 @@ from ludwig.contrib import add_contrib_callback_args from ludwig.data.cache.types import CacheableDataset from ludwig.datasets import load_dataset_uris -from ludwig.globals import LUDWIG_VERSION +from ludwig.globals import LUDWIG_VERSION, MODEL_FILE_NAME from ludwig.hyperopt.run import hyperopt from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict @@ -101,10 +101,10 @@ def best_model(self) -> Optional[LudwigModel]: # Read remote URIs using Ludwig's internal remote file loading APIs, as # Ray's do not handle custom credentials at the moment. with use_credentials(self._creds): - return LudwigModel.load(os.path.join(ckpt_path, "model")) + return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME)) else: with checkpoint.as_directory() as ckpt_path: - return LudwigModel.load(os.path.join(ckpt_path, "model")) + return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME)) @PublicAPI diff --git a/ludwig/benchmarking/artifacts.py b/ludwig/benchmarking/artifacts.py index 4bdf58a0126..cf55c7c0d57 100644 --- a/ludwig/benchmarking/artifacts.py +++ b/ludwig/benchmarking/artifacts.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import Any, Dict +from ludwig.globals import MODEL_FILE_NAME from ludwig.types import ModelConfigDict, TrainingSetMetadataDict from ludwig.utils.data_utils import load_json, load_yaml @@ -55,7 +56,11 @@ def build_benchmarking_result(benchmarking_config: dict, experiment_idx: int): description=load_json(os.path.join(experiment_run_path, "description.json")), test_statistics=load_json(os.path.join(experiment_run_path, "test_statistics.json")), training_statistics=load_json(os.path.join(experiment_run_path, "training_statistics.json")), - model_hyperparameters=load_json(os.path.join(experiment_run_path, "model", "model_hyperparameters.json")), - training_progress=load_json(os.path.join(experiment_run_path, "model", "training_progress.json")), - training_set_metadata=load_json(os.path.join(experiment_run_path, "model", "training_set_metadata.json")), + model_hyperparameters=load_json( + os.path.join(experiment_run_path, MODEL_FILE_NAME, "model_hyperparameters.json") + ), + training_progress=load_json(os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_progress.json")), + training_set_metadata=load_json( + os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_set_metadata.json") + ), ) diff --git a/ludwig/benchmarking/summary_dataclasses.py b/ludwig/benchmarking/summary_dataclasses.py index 9a93e09ea14..af18e5bc80d 100644 --- a/ludwig/benchmarking/summary_dataclasses.py +++ b/ludwig/benchmarking/summary_dataclasses.py @@ -7,7 +7,7 @@ import ludwig.modules.metric_modules # noqa: F401 from ludwig.benchmarking.utils import format_memory, format_time -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.modules.metric_registry import get_metric_classes, metric_feature_type_registry # noqa: F401 from ludwig.types import ModelConfigDict from ludwig.utils.data_utils import load_json @@ -209,7 +209,7 @@ def build_metrics_summary(experiment_local_directory: str) -> MetricsSummary: e.g. local_experiment_repo/ames_housing/some_experiment/ """ config = load_json( - os.path.join(experiment_local_directory, "experiment_run", "model", MODEL_HYPERPARAMETERS_FILE_NAME) + os.path.join(experiment_local_directory, "experiment_run", MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME) ) report = load_json(os.path.join(experiment_local_directory, "experiment_run", "test_statistics.json")) output_feature_type: str = config["output_features"][0]["type"] diff --git a/ludwig/benchmarking/utils.py b/ludwig/benchmarking/utils.py index 0b49b8bd870..87fbe0d2cb4 100644 --- a/ludwig/benchmarking/utils.py +++ b/ludwig/benchmarking/utils.py @@ -16,7 +16,7 @@ from ludwig.constants import BINARY, CATEGORY from ludwig.datasets import model_configs_for_dataset from ludwig.datasets.loaders.dataset_loader import DatasetLoader -from ludwig.globals import CONFIG_YAML +from ludwig.globals import CONFIG_YAML, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils.data_utils import load_yaml from ludwig.utils.dataset_utils import get_repeatable_train_val_test_split from ludwig.utils.defaults import default_random_seed @@ -251,9 +251,9 @@ def delete_model_checkpoints(output_directory: str): Args: output_directory: output directory of the hyperopt run. """ - shutil.rmtree(os.path.join(output_directory, "model", "training_checkpoints"), ignore_errors=True) - if os.path.isfile(os.path.join(output_directory, "model", "model_weights")): - os.remove(os.path.join(output_directory, "model", "model_weights")) + shutil.rmtree(os.path.join(output_directory, MODEL_FILE_NAME, "training_checkpoints"), ignore_errors=True) + if os.path.isfile(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)): + os.remove(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) def delete_hyperopt_outputs(output_directory: str): diff --git a/ludwig/contribs/mlflow/__init__.py b/ludwig/contribs/mlflow/__init__.py index ee4314ed239..55c51a9ac88 100644 --- a/ludwig/contribs/mlflow/__init__.py +++ b/ludwig/contribs/mlflow/__init__.py @@ -6,7 +6,7 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.callbacks import Callback from ludwig.constants import TRAINER -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME from ludwig.types import TrainingSetMetadataDict from ludwig.utils.data_utils import chunk_dict, flatten_dict, save_json, to_json_dict from ludwig.utils.package_utils import LazyLoader @@ -258,7 +258,7 @@ def _log_mlflow(log_metrics, steps, save_path, should_continue, log_artifacts: b def _log_artifacts(output_directory): for fname in os.listdir(output_directory): lpath = os.path.join(output_directory, fname) - if fname == "model": + if fname == MODEL_FILE_NAME: _log_model(lpath) else: mlflow.log_artifact(lpath) diff --git a/ludwig/contribs/mlflow/model.py b/ludwig/contribs/mlflow/model.py index c1f2c5f6962..16403c7afdd 100644 --- a/ludwig/contribs/mlflow/model.py +++ b/ludwig/contribs/mlflow/model.py @@ -16,7 +16,7 @@ from mlflow.utils.model_utils import _get_flavor_configuration from ludwig.api_annotations import DeveloperAPI -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.utils.data_utils import load_json FLAVOR_NAME = "ludwig" @@ -97,7 +97,7 @@ def save_model( path = os.path.abspath(path) if os.path.exists(path): raise MlflowException(f"Path '{path}' already exists") - model_data_subpath = "model" + model_data_subpath = MODEL_FILE_NAME model_data_path = os.path.join(path, model_data_subpath) os.makedirs(path) if mlflow_model is None: @@ -267,7 +267,7 @@ def export_model(model_path, output_path, registered_model_name=None): if not model_path.startswith("runs:/") or output_path is not None: # No run specified, so in order to register the model in mlflow, we need # to create a new run and upload the model as an artifact first - output_path = output_path or "model" + output_path = output_path or MODEL_FILE_NAME log_model( _CopyModel(model_path), artifact_path=output_path, @@ -295,7 +295,7 @@ def log_saved_model(lpath): """ log_model( _CopyModel(lpath), - artifact_path="model", + artifact_path=MODEL_FILE_NAME, ) diff --git a/ludwig/globals.py b/ludwig/globals.py index 751a0be780c..199f3188af2 100644 --- a/ludwig/globals.py +++ b/ludwig/globals.py @@ -16,6 +16,7 @@ LUDWIG_VERSION = "0.10.2.dev" +MODEL_FILE_NAME = "model" MODEL_WEIGHTS_FILE_NAME = "model_weights" MODEL_HYPERPARAMETERS_FILE_NAME = "model_hyperparameters.json" TRAIN_SET_METADATA_FILE_NAME = "training_set_metadata.json" diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py index dc479e2a364..c1ad226a116 100644 --- a/ludwig/hyperopt/execution.py +++ b/ludwig/hyperopt/execution.py @@ -35,6 +35,7 @@ from ludwig.backend.ray import initialize_ray from ludwig.callbacks import Callback from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION +from ludwig.globals import MODEL_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.hyperopt.syncer import RemoteSyncer from ludwig.hyperopt.utils import load_json_values, substitute_parameters @@ -125,7 +126,7 @@ def ignore_dot_files(src, files): return [f for f in files if f.startswith(".")] with tune.checkpoint_dir(step=progress_tracker.tune_checkpoint_num) as checkpoint_dir: - checkpoint_model = os.path.join(checkpoint_dir, "model") + checkpoint_model = os.path.join(checkpoint_dir, MODEL_FILE_NAME) # Atomic copying of the checkpoints if not os.path.isdir(checkpoint_model): copy_id = uuid.uuid4() @@ -411,7 +412,7 @@ def _evaluate_best_model( debug, ): best_model = LudwigModel.load( - os.path.join(best_model_path, "model"), + os.path.join(best_model_path, MODEL_FILE_NAME), backend=backend, gpus=gpus, gpu_memory_limit=gpu_memory_limit, @@ -549,7 +550,7 @@ def on_trainer_train_setup(self, trainer, save_path, is_coordinator): os.rename(save_path, tmp_path) try: - safe_move_file(os.path.join(ckpt_path, "model"), save_path) + safe_move_file(os.path.join(ckpt_path, MODEL_FILE_NAME), save_path) except Exception: # Rollback from partial changes. Remove the save_path # and move the original save_path back. diff --git a/ludwig/trainers/base.py b/ludwig/trainers/base.py index 1d56461aba4..e6939ca2764 100644 --- a/ludwig/trainers/base.py +++ b/ludwig/trainers/base.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from ludwig.data.dataset.base import Dataset +from ludwig.globals import MODEL_FILE_NAME from ludwig.schema.trainer import BaseTrainerConfig from ludwig.types import ModelConfigDict from ludwig.utils.defaults import default_random_seed @@ -8,7 +9,7 @@ class BaseTrainer(ABC): @abstractmethod - def train(self, training_set, validation_set=None, test_set=None, save_path="model", **kwargs): + def train(self, training_set, validation_set=None, test_set=None, save_path=MODEL_FILE_NAME, **kwargs): raise NotImplementedError() @abstractmethod diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py index b4a66af14cf..9edb6507fe5 100644 --- a/ludwig/trainers/trainer.py +++ b/ludwig/trainers/trainer.py @@ -50,6 +50,7 @@ from ludwig.distributed.base import DistributedStrategy, LocalStrategy from ludwig.globals import ( is_progressbar_disabled, + MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAINING_CHECKPOINTS_DIR_PATH, TRAINING_PROGRESS_TRACKER_FILE_NAME, @@ -831,7 +832,7 @@ def train( training_set, validation_set=None, test_set=None, - save_path="model", + save_path=MODEL_FILE_NAME, return_state_dict: bool = False, **kwargs, ): diff --git a/ludwig/trainers/trainer_lightgbm.py b/ludwig/trainers/trainer_lightgbm.py index 2a8174bc4f1..74f0df1d98d 100644 --- a/ludwig/trainers/trainer_lightgbm.py +++ b/ludwig/trainers/trainer_lightgbm.py @@ -15,7 +15,12 @@ from ludwig.distributed import init_dist_strategy from ludwig.distributed.base import DistributedStrategy, LocalStrategy from ludwig.features.feature_utils import LudwigFeatureDict -from ludwig.globals import is_progressbar_disabled, TRAINING_CHECKPOINTS_DIR_PATH, TRAINING_PROGRESS_TRACKER_FILE_NAME +from ludwig.globals import ( + is_progressbar_disabled, + MODEL_FILE_NAME, + TRAINING_CHECKPOINTS_DIR_PATH, + TRAINING_PROGRESS_TRACKER_FILE_NAME, +) from ludwig.models.gbm import GBM from ludwig.models.predictor import Predictor from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value @@ -562,7 +567,7 @@ def train( training_set: Union["Dataset", "RayDataset"], # noqa: F821 validation_set: Optional[Union["Dataset", "RayDataset"]], # noqa: F821 test_set: Optional[Union["Dataset", "RayDataset"]], # noqa: F821 - save_path="model", + save_path=MODEL_FILE_NAME, **kwargs, ): # ====== General setup ======= diff --git a/ludwig/trainers/trainer_llm.py b/ludwig/trainers/trainer_llm.py index 18f70e17198..727257c1375 100644 --- a/ludwig/trainers/trainer_llm.py +++ b/ludwig/trainers/trainer_llm.py @@ -9,6 +9,7 @@ from ludwig.data.dataset.base import Dataset from ludwig.distributed.base import DistributedStrategy, LocalStrategy from ludwig.features.feature_utils import LudwigFeatureDict +from ludwig.globals import MODEL_FILE_NAME from ludwig.models.llm import LLM from ludwig.models.predictor import LlmFineTunePredictor, LlmPredictor from ludwig.modules.metric_modules import get_initial_validation_value @@ -140,7 +141,7 @@ def train( training_set: Dataset, validation_set: Optional[Dataset] = None, test_set: Optional[Dataset] = None, - save_path: str = "model", + save_path: str = MODEL_FILE_NAME, return_state_dict: bool = False, **kwargs, ): diff --git a/ludwig/upload.py b/ludwig/upload.py index fa626f3a1e1..acb325046f0 100644 --- a/ludwig/upload.py +++ b/ludwig/upload.py @@ -1,8 +1,10 @@ import argparse import logging +import os import sys from typing import Optional +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils.print_utils import get_logging_level_registry from ludwig.utils.upload_utils import HuggingFaceHub, Predibase @@ -38,9 +40,9 @@ def upload_cli( A namespace (user or an organization) and a repo name separated by a `/`. model_path (`str`): - The path of the saved model. This is the top level directory where - the models weights as well as other associated training artifacts - are saved. + The path of the saved model. This is the parent-folder of the folder + where the 'model_weights' folder and the 'model_hyperparameters.json' file + are stored. private (`bool`, *optional*, defaults to `False`): Whether the model repo should be private. repo_type (`str`, *optional*): @@ -60,10 +62,23 @@ def upload_cli( `"predibase"`. """ model_service = get_upload_registry().get(service, "hf_hub") - hub = model_service() + hub: HuggingFaceHub = model_service() + if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists( + os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME) + ): + experiment_path = model_path + elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists( + os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME) + ): + experiment_path = os.path.normpath(os.path.join(model_path, "..")) + else: + raise ValueError( + f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at " + f"'{model_path}' or at '{model_path}/model'" + ) hub.upload( repo_id=repo_id, - model_path=model_path, + model_path=experiment_path, repo_type=repo_type, private=private, commit_message=commit_message, diff --git a/ludwig/utils/checkpoint_utils.py b/ludwig/utils/checkpoint_utils.py index 6988347a19c..d117a053443 100644 --- a/ludwig/utils/checkpoint_utils.py +++ b/ludwig/utils/checkpoint_utils.py @@ -19,6 +19,7 @@ from torch.optim import Optimizer from ludwig.api_annotations import DeveloperAPI +from ludwig.globals import MODEL_WEIGHTS_FILE_NAME from ludwig.modules.lr_scheduler import LRScheduler if TYPE_CHECKING: @@ -139,7 +140,7 @@ def load(self, save_path: str, device: Optional[torch.device] = None) -> bool: state = torch.load(save_path, map_location=device) try: self.global_step = self._get_global_step(state, save_path) - _, unexpected_keys = self.model.load_state_dict(state["model_weights"], strict=False) + _, unexpected_keys = self.model.load_state_dict(state[MODEL_WEIGHTS_FILE_NAME], strict=False) assert unexpected_keys == [], f"Unexpected keys found in state dict: {unexpected_keys}" if self.optimizer is not None: self.optimizer.load_state_dict(state["optim_state"]) @@ -161,7 +162,7 @@ def load(self, save_path: str, device: Optional[torch.device] = None) -> bool: def get_state_for_inference(self, save_path: str, device: Optional[torch.device] = None) -> Mapping[str, Any]: state = torch.load(save_path, map_location=device) - return state["model_weights"] + return state[MODEL_WEIGHTS_FILE_NAME] def save(self, save_path: str, global_step: int): """Save a state to disk. @@ -176,7 +177,7 @@ def save(self, save_path: str, global_step: int): if self.is_local_rank_0(): state = { "global_step": global_step, - "model_weights": self.get_model_state_dict(), + MODEL_WEIGHTS_FILE_NAME: self.get_model_state_dict(), } if self.optimizer is not None: state["optim_state"] = self.optimizer.state_dict() diff --git a/ludwig/utils/misc_utils.py b/ludwig/utils/misc_utils.py index 22bc09ab8d2..6949d1bdf1a 100644 --- a/ludwig/utils/misc_utils.py +++ b/ludwig/utils/misc_utils.py @@ -28,7 +28,7 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.constants import PROC_COLUMN -from ludwig.globals import DESCRIPTION_FILE_NAME +from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME from ludwig.utils import fs_utils from ludwig.utils.fs_utils import find_non_existing_dir_by_adding_suffix @@ -136,7 +136,7 @@ def get_file_names(output_directory): description_fn = os.path.join(output_directory, DESCRIPTION_FILE_NAME) training_stats_fn = os.path.join(output_directory, "training_statistics.json") - model_dir = os.path.join(output_directory, "model") + model_dir = os.path.join(output_directory, MODEL_FILE_NAME) return description_fn, training_stats_fn, model_dir diff --git a/ludwig/utils/upload_utils.py b/ludwig/utils/upload_utils.py index bdcc6df9c13..f3aed5f8bea 100644 --- a/ludwig/utils/upload_utils.py +++ b/ludwig/utils/upload_utils.py @@ -7,7 +7,7 @@ from huggingface_hub import HfApi, login from huggingface_hub.hf_api import CommitInfo -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME logger = logging.getLogger(__name__) @@ -75,8 +75,9 @@ def _validate_upload_parameters( Args: repo_id (str): The ID of the target repository. Each provider will verify their specific rules. - model_path (str): The path to the directory containing the trained model artifacts. It should contain - the model's weights, usually saved under 'model/model_weights'. + model_path (str): The path to the directory containing the trained model artifacts. + This is the parent-folder of the folder where the 'model_weights' folder and the + 'model_hyperparameters.json' file are stored. repo_type (str, optional): The type of the repository. Not used in the base class, but subclasses may use it for specific repository implementations. Defaults to None. private (bool, optional): Whether the repository should be private or not. Not used in the base class, @@ -99,7 +100,7 @@ def _validate_upload_parameters( raise FileNotFoundError(f"The path '{model_path}' does not exist.") # Make sure the model is actually trained - trained_model_artifacts_path = os.path.join(model_path, "model", "model_weights") + trained_model_artifacts_path = os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME) if not os.path.exists(trained_model_artifacts_path): raise Exception( f"Model artifacts not found at {trained_model_artifacts_path}. " @@ -153,8 +154,9 @@ def _validate_upload_parameters( repo_id (str): The ID of the target repository. It must be a namespace (user or an organization) and a repository name separated by a '/'. For example, if your HF username is 'johndoe' and you want to create a repository called 'test', the repo_id should be 'johndoe/test'. - model_path (str): The path to the directory containing the trained model artifacts. It should contain - the model's weights, usually saved under 'model/model_weights'. + model_path (str): The path to the directory containing the trained model artifacts. + This is the parent-folder of the folder where the 'model_weights' folder and the + 'model_hyperparameters.json' file are stored. repo_type (str, optional): The type of the repository. Not used in the base class, but subclasses may use it for specific repository implementations. Defaults to None. private (bool, optional): Whether the repository should be private or not. Not used in the base class, @@ -185,7 +187,7 @@ def _validate_upload_parameters( commit_description, ) - trained_model_artifacts_path = os.path.join(model_path, "model", "model_weights") + trained_model_artifacts_path = os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME) """ Make sure the model's saved artifacts either contain: 1. pytorch_model.bin -> regular model training, such as ECD or for LLMs @@ -207,7 +209,7 @@ def _validate_upload_parameters( "either be saved as `pytorch_model.bin` for regular model training, or have `adapter_model.bin`" "or `adapter_model.safetensors` if using parameter efficient fine-tuning methods like LoRA." ) - model_hyperparameters_path: str = os.path.join(model_path, "model") + model_hyperparameters_path: str = os.path.join(model_path, MODEL_FILE_NAME) if MODEL_HYPERPARAMETERS_FILE_NAME not in os.listdir(model_hyperparameters_path): raise ValueError(f"Can't find '{MODEL_HYPERPARAMETERS_FILE_NAME}' at {model_hyperparameters_path}.") @@ -228,9 +230,9 @@ def upload( A namespace (user or an organization) and a repo name separated by a `/`. model_path (`str`): - The path of the saved model. This is the top level directory where - the models weights as well as other associated training artifacts - are saved. + The path of the saved model. This is the parent-folder of the folder + where the 'model_weights' folder and the 'model_hyperparameters.json' file + are stored. repo_type (`str`, *optional*): Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to a model. Default is @@ -266,8 +268,9 @@ def upload( commit_description_weights: str | None = ( f"{commit_description} (weights)" if commit_description else commit_description ) + folder_path = os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME) upload_path_weights: CommitInfo = self.api.upload_folder( - folder_path=os.path.join(model_path, "model", "model_weights"), + folder_path=folder_path, repo_id=repo_id, repo_type=repo_type, commit_message=commit_message_weights, @@ -281,8 +284,9 @@ def upload( commit_description_config: str | None = ( f"{commit_description} (config)" if commit_description else commit_description ) + path_or_fileobj = os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME) upload_path_config: CommitInfo = self.api.upload_file( - path_or_fileobj=os.path.join(model_path, "model", MODEL_HYPERPARAMETERS_FILE_NAME), + path_or_fileobj=path_or_fileobj, path_in_repo="ludwig_config.json", repo_id=repo_id, repo_type=repo_type, diff --git a/tests/integration_tests/scripts/run_train_horovod.py b/tests/integration_tests/scripts/run_train_horovod.py index f4204c080d3..a40beabe5ef 100644 --- a/tests/integration_tests/scripts/run_train_horovod.py +++ b/tests/integration_tests/scripts/run_train_horovod.py @@ -24,6 +24,7 @@ import ludwig.utils.horovod_utils from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, TRAINER +from ludwig.globals import MODEL_FILE_NAME PATH_HERE = os.path.abspath(os.path.dirname(__file__)) PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") @@ -54,7 +55,7 @@ def run_api_experiment(input_features, output_features, dataset, **kwargs): model.predict(dataset=dataset) # Attempt loading saved model, should broadcast successfully - model_dir = os.path.join(output_dir, "model") if output_dir else None + model_dir = os.path.join(output_dir, MODEL_FILE_NAME) if output_dir else None loaded_model = LudwigModel.load(model_dir) # Model loading should broadcast weights from coordinator diff --git a/tests/integration_tests/test_api.py b/tests/integration_tests/test_api.py index 9b4315f2d3d..21e81f08202 100644 --- a/tests/integration_tests/test_api.py +++ b/tests/integration_tests/test_api.py @@ -25,7 +25,7 @@ from ludwig.api import LudwigModel from ludwig.callbacks import Callback from ludwig.constants import BATCH_SIZE, ENCODER, TRAINER, TYPE -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.models.inference import InferenceModule from ludwig.utils.data_utils import read_csv from tests.integration_tests.utils import ( @@ -732,7 +732,7 @@ def test_saved_weights_in_checkpoint(tmpdir): training_set=data_csv, validation_set=val_csv, test_set=test_csv, output_directory=tmpdir ) - config_save_path = os.path.join(output_dir, "model", MODEL_HYPERPARAMETERS_FILE_NAME) + config_save_path = os.path.join(output_dir, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME) with open(config_save_path) as f: saved_config = json.load(f) saved_input_features = saved_config["input_features"] diff --git a/tests/integration_tests/test_cli.py b/tests/integration_tests/test_cli.py index 4bdbc50200d..16791ee6e3b 100644 --- a/tests/integration_tests/test_cli.py +++ b/tests/integration_tests/test_cli.py @@ -34,6 +34,7 @@ PREPROCESSING, TRAINER, ) +from ludwig.globals import MODEL_FILE_NAME from ludwig.types import FeatureConfigDict from ludwig.utils.data_utils import load_yaml from tests.integration_tests.utils import category_feature, generate_data, number_feature, sequence_feature @@ -177,7 +178,7 @@ def test_train_cli_horovod(tmpdir, csv_filename): dataset=dataset_filename, config=config_filename, output_directory=str(tmpdir), - model_load_path=os.path.join(tmpdir, "horovod_experiment_run", "model"), + model_load_path=os.path.join(tmpdir, "horovod_experiment_run", MODEL_FILE_NAME), ) @@ -188,7 +189,7 @@ def test_export_torchscript_cli(tmpdir, csv_filename): _run_ludwig("train", dataset=dataset_filename, config=config_filename, output_directory=str(tmpdir)) _run_ludwig( "export_torchscript", - model_path=os.path.join(tmpdir, "experiment_run", "model"), + model_path=os.path.join(tmpdir, "experiment_run", MODEL_FILE_NAME), output_path=os.path.join(tmpdir, "torchscript"), ) @@ -200,7 +201,7 @@ def test_export_mlflow_cli(tmpdir, csv_filename): _run_ludwig("train", dataset=dataset_filename, config=config_filename, output_directory=str(tmpdir)) _run_ludwig( "export_mlflow", - model_path=os.path.join(tmpdir, "experiment_run", "model"), + model_path=os.path.join(tmpdir, "experiment_run", MODEL_FILE_NAME), output_path=os.path.join(tmpdir, "data/results/mlflow"), ) @@ -220,7 +221,7 @@ def test_predict_cli(tmpdir, csv_filename): _run_ludwig( "predict", dataset=dataset_filename, - model=os.path.join(tmpdir, "experiment_run", "model"), + model=os.path.join(tmpdir, "experiment_run", MODEL_FILE_NAME), output_directory=os.path.join(tmpdir, "predictions"), ) @@ -233,7 +234,7 @@ def test_evaluate_cli(tmpdir, csv_filename): _run_ludwig( "evaluate", dataset=dataset_filename, - model=os.path.join(tmpdir, "experiment_run", "model"), + model=os.path.join(tmpdir, "experiment_run", MODEL_FILE_NAME), output_directory=os.path.join(tmpdir, "predictions"), ) @@ -265,7 +266,7 @@ def test_collect_summary_activations_weights_cli(tmpdir, csv_filename): config_filename = os.path.join(tmpdir, "config.yaml") dataset_filename = _prepare_data(csv_filename, config_filename) _run_ludwig("train", dataset=dataset_filename, config=config_filename, output_directory=str(tmpdir)) - assert _run_ludwig("collect_summary", model=os.path.join(tmpdir, "experiment_run", "model")) + assert _run_ludwig("collect_summary", model=os.path.join(tmpdir, "experiment_run", MODEL_FILE_NAME)) def test_synthesize_dataset_cli(tmpdir, csv_filename): diff --git a/tests/integration_tests/test_collect.py b/tests/integration_tests/test_collect.py index 95869eac8c8..a3d4574152f 100644 --- a/tests/integration_tests/test_collect.py +++ b/tests/integration_tests/test_collect.py @@ -21,6 +21,7 @@ from ludwig.api import LudwigModel from ludwig.collect import collect_activations, collect_weights, print_model_summary from ludwig.constants import BATCH_SIZE, ENCODER, TRAINER, TYPE +from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.torch_utils import get_torch_device from tests.integration_tests.utils import category_feature, ENCODERS, generate_data, sequence_feature @@ -65,7 +66,7 @@ def test_collect_weights(tmpdir, csv_filename): output_dir = None try: model, output_dir = _train(*_prepare_data(csv_filename)) - model_path = os.path.join(output_dir, "model") + model_path = os.path.join(output_dir, MODEL_FILE_NAME) # 1 for the encoder (embeddings). # 2 for the decoder classifier (w and b). @@ -92,7 +93,7 @@ def test_collect_activations(tmpdir, csv_filename): output_dir = None try: model, output_dir = _train(*_prepare_data(csv_filename)) - model_path = os.path.join(output_dir, "model") + model_path = os.path.join(output_dir, MODEL_FILE_NAME) # [last_hidden, logits, projection_input] filenames = _collect_activations( @@ -107,5 +108,5 @@ def test_collect_activations(tmpdir, csv_filename): def test_print_model_summary(csv_filename): output_dir = None model, output_dir = _train(*_prepare_data(csv_filename)) - model_path = os.path.join(output_dir, "model") + model_path = os.path.join(output_dir, MODEL_FILE_NAME) print_model_summary(model_path) diff --git a/tests/integration_tests/test_experiment.py b/tests/integration_tests/test_experiment.py index cd409b5956d..cf46c1672cc 100644 --- a/tests/integration_tests/test_experiment.py +++ b/tests/integration_tests/test_experiment.py @@ -35,6 +35,7 @@ from ludwig.encoders.registry import get_encoder_classes from ludwig.error import ConfigValidationError from ludwig.experiment import experiment_cli +from ludwig.globals import MODEL_FILE_NAME from ludwig.predict import predict_cli from ludwig.utils.data_utils import read_csv from ludwig.utils.defaults import default_random_seed @@ -713,7 +714,7 @@ def test_experiment_model_resume(tmpdir): experiment_cli(config, dataset=rel_path, model_resume_path=output_dir) - predict_cli(os.path.join(output_dir, "model"), dataset=rel_path) + predict_cli(os.path.join(output_dir, MODEL_FILE_NAME), dataset=rel_path) shutil.rmtree(output_dir, ignore_errors=True) @@ -771,7 +772,9 @@ def _run_experiment_model_resume_distributed(tmpdir, dist_strategy): config, dataset=rel_path, model_resume_path=output_dir, output_directory=os.path.join(tmpdir, "results2") ) - predict_cli(os.path.join(output_dir, "model"), dataset=rel_path, output_directory=os.path.join(tmpdir, "results3")) + predict_cli( + os.path.join(output_dir, MODEL_FILE_NAME), dataset=rel_path, output_directory=os.path.join(tmpdir, "results3") + ) @pytest.mark.parametrize( @@ -800,7 +803,7 @@ def test_experiment_model_resume_missing_file(tmpdir, missing_file): try: # Remove file to simulate failure during first epoch of training which prevents # training_checkpoints to be empty and training_progress.json to not be created - missing_file_path = os.path.join(output_dir, "model", missing_file) + missing_file_path = os.path.join(output_dir, MODEL_FILE_NAME, missing_file) if missing_file == "training_progress.json": os.remove(missing_file_path) else: @@ -809,7 +812,7 @@ def test_experiment_model_resume_missing_file(tmpdir, missing_file): # Training should start a fresh model training run without any errors experiment_cli(config, dataset=rel_path, model_resume_path=output_dir) - predict_cli(os.path.join(output_dir, "model"), dataset=rel_path) + predict_cli(os.path.join(output_dir, MODEL_FILE_NAME), dataset=rel_path) shutil.rmtree(output_dir, ignore_errors=True) diff --git a/tests/integration_tests/test_gbm.py b/tests/integration_tests/test_gbm.py index dd72a817625..bad65f375a7 100644 --- a/tests/integration_tests/test_gbm.py +++ b/tests/integration_tests/test_gbm.py @@ -6,6 +6,7 @@ from ludwig.api import LudwigModel from ludwig.constants import INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES, TRAINER from ludwig.error import ConfigValidationError +from ludwig.globals import MODEL_FILE_NAME from ludwig.schema.model_types.base import ModelConfig from tests.integration_tests import synthetic_test_data from tests.integration_tests.utils import binary_feature @@ -77,7 +78,7 @@ def _train_and_predict_gbm(input_features, output_features, tmpdir, backend_conf skip_save_unprocessed_output=True, skip_save_log=True, ) - model.load(os.path.join(tmpdir, "api_experiment_run", "model")) + model.load(os.path.join(tmpdir, "api_experiment_run", MODEL_FILE_NAME)) preds, _ = model.predict(dataset=dataset_filename, output_directory=output_directory, split="test") return preds, model diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py index fc6643783ff..42f4e73c645 100644 --- a/tests/integration_tests/test_hyperopt.py +++ b/tests/integration_tests/test_hyperopt.py @@ -44,7 +44,7 @@ TYPE, VALIDATION, ) -from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME +from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults @@ -365,7 +365,7 @@ def _run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, backend, ray_ ) as path: assert path is not None assert isinstance(path, str) - assert "model" in os.listdir(path) + assert MODEL_FILE_NAME in os.listdir(path) @pytest.mark.slow @@ -432,7 +432,10 @@ def test_hyperopt_with_feature_specific_parameters(csv_filename, tmpdir, ray_clu model_parameters = json.load( open( os.path.join( - hyperopt_results_df.iloc[0]["trial_dir"], "test_hyperopt_run", "model", "model_hyperparameters.json" + hyperopt_results_df.iloc[0]["trial_dir"], + "test_hyperopt_run", + MODEL_FILE_NAME, + "model_hyperparameters.json", ) ) ) @@ -563,7 +566,7 @@ def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster_7cpu): for _, trial_meta in results_df.iterrows(): trial_dir = trial_meta["trial_dir"] trial_config = load_json( - os.path.join(trial_dir, "test_hyperopt_nested_params_run", "model", "model_hyperparameters.json") + os.path.join(trial_dir, "test_hyperopt_nested_params_run", MODEL_FILE_NAME, "model_hyperparameters.json") ) assert len(trial_config[INPUT_FEATURES]) == len(config[INPUT_FEATURES]) diff --git a/tests/integration_tests/test_hyperopt_ray.py b/tests/integration_tests/test_hyperopt_ray.py index a34f215037a..07e3374d1c6 100644 --- a/tests/integration_tests/test_hyperopt_ray.py +++ b/tests/integration_tests/test_hyperopt_ray.py @@ -26,7 +26,7 @@ from ludwig.callbacks import Callback from ludwig.constants import ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER from ludwig.contribs.mlflow import MlflowCallback -from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults @@ -130,7 +130,7 @@ def on_trial_complete(self, iteration: int, trials: List["Trial"], trial: "Trial self.trial_status[trial.trial_id] = trial.status model_hyperparameters = os.path.join( - trial.logdir, f"{self.exp_name}_{self.model_type}", "model", MODEL_HYPERPARAMETERS_FILE_NAME + trial.logdir, f"{self.exp_name}_{self.model_type}", MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME ) if os.path.isfile(model_hyperparameters): try: @@ -331,7 +331,7 @@ def test_hyperopt_ray_mlflow(csv_filename, tmpdir, ray_cluster_4cpu): for run in runs: artifacts = [f.path for f in client.list_artifacts(run.info.run_id, "")] assert "config.yaml" in artifacts - assert "model" in artifacts + assert MODEL_FILE_NAME in artifacts def run_hyperopt( diff --git a/tests/integration_tests/test_llm.py b/tests/integration_tests/test_llm.py index de53a4be29c..ed377b48ca3 100644 --- a/tests/integration_tests/test_llm.py +++ b/tests/integration_tests/test_llm.py @@ -39,6 +39,7 @@ TRAINER, TYPE, ) +from ludwig.globals import MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.models.llm import LLM from ludwig.schema.model_types.base import ModelConfig from ludwig.utils.fs_utils import list_file_names_in_directory @@ -613,7 +614,7 @@ def test_llm_finetuning_strategies(tmpdir, csv_filename, backend, finetune_strat train_df, prediction_df, config = _prepare_finetuning_test(csv_filename, finetune_strategy, backend, adapter_args) output_directory: str = str(tmpdir) - model_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / "model" + model_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / MODEL_FILE_NAME model = LudwigModel(config) model.train(dataset=train_df, output_directory=output_directory, skip_save_processed_input=False) @@ -665,7 +666,7 @@ def test_llm_finetuning_strategies_quantized(tmpdir, csv_filename, finetune_stra model.train(dataset=train_df, output_directory=str(tmpdir), skip_save_processed_input=False) # Make sure we can load the saved model and then use it for predictions - model = LudwigModel.load(os.path.join(str(tmpdir), "api_experiment_run", "model")) + model = LudwigModel.load(os.path.join(str(tmpdir), "api_experiment_run", MODEL_FILE_NAME)) base_model = LLM(ModelConfig.from_dict(config)) assert not _compare_models(base_model, model.model) # noqa F821 @@ -893,8 +894,10 @@ def test_llm_lora_finetuning_merge_and_unload( ) output_directory: str = str(tmpdir) - model_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / "model" - model_weights_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / "model" / "model_weights" + model_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / MODEL_FILE_NAME + model_weights_directory: str = ( + pathlib.Path(output_directory) / "api_experiment_run" / MODEL_FILE_NAME / MODEL_WEIGHTS_FILE_NAME + ) model = LudwigModel(config) model.train(dataset=train_df, output_directory=output_directory, skip_save_processed_input=False) @@ -1216,7 +1219,7 @@ def test_llm_finetuning_with_embedding_noise( assert model.config_obj.model_parameters.neftune_noise_alpha == embedding_noise output_directory: str = str(tmpdir) - model_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / "model" + model_directory: str = pathlib.Path(output_directory) / "api_experiment_run" / MODEL_FILE_NAME model.train(dataset=train_df, output_directory=output_directory, skip_save_processed_input=False) # Make sure we can load the saved model and then use it for predictions @@ -1354,7 +1357,7 @@ def test_llm_used_tokens(tmpdir): model.train(dataset=df, output_directory=str(tmpdir), skip_save_processed_input=False) with open( - os.path.join(str(tmpdir), "api_experiment_run", "model", "training_progress.json"), encoding="utf-8" + os.path.join(str(tmpdir), "api_experiment_run", MODEL_FILE_NAME, "training_progress.json"), encoding="utf-8" ) as f: progress_tracker = json.load(f) diff --git a/tests/integration_tests/test_missing_value_strategy.py b/tests/integration_tests/test_missing_value_strategy.py index b9ab7a1650e..fd32b396603 100644 --- a/tests/integration_tests/test_missing_value_strategy.py +++ b/tests/integration_tests/test_missing_value_strategy.py @@ -21,6 +21,7 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, COLUMN, DROP_ROW, FILL_WITH_MEAN, PREPROCESSING, PROC_COLUMN, TRAINER +from ludwig.globals import MODEL_FILE_NAME from tests.integration_tests.utils import ( binary_feature, category_feature, @@ -60,7 +61,7 @@ def test_missing_value_prediction(tmpdir, csv_filename): dataset[input_features[0]["name"]] = None model.predict(dataset=dataset) - model = LudwigModel.load(os.path.join(output_dir, "model")) + model = LudwigModel.load(os.path.join(output_dir, MODEL_FILE_NAME)) model.predict(dataset=dataset) diff --git a/tests/integration_tests/test_mlflow.py b/tests/integration_tests/test_mlflow.py index c04876905b5..bac996a781a 100644 --- a/tests/integration_tests/test_mlflow.py +++ b/tests/integration_tests/test_mlflow.py @@ -13,6 +13,7 @@ from ludwig.constants import TRAINER from ludwig.contribs.mlflow import MlflowCallback from ludwig.export import export_mlflow +from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version from tests.integration_tests.utils import category_feature, FakeRemoteBackend, generate_data, sequence_feature @@ -167,10 +168,10 @@ def test_export_mlflow_local(tmpdir): model = LudwigModel(config, backend=FakeRemoteBackend()) _, _, output_directory = model.train(training_set=data_csv, experiment_name=exp_name, output_directory=output_dir) - model_path = os.path.join(output_directory, "model") + model_path = os.path.join(output_directory, MODEL_FILE_NAME) output_path = os.path.join(tmpdir, "data/results/mlflow") export_mlflow(model_path, output_path) - assert set(os.listdir(output_path)) == {"MLmodel", "model", "conda.yaml"} + assert set(os.listdir(output_path)) == {"MLmodel", MODEL_FILE_NAME, "conda.yaml"} @pytest.mark.distributed diff --git a/tests/integration_tests/test_model_save_and_load.py b/tests/integration_tests/test_model_save_and_load.py index 8f2eaab372e..ffba4ab72ff 100644 --- a/tests/integration_tests/test_model_save_and_load.py +++ b/tests/integration_tests/test_model_save_and_load.py @@ -10,6 +10,7 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, ENCODER, LOSS, NAME, PREPROCESSING, TRAINER, TRAINING, TYPE from ludwig.data.split import get_splitter +from ludwig.globals import MODEL_FILE_NAME from ludwig.modules.loss_modules import MSELoss from ludwig.schema.features.loss.loss import MSELossConfig from ludwig.utils.data_utils import read_csv @@ -70,7 +71,7 @@ def test_model_load_from_checkpoint(tmpdir, csv_filename, tmp_path): output_directory="results", # results_dir ) - model_dir = os.path.join(output_dir, "model") + model_dir = os.path.join(output_dir, MODEL_FILE_NAME) ludwig_model_loaded = LudwigModel.load(model_dir, backend=backend, from_checkpoint=True) preds_1, _ = ludwig_model1.predict(dataset=validation_set) @@ -207,7 +208,7 @@ def check_model_equal(ludwig_model2): check_model_equal(ludwig_model_loaded) # Test loading the model from the experiment directory - ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, "model"), backend=backend) + ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, MODEL_FILE_NAME), backend=backend) check_model_equal(ludwig_model_exp) @@ -297,7 +298,7 @@ def check_model_equal(ludwig_model2): check_model_equal(ludwig_model_loaded) # Test loading the model from the experiment directory - ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, "model"), backend=backend) + ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, MODEL_FILE_NAME), backend=backend) check_model_equal(ludwig_model_exp) @@ -437,7 +438,7 @@ def check_model_equal(ludwig_model2): check_model_equal(ludwig_model_loaded) # Test loading the model from the experiment directory - ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, "model"), backend=backend) + ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, MODEL_FILE_NAME), backend=backend) # confirm model structure and weights are the same check_model_equal(ludwig_model_exp) @@ -529,7 +530,7 @@ def check_model_equal(ludwig_model2): check_model_equal(ludwig_model_loaded) # Test loading the model from the experiment directory - ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, "model"), backend=backend) + ludwig_model_exp = LudwigModel.load(os.path.join(output_dir, MODEL_FILE_NAME), backend=backend) # confirm model structure and weights are the same check_model_equal(ludwig_model_exp) diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 96c1903e33e..9df49906c2a 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -25,7 +25,7 @@ from ludwig.contribs.mlflow import MlflowCallback from ludwig.experiment import experiment_cli from ludwig.features.number_feature import numeric_transformation_registry -from ludwig.globals import DESCRIPTION_FILE_NAME, TRAINING_PREPROC_FILE_NAME +from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME, TRAINING_PREPROC_FILE_NAME from ludwig.schema.optimizers import optimizer_registry from ludwig.utils.data_utils import load_json, replace_file_extension from ludwig.utils.misc_utils import get_from_registry @@ -124,14 +124,14 @@ def test_model_progress_save(skip_save_progress, skip_save_model, tmp_path): ) # ========== Check for required result data sets ============= - model_dir = os.path.join(output_dir, "model") - files = [f for f in os.listdir(model_dir) if re.match(r"model_weights", f)] + model_dir = os.path.join(output_dir, MODEL_FILE_NAME) + files = [f for f in os.listdir(model_dir) if re.match(MODEL_WEIGHTS_FILE_NAME, f)] if skip_save_model: assert len(files) == 0 else: assert len(files) == 1 - training_checkpoints_dir = os.path.join(output_dir, "model", "training_checkpoints") + training_checkpoints_dir = os.path.join(output_dir, MODEL_FILE_NAME, "training_checkpoints") training_checkpoints = os.listdir(training_checkpoints_dir) if skip_save_progress: assert len(training_checkpoints) == 0 diff --git a/tests/integration_tests/test_postprocessing.py b/tests/integration_tests/test_postprocessing.py index b859b304ea2..3990c44b831 100644 --- a/tests/integration_tests/test_postprocessing.py +++ b/tests/integration_tests/test_postprocessing.py @@ -24,6 +24,7 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, DECODER, NAME, TRAINER +from ludwig.globals import MODEL_FILE_NAME from tests.integration_tests.utils import ( binary_feature, category_feature, @@ -235,7 +236,7 @@ def predict_with_backend(tmpdir, config, data_csv_path, backend, patch_args=None output_directory=os.path.join(tmpdir, "output"), ) # Check that metadata JSON saves and loads correctly - ludwig_model = LudwigModel.load(os.path.join(output_directory, "model")) + ludwig_model = LudwigModel.load(os.path.join(output_directory, MODEL_FILE_NAME)) if patch_args is not None: with mock.patch(*patch_args): diff --git a/tests/integration_tests/test_ray.py b/tests/integration_tests/test_ray.py index cd6007cd9ca..02407e318ff 100644 --- a/tests/integration_tests/test_ray.py +++ b/tests/integration_tests/test_ray.py @@ -50,6 +50,7 @@ ) from ludwig.data.preprocessing import balance_data from ludwig.data.split import DEFAULT_PROBABILITIES +from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.data_utils import read_parquet from ludwig.utils.misc_utils import merge_dict from tests.integration_tests.utils import ( @@ -98,7 +99,7 @@ def train_gpu(config, dataset, output_directory): model = LudwigModel(config, backend="local") _, _, output_dir = model.train(dataset, output_directory=output_directory) - return os.path.join(output_dir, "model") + return os.path.join(output_dir, MODEL_FILE_NAME) @ray.remote(num_cpus=1, num_gpus=0) diff --git a/tests/integration_tests/test_remote.py b/tests/integration_tests/test_remote.py index e9eeb37df2e..e9f38e101a8 100644 --- a/tests/integration_tests/test_remote.py +++ b/tests/integration_tests/test_remote.py @@ -6,7 +6,7 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend from ludwig.constants import BATCH_SIZE, TRAINER -from ludwig.globals import DESCRIPTION_FILE_NAME +from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils import fs_utils from ludwig.utils.data_utils import use_credentials from tests.integration_tests.utils import ( @@ -74,8 +74,8 @@ def test_remote_training_set(csv_filename, fs_protocol, bucket, creds, backend, assert os.path.join(output_directory, "api_experiment_run") == output_run_directory assert fs_utils.path_exists(os.path.join(output_run_directory, DESCRIPTION_FILE_NAME)) assert fs_utils.path_exists(os.path.join(output_run_directory, "training_statistics.json")) - assert fs_utils.path_exists(os.path.join(output_run_directory, "model")) - assert fs_utils.path_exists(os.path.join(output_run_directory, "model", "model_weights")) + assert fs_utils.path_exists(os.path.join(output_run_directory, MODEL_FILE_NAME)) + assert fs_utils.path_exists(os.path.join(output_run_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) model.predict(dataset=test_csv, output_directory=output_directory) diff --git a/tests/integration_tests/test_trainer.py b/tests/integration_tests/test_trainer.py index 29151b878ae..7de2dfd7c7a 100644 --- a/tests/integration_tests/test_trainer.py +++ b/tests/integration_tests/test_trainer.py @@ -22,6 +22,7 @@ TRAINER, ) from ludwig.distributed import init_dist_strategy +from ludwig.globals import MODEL_FILE_NAME from tests.integration_tests.utils import ( binary_feature, category_feature, @@ -177,7 +178,7 @@ def check_postconditions(model): check_postconditions(model) - model = LudwigModel.load(os.path.join(output_directory, "model")) + model = LudwigModel.load(os.path.join(output_directory, MODEL_FILE_NAME)) # loaded model should retain the tuned params check_postconditions(model) diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py index 4e7e0c1dd0c..6eb6f23f564 100644 --- a/tests/integration_tests/utils.py +++ b/tests/integration_tests/utils.py @@ -64,7 +64,7 @@ from ludwig.data.dataset_synthesizer import build_synthetic_dataset, DATETIME_FORMATS from ludwig.experiment import experiment_cli from ludwig.features.feature_utils import compute_feature_hash -from ludwig.globals import PREDICTIONS_PARQUET_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, PREDICTIONS_PARQUET_FILE_NAME from ludwig.schema.encoders.text_encoders import HFEncoderConfig from ludwig.schema.encoders.utils import get_encoder_classes from ludwig.trainers.trainer import Trainer @@ -119,7 +119,7 @@ def supports_multiprocessing(self): class FakeRemoteTrainer(Trainer): - def train(self, *args, save_path="model", **kwargs): + def train(self, *args, save_path=MODEL_FILE_NAME, **kwargs): with tempfile.TemporaryDirectory() as tmpdir: return super().train(*args, save_path=tmpdir, **kwargs) @@ -726,7 +726,7 @@ def run_api_experiment(input_features, output_features, data_csv): ) model.predict(dataset=data_csv) - model_dir = os.path.join(output_dir, "model") + model_dir = os.path.join(output_dir, MODEL_FILE_NAME) loaded_model = LudwigModel.load(model_dir) # Necessary before call to get_weights() to materialize the weights @@ -1159,7 +1159,7 @@ def run_test_suite(config, dataset, backend): model = LudwigModel(config, backend=backend) _, _, output_dir = model.train(dataset=dataset, output_directory=tmpdir) - model_dir = os.path.join(output_dir, "model") + model_dir = os.path.join(output_dir, MODEL_FILE_NAME) loaded_model = LudwigModel.load(model_dir, backend=backend) loaded_model.predict(dataset=dataset) return loaded_model diff --git a/tests/ludwig/encoders/test_text_encoders.py b/tests/ludwig/encoders/test_text_encoders.py index 1e26799ebdc..2bd4c2ab840 100644 --- a/tests/ludwig/encoders/test_text_encoders.py +++ b/tests/ludwig/encoders/test_text_encoders.py @@ -11,7 +11,7 @@ from ludwig.constants import ENCODER, ENCODER_OUTPUT, MODEL_ECD, NAME, TEXT, TRAINER from ludwig.encoders import text_encoders from ludwig.error import ConfigValidationError -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.schema.model_config import ModelConfig from ludwig.utils.data_utils import load_json from ludwig.utils.torch_utils import get_torch_device @@ -42,7 +42,7 @@ def _load_pretrained_hf_model_no_weights( def get_mismatched_config_params(ludwig_results_dir, ludwig_model): - saved_config_dict = load_json(os.path.join(ludwig_results_dir, "model", MODEL_HYPERPARAMETERS_FILE_NAME)) + saved_config_dict = load_json(os.path.join(ludwig_results_dir, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)) saved_config_obj = ModelConfig.from_dict(saved_config_dict) mismatches = [] @@ -120,7 +120,7 @@ def test_hf_ludwig_model_e2e(tmpdir, csv_filename, encoder_name): # Validate the model can be loaded. # This ensures that the config reflects the internal architecture of the encoder. - LudwigModel.load(os.path.join(results_dir, "model")) + LudwigModel.load(os.path.join(results_dir, MODEL_FILE_NAME)) clear_huggingface_cache() diff --git a/tests/ludwig/utils/test_upload_utils.py b/tests/ludwig/utils/test_upload_utils.py index 672488459fc..548713233d4 100644 --- a/tests/ludwig/utils/test_upload_utils.py +++ b/tests/ludwig/utils/test_upload_utils.py @@ -6,7 +6,7 @@ import pytest -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils.upload_utils import HuggingFaceHub logger = logging.getLogger(__name__) @@ -17,8 +17,8 @@ def _build_fake_model_repo( experiment_name: str, file_names: list[str], *, - model_directory_name: str = "model", - model_weights_directory_name: str = "model_weights", + model_directory_name: str = MODEL_FILE_NAME, + model_weights_directory_name: str = MODEL_WEIGHTS_FILE_NAME, ) -> None: """This utility function accepts the "destination_directory" and list of file names on input. @@ -153,7 +153,7 @@ def test_upload_to_hf_hub__validate_upload_parameters( ) model_path: pathlib.Path = pathlib.Path(output_directory) / "my_simple_experiment_run" - model_weights_path: pathlib.Path = pathlib.Path(model_path / "model" / "model_weights") + model_weights_path: pathlib.Path = pathlib.Path(model_path / MODEL_FILE_NAME / MODEL_WEIGHTS_FILE_NAME) repo_id: str = "test_account/test_repo" model_path: str = str(model_path) diff --git a/tests/regression_tests/model/test_old_models.py b/tests/regression_tests/model/test_old_models.py index b080aa5f3ac..fa2ca1dd62e 100644 --- a/tests/regression_tests/model/test_old_models.py +++ b/tests/regression_tests/model/test_old_models.py @@ -7,6 +7,7 @@ from ludwig.api import LudwigModel from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df +from ludwig.globals import MODEL_FILE_NAME NUM_EXAMPLES = 25 @@ -51,7 +52,7 @@ def test_model_loaded_from_old_config_prediction_works(tmpdir): ids=["titanic", "twitter_bots", "respiratory"], # , "gbm_adult_census_income"], ) def test_predict_deprecated_model(model_url, tmpdir): - model_dir = os.path.join(tmpdir, "model") + model_dir = os.path.join(tmpdir, MODEL_FILE_NAME) os.makedirs(model_dir) archive_path = wget.download(model_url, tmpdir) From 337f407465af6db7aec4d435309122af5fac5314 Mon Sep 17 00:00:00 2001 From: Arnav Garg <106701836+arnavgarg1@users.noreply.github.com> Date: Fri, 5 Apr 2024 13:44:46 -0700 Subject: [PATCH 2/2] Actually add support for RSLoRA and DoRA (#3984) --- ludwig/schema/llms/peft.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ludwig/schema/llms/peft.py b/ludwig/schema/llms/peft.py index 6245b40d48e..104a316179c 100644 --- a/ludwig/schema/llms/peft.py +++ b/ludwig/schema/llms/peft.py @@ -151,6 +151,8 @@ def to_config(self, task_type: str = None, **kwargs) -> "PeftConfig": bias=self.bias_type, target_modules=self.target_modules, task_type=task_type, + use_rslora=self.use_rslora, + use_dora=self.use_dora, ) @classmethod