Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove the need for the config to be in the subfolder #2044

Merged
merged 8 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 22 additions & 14 deletions optimum/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,27 +380,35 @@ def from_pretrained(
)
model_id, revision = model_id.split("@")

all_files, _ = TasksManager.get_model_files(
model_id,
subfolder=subfolder,
cache_dir=cache_dir,
revision=revision,
token=token,
)

config_folder = subfolder
if cls.config_name not in all_files:
logger.info(
f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}."
)
Comment on lines +393 to +395
Copy link
Member

@tomaarsen tomaarsen Oct 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit: Nevermind, I think we should remove the logger.setLevel(logging.INFO) that exists in 2 places in optimum:
Otherwise I always see e.g.

Framework not specified. Using pt to export the model.
Using the export variant default. Available variants are:
    - default: The default ONNX variant.

***** Exporting submodel 1/1: MPNetModel *****
Using framework PyTorch: 2.5.0.dev20240807+cu121
config.json not found in the specified subfolder onnx. Using the top level config.json.
Compiling the model to CPU ...
See the original comment here:
Suggested change
logger.info(
f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}."
)
logger.debug(
f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}."
)

Any chance I can convince you to reduce this to debug?

I recognize that it's taken from

logger.info(
f"config.json not found in the specified subfolder {subfolder}. Using the top level config.json."
)

where info is used, but I feel like the setting is a bit different. This new info will be triggered every time someone loads an ONNX or OpenVINO model with Sentence Transformers (as those automatically save their modeling files in subfolders away from the configuration).

Another reason that I'd like it to be reduced to debug is because (for some reason?) the info-level logs are shown by default in Optimum. Or do I just have a weird/non-normal setup? I don't think I'm setting set_verbosity_info() or TRANSFORMERS_VERBOSITY.

<Logger optimum (INFO)>
<Logger transformers (WARNING)>
<Logger accelerate (WARNING)>

I'm usually getting quite a lot of logs from optimum already.

config_folder = ""

library_name = TasksManager.infer_library_from_model(
model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token
)

if library_name == "timm":
config = PretrainedConfig.from_pretrained(
model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token
)

if config is None:
if os.path.isdir(os.path.join(model_id, subfolder)) and cls.config_name == CONFIG_NAME:
if CONFIG_NAME in os.listdir(os.path.join(model_id, subfolder)):
config = AutoConfig.from_pretrained(
os.path.join(model_id, subfolder), trust_remote_code=trust_remote_code
)
elif CONFIG_NAME in os.listdir(model_id):
if os.path.isdir(os.path.join(model_id, config_folder)) and cls.config_name == CONFIG_NAME:
if CONFIG_NAME in os.listdir(os.path.join(model_id, config_folder)):
config = AutoConfig.from_pretrained(
os.path.join(model_id, CONFIG_NAME), trust_remote_code=trust_remote_code
)
logger.info(
f"config.json not found in the specified subfolder {subfolder}. Using the top level config.json."
os.path.join(model_id, config_folder), trust_remote_code=trust_remote_code
)
else:
raise OSError(f"config.json not found in {model_id} local folder")
Expand All @@ -411,7 +419,7 @@ def from_pretrained(
cache_dir=cache_dir,
token=token,
force_download=force_download,
subfolder=subfolder,
subfolder=config_folder,
trust_remote_code=trust_remote_code,
)
elif isinstance(config, (str, os.PathLike)):
Expand All @@ -421,7 +429,7 @@ def from_pretrained(
cache_dir=cache_dir,
token=token,
force_download=force_download,
subfolder=subfolder,
subfolder=config_folder,
trust_remote_code=trust_remote_code,
)

Expand Down
6 changes: 2 additions & 4 deletions optimum/onnxruntime/modeling_ort.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,13 +510,12 @@ def _from_pretrained(

if file_name is None:
if model_path.is_dir():
onnx_files = list(model_path.glob("*.onnx"))
onnx_files = list((model_path / subfolder).glob("*.onnx"))
else:
repo_files, _ = TasksManager.get_model_files(
model_id, revision=revision, cache_dir=cache_dir, token=token
)
repo_files = map(Path, repo_files)

pattern = "*.onnx" if subfolder == "" else f"{subfolder}/*.onnx"
onnx_files = [p for p in repo_files if p.match(pattern)]

Expand Down Expand Up @@ -983,10 +982,9 @@ def _cached_file(
token = use_auth_token

model_path = Path(model_path)

# locates a file in a local folder and repo, downloads and cache it if necessary.
if model_path.is_dir():
model_cache_path = model_path / file_name
model_cache_path = model_path / subfolder / file_name
preprocessors = maybe_load_preprocessors(model_path.as_posix())
else:
model_cache_path = hf_hub_download(
Expand Down
14 changes: 14 additions & 0 deletions tests/onnxruntime/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import requests
import timm
import torch
from huggingface_hub import HfApi
from huggingface_hub.constants import default_cache_path
from parameterized import parameterized
from PIL import Image
Expand Down Expand Up @@ -1263,6 +1264,19 @@ def test_trust_remote_code(self):
torch.allclose(pt_logits, ort_logits, atol=1e-4), f" Maxdiff: {torch.abs(pt_logits - ort_logits).max()}"
)

@parameterized.expand(("", "onnx"))
def test_loading_with_config_in_root(self, subfolder):
    """Models whose config.json sits at the repo root (not inside the weights
    subfolder) must still load, both from the Hub and from a local directory."""
    model_id = "sentence-transformers-testing/stsb-bert-tiny-onnx"
    # Only export on the fly when loading from the root (the "onnx" subfolder
    # already contains exported weights).
    needs_export = subfolder == ""

    # Loading directly from the Hub.
    ORTModelForFeatureExtraction.from_pretrained(model_id, subfolder=subfolder, export=needs_export)

    # Loading from a local snapshot of the same repo.
    with tempfile.TemporaryDirectory() as tmp_dir:
        snapshot_dir = Path(tmp_dir) / "model"
        HfApi().snapshot_download(repo_id=model_id, local_dir=snapshot_dir)
        ORTModelForFeatureExtraction.from_pretrained(snapshot_dir, subfolder=subfolder, export=needs_export)


class ORTModelForQuestionAnsweringIntegrationTest(ORTModelTestMixin):
SUPPORTED_ARCHITECTURES = [
Expand Down
Loading