FIX Honor HF_HUB_OFFLINE mode if set by user #1454

Merged
src/peft/auto.py (8 additions, 12 deletions)
@@ -18,8 +18,6 @@
 import os
 from typing import Optional

-from huggingface_hub import file_exists
-from huggingface_hub.utils import HfHubHTTPError, HFValidationError
 from transformers import (
     AutoModel,
     AutoModelForCausalLM,
@@ -42,6 +40,7 @@
     PeftModelForTokenClassification,
 )
 from .utils.constants import TOKENIZER_CONFIG_NAME
+from .utils.other import check_file_exists_on_hf_hub


 class _BaseAutoPeftModel:
@@ -112,16 +111,13 @@ def from_pretrained(
         if token is None:
             token = kwargs.get("use_auth_token", None)

-        try:
-            tokenizer_exists = file_exists(
-                repo_id=pretrained_model_name_or_path,
-                filename=TOKENIZER_CONFIG_NAME,
-                revision=kwargs.get("revision", None),
-                repo_type=kwargs.get("repo_type", None),
-                token=token,
-            )
-        except (HfHubHTTPError, HFValidationError):  # not on the Hub, so probably local repo
-            pass
+        tokenizer_exists = check_file_exists_on_hf_hub(
+            repo_id=pretrained_model_name_or_path,
+            filename=TOKENIZER_CONFIG_NAME,
+            revision=kwargs.get("revision", None),
+            repo_type=kwargs.get("repo_type", None),
+            token=token,
+        )

         if tokenizer_exists:
             tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
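With this change, tokenizer_exists can be None when the Hub check is skipped (offline mode) or fails; since None is falsy, from_pretrained simply skips loading the tokenizer instead of erroring out. A minimal sketch of the resulting behavior, assuming the adapter repo (the repo id below is hypothetical) and its base model are already in the local HF cache:

import os

# HF_HUB_OFFLINE is read early by the hub libraries, so set it before
# importing peft (or export it in the shell before starting Python).
os.environ["HF_HUB_OFFLINE"] = "1"

from peft import AutoPeftModelForCausalLM

# Before this fix, probing the Hub for tokenizer_config.json could raise in
# offline mode; now the probe is skipped and cached files are used directly.
model = AutoPeftModelForCausalLM.from_pretrained("some-user/some-lora-adapter")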
src/peft/utils/other.py (39 additions, 0 deletions)
@@ -13,6 +13,7 @@
 # limitations under the License.
 import copy
 import inspect
+import os
 import warnings
 from contextlib import nullcontext
 from typing import Optional, Tuple
@@ -21,6 +22,8 @@
 import torch
 from accelerate.hooks import add_hook_to_module, remove_hook_from_module
 from accelerate.utils import is_npu_available, is_xpu_available
+from huggingface_hub import file_exists
+from huggingface_hub.utils import EntryNotFoundError, HFValidationError
 from safetensors.torch import storage_ptr, storage_size

 from ..import_utils import is_auto_gptq_available, is_torch_tpu_available
@@ -537,3 +540,39 @@ def cast_mixed_precision_params(model, dtype):
             p.data = p.to(dtype)
         else:
             p.data = p.to(torch.float32)
+
+
+def str_to_bool(value: str) -> int:
+    """
+    Converts a string representation of truth to `True` (1) or `False` (0).
+
+    True values are `y`, `yes`, `t`, `true`, `on`, and `1`; false values are `n`, `no`, `f`, `false`, `off`, and `0`.
+    """
+    # same function as in accelerate.utils, which replaces the deprecated distutils.util.strtobool
+    value = value.lower()
+    if value in ("y", "yes", "t", "true", "on", "1"):
+        return 1
+    elif value in ("n", "no", "f", "false", "off", "0"):
+        return 0
+    else:
+        raise ValueError(f"invalid truth value {value}")
+
+
+def check_file_exists_on_hf_hub(repo_id: str, filename: str, **kwargs) -> Optional[bool]:
+    """Check if a file exists on the HF Hub. If the check was not successful, return None instead of erroring.
+
+    Respect offline mode if set.
+
+    """
+    exists: Optional[bool] = None
+    if str_to_bool(os.environ.get("HF_HUB_OFFLINE", "0")):
+        # user set offline mode, cannot check
+        return exists
+
+    try:
+        exists = file_exists(repo_id, filename, **kwargs)
+    except (HFValidationError, EntryNotFoundError):
+        # error, exists stays None
+        pass
+
+    return exists
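A quick sketch of how the new helper behaves, assuming a peft install that includes this change; the first call returns without any network traffic, while the second performs a real Hub lookup, so it needs network access (gpt2 is just an example repo id):

import os

from peft.utils.other import check_file_exists_on_hf_hub

# Offline mode set: the helper short-circuits and returns None ("could not check").
os.environ["HF_HUB_OFFLINE"] = "1"
print(check_file_exists_on_hf_hub("gpt2", "config.json"))  # None

# Offline mode unset: the helper returns a real boolean from the Hub.
os.environ["HF_HUB_OFFLINE"] = "0"
print(check_file_exists_on_hf_hub("gpt2", "config.json"))  # True

Note that the helper reads HF_HUB_OFFLINE from os.environ at call time, so toggling it at runtime works here, unlike settings that huggingface_hub caches at import.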
src/peft/utils/save_and_load.py (14 additions, 5 deletions)
@@ -17,10 +17,16 @@

 import torch
 from huggingface_hub import file_exists, hf_hub_download
-from huggingface_hub.utils import EntryNotFoundError, HFValidationError
+from huggingface_hub.utils import EntryNotFoundError
 from safetensors.torch import load_file as safe_load_file

-from .other import EMBEDDING_LAYER_NAMES, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME, infer_device
+from .other import (
+    EMBEDDING_LAYER_NAMES,
+    SAFETENSORS_WEIGHTS_NAME,
+    WEIGHTS_NAME,
+    check_file_exists_on_hf_hub,
+    infer_device,
+)
 from .peft_types import PeftType

@@ -140,14 +146,17 @@ def get_peft_model_state_dict(
         # we need to make sure we can download that config.
         has_remote_config = False

+        # ensure that this check is not performed in HF offline mode, see #1452
         if model_id is not None:
-            try:
-                has_remote_config = file_exists(model_id, "config.json")
-            except (HFValidationError, EntryNotFoundError):
+            exists = check_file_exists_on_hf_hub(model_id, "config.json")
+            if exists is None:
+                # check failed, could not determine if it exists or not
                 warnings.warn(
                     f"Could not find a config file in {model_id} - will assume that the vocabulary was not modified."
                 )
                 has_remote_config = False
+            else:
+                has_remote_config = exists

         # check if the vocab size of the base model is different from the vocab size of the finetuned model
         if (
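The caller-side pattern is worth spelling out: the helper's result is tri-state, and only None (check not possible) triggers the warning plus the conservative default. A standalone sketch of that logic, with resolve_has_remote_config as a hypothetical name:

import warnings
from typing import Optional


def resolve_has_remote_config(exists: Optional[bool], model_id: str) -> bool:
    # True/False: the Hub check ran and succeeded, so trust the answer.
    # None: the check could not run (offline mode, validation error, ...),
    # so warn and fall back to assuming the vocabulary was not modified.
    if exists is None:
        warnings.warn(
            f"Could not find a config file in {model_id} - will assume that the "
            "vocabulary was not modified."
        )
        return False
    return exists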