Skip to content

Commit

Permalink
FIX Honor HF_HUB_OFFLINE mode if set by user (#1454)
Browse files Browse the repository at this point in the history
Resolves #1452

If users enable offline mode, don't perform checks for files on HF Hub,
as they would fail.
  • Loading branch information
BenjaminBossan authored Feb 12, 2024
1 parent a1c472f commit 6e95381
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 17 deletions.
20 changes: 8 additions & 12 deletions src/peft/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
import os
from typing import Optional

from huggingface_hub import file_exists
from huggingface_hub.utils import HfHubHTTPError, HFValidationError
from transformers import (
AutoModel,
AutoModelForCausalLM,
Expand All @@ -42,6 +40,7 @@
PeftModelForTokenClassification,
)
from .utils.constants import TOKENIZER_CONFIG_NAME
from .utils.other import check_file_exists_on_hf_hub


class _BaseAutoPeftModel:
Expand Down Expand Up @@ -112,16 +111,13 @@ def from_pretrained(
if token is None:
token = kwargs.get("use_auth_token", None)

try:
tokenizer_exists = file_exists(
repo_id=pretrained_model_name_or_path,
filename=TOKENIZER_CONFIG_NAME,
revision=kwargs.get("revision", None),
repo_type=kwargs.get("repo_type", None),
token=token,
)
except (HfHubHTTPError, HFValidationError): # not on the Hub, so probably local repo
pass
tokenizer_exists = check_file_exists_on_hf_hub(
repo_id=pretrained_model_name_or_path,
filename=TOKENIZER_CONFIG_NAME,
revision=kwargs.get("revision", None),
repo_type=kwargs.get("repo_type", None),
token=token,
)

if tokenizer_exists:
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
Expand Down
39 changes: 39 additions & 0 deletions src/peft/utils/other.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
import copy
import inspect
import os
import warnings
from contextlib import nullcontext
from typing import Optional, Tuple
Expand All @@ -21,6 +22,8 @@
import torch
from accelerate.hooks import add_hook_to_module, remove_hook_from_module
from accelerate.utils import is_npu_available, is_xpu_available
from huggingface_hub import file_exists
from huggingface_hub.utils import EntryNotFoundError, HFValidationError
from safetensors.torch import storage_ptr, storage_size

from ..import_utils import is_auto_gptq_available, is_torch_tpu_available
Expand Down Expand Up @@ -537,3 +540,39 @@ def cast_mixed_precision_params(model, dtype):
p.data = p.to(dtype)
else:
p.data = p.to(torch.float32)


def str_to_bool(value: str) -> int:
    """Convert a string truth value to `1` (true) or `0` (false).

    Accepts `y`, `yes`, `t`, `true`, `on`, and `1` as true; `n`, `no`, `f`,
    `false`, `off`, and `0` as false (case-insensitive). Raises `ValueError`
    for anything else.
    """
    # Mirrors accelerate.utils.str_to_bool, the replacement for the
    # deprecated distutils.util.strtobool.
    normalized = value.lower()
    truthy = ("y", "yes", "t", "true", "on", "1")
    falsy = ("n", "no", "f", "false", "off", "0")
    if normalized in truthy:
        return 1
    if normalized in falsy:
        return 0
    raise ValueError(f"invalid truth value {normalized}")


def check_file_exists_on_hf_hub(repo_id: str, filename: str, **kwargs) -> Optional[bool]:
    """Check whether `filename` exists in the `repo_id` repo on the HF Hub.

    Returns `True`/`False` when the check succeeds, and `None` when it could
    not be performed: either the user enabled HF offline mode, or the Hub
    lookup failed (invalid repo id or missing entry). Extra keyword arguments
    are forwarded to `huggingface_hub.file_exists`.
    """
    # Honor user-enabled offline mode: skip the network check entirely.
    if str_to_bool(os.environ.get("HF_HUB_OFFLINE", "0")):
        return None

    try:
        return file_exists(repo_id, filename, **kwargs)
    except (HFValidationError, EntryNotFoundError):
        # Lookup failed; signal "unknown" rather than raising.
        return None
19 changes: 14 additions & 5 deletions src/peft/utils/save_and_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,16 @@

import torch
from huggingface_hub import file_exists, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError, HFValidationError
from huggingface_hub.utils import EntryNotFoundError
from safetensors.torch import load_file as safe_load_file

from .other import EMBEDDING_LAYER_NAMES, SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME, infer_device
from .other import (
EMBEDDING_LAYER_NAMES,
SAFETENSORS_WEIGHTS_NAME,
WEIGHTS_NAME,
check_file_exists_on_hf_hub,
infer_device,
)
from .peft_types import PeftType


Expand Down Expand Up @@ -140,14 +146,17 @@ def get_peft_model_state_dict(
# we need to make sure we can download that config.
has_remote_config = False

# ensure that this check is not performed in HF offline mode, see #1452
if model_id is not None:
try:
has_remote_config = file_exists(model_id, "config.json")
except (HFValidationError, EntryNotFoundError):
exists = check_file_exists_on_hf_hub(model_id, "config.json")
if exists is None:
# check failed, could not determine if it exists or not
warnings.warn(
f"Could not find a config file in {model_id} - will assume that the vocabulary was not modified."
)
has_remote_config = False
else:
has_remote_config = exists

# check if the vocab size of the base model is different from the vocab size of the finetuned model
if (
Expand Down

0 comments on commit 6e95381

Please sign in to comment.