Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into chat_templates
Browse files Browse the repository at this point in the history
  • Loading branch information
zucchini-nlp committed Jul 16, 2024
2 parents 7c215bd + 6fbea6d commit 78a5876
Show file tree
Hide file tree
Showing 18 changed files with 148 additions and 183 deletions.
19 changes: 16 additions & 3 deletions .github/workflows/build-ci-docker-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ jobs:
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
continue-on-error: true
continue-on-error: true

steps:
-
-
name: Set tag
run: |
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
Expand Down Expand Up @@ -61,4 +61,17 @@ jobs:
REF=${{ github.sha }}
file: "./docker/${{ matrix.file }}.dockerfile"
push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }}
tags: ${{ env.TAG }}
tags: ${{ env.TAG }}

notify:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
steps:
- name: Post to Slack
if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
2 changes: 1 addition & 1 deletion src/transformers/commands/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def run(self):
print("Abort")
exit()
try:
url = create_repo(token, name=self.args.name, organization=self.args.organization)
url = create_repo(repo_id=full_name, token=token)
except HTTPError as e:
print(e)
print(ANSI.red(e.response.text))
Expand Down
22 changes: 11 additions & 11 deletions src/transformers/generation/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ class GenerationConfig(PushToHubMixin):
heuristic is applied and the generation stops when is it very unlikely to find better candidates;
`"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical
beam search algorithm).
max_time(`float`, *optional*):
max_time (`float`, *optional*):
The maximum amount of time you allow the computation to run for in seconds. generation will still finish
the current pass after allocated time has been passed.
stop_strings(`str or List[str]`, *optional*):
stop_strings (`str or List[str]`, *optional*):
A string or a list of strings that should terminate generation if the model outputs them.
> Parameters that control the generation strategy used
Expand Down Expand Up @@ -181,10 +181,10 @@ class GenerationConfig(PushToHubMixin):
`length_penalty` < 0.0 encourages shorter sequences.
no_repeat_ngram_size (`int`, *optional*, defaults to 0):
If set to int > 0, all ngrams of that size can only occur once.
bad_words_ids(`List[List[int]]`, *optional*):
bad_words_ids (`List[List[int]]`, *optional*):
List of list of token ids that are not allowed to be generated. Check
[`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
force_words_ids(`List[List[int]]` or `List[List[List[int]]]`, *optional*):
force_words_ids (`List[List[int]]` or `List[List[List[int]]]`, *optional*):
List of token ids that must be generated. If given a `List[List[int]]`, this is treated as a simple list of
words that must be included, the opposite to `bad_words_ids`. If given `List[List[List[int]]]`, this
triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one
Expand All @@ -200,7 +200,7 @@ class GenerationConfig(PushToHubMixin):
The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
language token.
forced_eos_token_id (`Union[int, List[int]]`, *optional*, defaults to `model.config.forced_eos_token_id`):
forced_eos_token_id (`int` or List[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
list to set multiple *end-of-sequence* tokens.
remove_invalid_values (`bool`, *optional*, defaults to `model.config.remove_invalid_values`):
Expand All @@ -210,7 +210,7 @@ class GenerationConfig(PushToHubMixin):
This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been
generated. The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where
penalty starts and `decay_factor` represents the factor of exponential decay
suppress_tokens (`List[int]`, *optional*):
suppress_tokens (`List[int]`, *optional*):
A list of tokens that will be suppressed at generation. The `SupressTokens` logit processor will set their
log probs to `-inf` so that they are not sampled.
begin_suppress_tokens (`List[int]`, *optional*):
Expand All @@ -234,7 +234,7 @@ class GenerationConfig(PushToHubMixin):
low_memory (`bool`, *optional*):
Switch to sequential beam search and sequential topk for contrastive search to reduce peak memory.
Used with beam search and contrastive search.
watermarking_config (Union[`WatermarkingConfig`, `dict`], *optional*):
watermarking_config (`WatermarkingConfig` or `dict`, *optional*):
Arguments used to watermark the model outputs by adding a small bias to randomly selected set of "green" tokens.
If passed as `Dict`, it will be converted to a `WatermarkingConfig` internally.
See [this paper](https://arxiv.org/abs/2306.04634) for more details. Accepts the following keys:
Expand All @@ -249,12 +249,12 @@ class GenerationConfig(PushToHubMixin):
- "lefthash" (default): "green" tokens selection depend on the last token (Algorithm 2 from the paper)
- "selfhash": "green" tokens selection depends on the current token itself (Algorithm 3 from the paper)
The downside of this scheme is that it considers all possible next tokens and can be slower than "lefthash".
- context_width(`int`):
- context_width (`int`):
The context length of previous tokens to use in seeding. Higher context length makes watermarking more robust.
> Parameters that define the output variables of generate
num_return_sequences(`int`, *optional*, defaults to 1):
num_return_sequences (`int`, *optional*, defaults to 1):
The number of independently computed returned sequences for each element in the batch.
output_attentions (`bool`, *optional*, defaults to `False`):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
Expand Down Expand Up @@ -284,7 +284,7 @@ class GenerationConfig(PushToHubMixin):
encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
`decoder_input_ids`.
decoder_start_token_id (`Union[int, List[int]]`, *optional*):
decoder_start_token_id (`int` or `List[int]`, *optional*):
If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token or a list of length
`batch_size`. Indicating a list enables different start ids for each element in the batch
(e.g. multilingual models with different target languages in one batch)
Expand Down Expand Up @@ -323,7 +323,7 @@ class GenerationConfig(PushToHubMixin):
cache_implementation (`str`, *optional*, default to `None`):
Cache class that should be used when generating.
cache_config (`Union[CacheConfig, dict]`, *optional*, default to `None`):
cache_config (`CacheConfig` or `dict`, *optional*, default to `None`):
Arguments used in the key-value cache class can be passed in `cache_config`. Can be passed as a `Dict` and
it will be converted to its repsective `CacheConfig` internally.
Otherwise can be passed as a `CacheConfig` class matching the indicated `cache_implementation`.
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/generation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,7 @@ def _dola_decoding(
generation_config: GenerationConfig,
synced_gpus: bool,
streamer: "BaseStreamer",
logits_warper: LogitsProcessorList,
logits_warper: Optional[LogitsProcessorList],
**model_kwargs,
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
r"""
Expand Down Expand Up @@ -2826,7 +2826,7 @@ def _sample(
generation_config: GenerationConfig,
synced_gpus: bool,
streamer: Optional["BaseStreamer"],
logits_warper: LogitsProcessorList,
logits_warper: Optional[LogitsProcessorList],
**model_kwargs,
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
r"""
Expand Down Expand Up @@ -3033,7 +3033,7 @@ def _beam_search(
stopping_criteria: StoppingCriteriaList,
generation_config: GenerationConfig,
synced_gpus: bool,
logits_warper: LogitsProcessorList,
logits_warper: Optional[LogitsProcessorList],
**model_kwargs,
) -> Union[GenerateBeamOutput, torch.LongTensor]:
r"""
Expand Down
80 changes: 17 additions & 63 deletions src/transformers/models/musicgen/modeling_musicgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...generation.configuration_utils import GenerationConfig
from ...generation.configuration_utils import GenerationConfig, GenerationMode
from ...generation.logits_process import ClassifierFreeGuidanceLogitsProcessor, LogitsProcessorList
from ...generation.stopping_criteria import StoppingCriteriaList
from ...modeling_attn_mask_utils import (
Expand Down Expand Up @@ -1618,16 +1618,7 @@ def generate(
model_kwargs["delay_pattern_mask"] = delay_pattern_mask

# 7. determine generation mode
is_greedy_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is False
)
is_sample_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is True
)
generation_mode = generation_config.get_generation_mode()

# 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG)
if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1:
Expand All @@ -1649,27 +1640,13 @@ def generate(
generation_config=generation_config, stopping_criteria=stopping_criteria
)

if is_greedy_gen_mode:
if generation_config.num_return_sequences > 1:
raise ValueError(
"num_return_sequences has to be 1 when doing greedy search, "
f"but is {generation_config.num_return_sequences}."
)

# 11. run greedy search
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
streamer=streamer,
**model_kwargs,
)

elif is_sample_gen_mode:
if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH):
# 11. prepare logits warper
logits_warper = self._get_logits_warper(generation_config, device=input_ids.device)
prepared_logits_warper = (
self._get_logits_warper(generation_config, device=input_ids.device)
if generation_config.do_sample
else None
)

# expand input_ids with `num_return_sequences` additional sequences per batch
input_ids, model_kwargs = self._expand_inputs_for_generation(
Expand All @@ -1682,7 +1659,7 @@ def generate(
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
logits_warper=logits_warper,
logits_warper=prepared_logits_warper,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
Expand Down Expand Up @@ -2714,16 +2691,7 @@ def generate(
streamer.put(input_ids.cpu())

# 7. determine generation mode
is_greedy_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is False
)
is_sample_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is True
)
generation_mode = generation_config.get_generation_mode()

# 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG)
if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1:
Expand All @@ -2745,27 +2713,13 @@ def generate(
generation_config=generation_config, stopping_criteria=stopping_criteria
)

if is_greedy_gen_mode:
if generation_config.num_return_sequences > 1:
raise ValueError(
"num_return_sequences has to be 1 when doing greedy search, "
f"but is {generation_config.num_return_sequences}."
)

# 11. run greedy search
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
streamer=streamer,
**model_kwargs,
)

elif is_sample_gen_mode:
if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH):
# 11. prepare logits warper
logits_warper = self._get_logits_warper(generation_config, device=input_ids.device)
prepared_logits_warper = (
self._get_logits_warper(generation_config, device=input_ids.device)
if generation_config.do_sample
else None
)

# expand input_ids with `num_return_sequences` additional sequences per batch
input_ids, model_kwargs = self._expand_inputs_for_generation(
Expand All @@ -2779,7 +2733,7 @@ def generate(
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
logits_warper=logits_warper,
logits_warper=prepared_logits_warper,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
Expand Down
Loading

0 comments on commit 78a5876

Please sign in to comment.