Merge branch 'main' into meister/nfa
ssh-meister committed Sep 27, 2024
2 parents ff84955 + d51d8b9 commit 08fbe40
Showing 88 changed files with 2,154 additions and 704 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/_test_template.yml
@@ -59,7 +59,7 @@ jobs:
(
set -e
- docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.SCRIPT }}'
+ docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.SCRIPT }}'
) 2> >(tee err.log)
EXIT_CODE=$?
@@ -73,4 +73,4 @@ jobs:
- name: after_script
if: always() && inputs.AFTER_SCRIPT != ':'
run: |
- docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.AFTER_SCRIPT }}'
+ docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.AFTER_SCRIPT }}'
4 changes: 2 additions & 2 deletions .github/workflows/cherry-pick-release-commit.yml
@@ -92,7 +92,7 @@ jobs:
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls \
- -d $PAYLOAD)
+ -d "$PAYLOAD")

NEW_PR_ID=$(echo -E $NEW_PR | jq '.number')
curl -L \
@@ -120,7 +120,7 @@ jobs:
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":alert: Cherrypick bot 🤖: Hey @'$USERNAME': Cherry-pick of <'$URL'|#'$PR_ID'> failed (3-way merge impossible). Please resolve manually and create a PR.\n\ncc: @okoenig"
"text": ":alert: Cherrypick bot 🤖: Hey <@'$USERNAME'>: Cherry-pick of <'$URL'|#'$PR_ID'> failed (3-way merge impossible). Please resolve manually and create a PR.\n\ncc: <@${{ secrets.SLACK_WEBHOOK_ADMIN }}>"
}
}
]
50 changes: 49 additions & 1 deletion .github/workflows/cicd-main.yml
@@ -5223,6 +5223,36 @@ jobs:
AFTER_SCRIPT: |
rm -rf /home/TestData/nlp/megatron_mamba/nemo-ux-mamba/cicd_test_sft/${{ github.run_id }}
+ L2_NeMo_2_HF_MODEL_IMPORT:
+   needs: [cicd-test-container-setup]
+   uses: ./.github/workflows/_test_template.yml
+   with:
+     RUNNER: self-hosted-azure
+     SCRIPT: |
+       python tests/collections/llm/gpt/model/test_model_import.py
+     AFTER_SCRIPT: |
+       rm -rf ~/.cache/nemo/models
+
+ L2_NeMo_2_T5_Pretraining:
+   needs: [cicd-test-container-setup]
+   uses: ./.github/workflows/_test_template.yml
+   if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_T5_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
+   with:
+     RUNNER: self-hosted-azure
+     SCRIPT: |
+       NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python tests/collections/llm/megatron_t5_pretraining.py \
+         --devices=2 \
+         --max-steps=3 \
+         --experiment-dir=tests/collections/llm/t5_pretrain_results \
+         --data-path=/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document \
+         --index-mapping-dir=tests/collections/llm/t5_index_mappings
+     AFTER_SCRIPT: |
+       rm -rf tests/collections/llm/t5_pretrain_results
+       rm -rf tests/collections/llm/t5_index_mappings
Nemo_CICD_Test:
needs:
- pre-flight
@@ -5357,8 +5387,10 @@ jobs:
#- OPTIONAL_L2_Stable_Diffusion_Training
- L2_NeMo_2_GPT_Pretraining_no_transformer_engine
- L2_NeMo_2_GPT_DDP_Param_Parity_check
+ - L2_NeMo_2_HF_MODEL_IMPORT
- L2_NeMo_2_SSM_Pretraining
- L2_NeMo_2_SSM_Finetuning
+ - L2_NeMo_2_T5_Pretraining
if: always()
runs-on: ubuntu-latest
steps:
@@ -5377,6 +5409,23 @@
- if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'true' }}
run: exit 0

+ - if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'true' && github.event_name == 'pull_request' }}
+   uses: peter-evans/create-or-update-comment@v4
+   with:
+     issue-number: ${{ github.event.number }}
+     body: |
+       [🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
+
+       I just wanted to let you know that a [CICD pipeline](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) for this PR just finished successfully ✨
+
+       So it might be time to merge this PR or get some approvals 🚀
+
+       But I'm just a 🤖 so I'll leave it to you what to do next.
+
+       Have a great day!
+
+       //cc @ko3n1g
- if: ${{ always() && steps.pipeline-conclusion.outputs.FAILED == 'true' }}
name: Checkout repository
uses: actions/checkout@v4
@@ -5452,4 +5501,3 @@ jobs:
- if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'false' }}
run: |
exit 1
10 changes: 10 additions & 0 deletions README.md
@@ -78,6 +78,16 @@

<details open>
<summary><b>Speech Recognition</b></summary>
+ <details>
+   <summary>
+     <a href="https://developer.nvidia.com/blog/accelerating-leaderboard-topping-asr-models-10x-with-nvidia-nemo/">
+       Accelerating Leaderboard-Topping ASR Models 10x with NVIDIA NeMo
+     </a> (2024/09/24)
+   </summary>
+   The NVIDIA NeMo team released a number of inference optimizations for CTC, RNN-T, and TDT models that resulted in up to 10x inference speed-up.
+   These models now exceed an inverse real-time factor (RTFx) of 2,000, with some reaching an RTFx of even 6,000.
+   <br><br>
+ </details>
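The RTFx claim in this new README entry is easy to sanity-check: inverse real-time factor is audio duration divided by processing time, so higher is faster. A quick illustration in plain Python, using the figures quoted above (the one-hour duration is illustrative, not measured):

```python
# RTFx (inverse real-time factor) = seconds of audio / seconds of compute.
audio_seconds = 3600.0  # one hour of speech (illustrative)

for rtfx in (2000.0, 6000.0):  # figures quoted in the blog post
    processing_seconds = audio_seconds / rtfx
    print(f"RTFx {rtfx:>6.0f}: 1 h of audio decoded in {processing_seconds:.2f} s")

# RTFx   2000: 1 h of audio decoded in 1.80 s
# RTFx   6000: 1 h of audio decoded in 0.60 s
```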
<details>
<summary>
<a href="https://developer.nvidia.com/blog/new-standard-for-speech-recognition-and-translation-from-the-nvidia-nemo-canary-model/">
@@ -127,7 +127,7 @@ def perform_streaming(
# would pass the whole audio at once through the model like offline mode in order to compare the results with the streaming mode
# the output of the model in the offline and streaming mode should be exactly the same
with torch.inference_mode():
- with autocast():
+ with autocast:
processed_signal, processed_signal_length = streaming_buffer.get_all_audios()
with torch.no_grad():
(
@@ -156,7 +156,7 @@ def perform_streaming(
pred_out_stream = None
for step_num, (chunk_audio, chunk_lengths) in enumerate(streaming_buffer_iter):
with torch.inference_mode():
- with autocast():
+ with autocast:
# keep_all_outputs needs to be True for the last step of streaming when model is trained with att_context_style=regular
# otherwise the last outputs would get dropped

@@ -313,19 +313,7 @@ def main():
raise ValueError("Model does not support multiple lookaheads.")

global autocast
- if (
-     args.use_amp
-     and torch.cuda.is_available()
-     and hasattr(torch.cuda, 'amp')
-     and hasattr(torch.cuda.amp, 'autocast')
- ):
-     logging.info("AMP enabled!\n")
-     autocast = torch.cuda.amp.autocast
- else:
-
-     @contextlib.contextmanager
-     def autocast():
-         yield
+ autocast = torch.amp.autocast(asr_model.device.type, enabled=args.use_amp)

# configure the decoding config
decoding_cfg = asr_model.cfg.decoding
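This hunk captures the AMP cleanup repeated across the ASR example scripts in this commit: the old code bound a global `autocast` name either to `torch.cuda.amp.autocast` (a callable) or to a no-op context manager, while the new code creates a single `torch.amp.autocast` instance whose `enabled` flag covers the disabled case. That is why the call sites above change from `with autocast():` to `with autocast:`. A minimal sketch of the two idioms, with stand-in names for the flag and device (not the script's actual variables):

```python
import contextlib

import torch

use_amp = True
device_type = "cuda" if torch.cuda.is_available() else "cpu"  # stands in for asr_model.device.type

# Old idiom: `autocast` is a callable, so every call site needs parentheses,
# and a dummy context manager is required when AMP is off or unavailable.
if use_amp and torch.cuda.is_available():
    autocast_old = torch.cuda.amp.autocast
else:
    @contextlib.contextmanager
    def autocast_old():
        yield

with autocast_old():
    pass  # model forward would go here

# New idiom: one reusable context-manager instance; enabled=False turns it
# into a no-op, so no fallback branch is needed. A dtype can also be pinned,
# as the chunked AED script below does with float16/bfloat16.
autocast_new = torch.amp.autocast(device_type, enabled=use_amp)

with autocast_new:
    pass  # model forward would go here
```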
@@ -170,16 +170,6 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
# Disable config overwriting
OmegaConf.set_struct(model_cfg.preprocessor, True)

- # setup AMP (optional)
- if cfg.amp and torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and hasattr(torch.cuda.amp, 'autocast'):
-     logging.info("AMP enabled!\n")
-     autocast = torch.cuda.amp.autocast
- else:
-
-     @contextlib.contextmanager
-     def autocast(*args, **kwargs):
-         yield

# Compute output filename
cfg = compute_output_filename(cfg, model_name)

@@ -208,7 +198,7 @@ def autocast(*args, **kwargs):

amp_dtype = torch.float16 if cfg.amp_dtype == "float16" else torch.bfloat16

- with autocast(dtype=amp_dtype):
+ with torch.amp.autocast(asr_model.device.type, enabled=cfg.amp, dtype=amp_dtype):
with torch.no_grad():
hyps = get_buffered_pred_feat_multitaskAED(
frame_asr,
@@ -88,7 +88,9 @@ class TranscriptionConfig:
# Chunked configs
chunk_len_in_secs: float = 1.6 # Chunk length in seconds
total_buffer_in_secs: float = 4.0 # Length of buffer (chunk + left and right padding) in seconds
- model_stride: int = 8  # Model downsampling factor, 8 for Citrinet and FastConformer models and 4 for Conformer models.
+ model_stride: int = (
+     8  # Model downsampling factor, 8 for Citrinet and FastConformer models and 4 for Conformer models.
+ )

# Decoding strategy for CTC models
decoding: CTCDecodingConfig = CTCDecodingConfig()
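The `model_stride` comment above is the key sizing knob for buffered inference: the encoder emits one output frame per `model_stride` feature frames, so it fixes how many output tokens a chunk of audio can produce. A rough sketch of the arithmetic this implies, assuming a typical 10 ms preprocessor window stride (the exact formulas in the NeMo script may differ):

```python
import math

window_stride = 0.01     # preprocessor hop in seconds (assumed typical value)
model_stride = 8         # 8 for Citrinet/FastConformer, 4 for Conformer
chunk_len_in_secs = 1.6  # defaults from the config above
total_buffer_in_secs = 4.0

# Seconds of audio covered by one encoder output frame.
model_stride_in_secs = window_stride * model_stride  # 0.08 s

# Encoder frames contributed by each new chunk.
tokens_per_chunk = math.ceil(chunk_len_in_secs / model_stride_in_secs)  # 20

# Frames of delay to the midpoint of the right-context padding.
mid_delay = math.ceil(
    (chunk_len_in_secs + (total_buffer_in_secs - chunk_len_in_secs) / 2)
    / model_stride_in_secs
)  # 35

print(tokens_per_chunk, mid_delay)  # 20 35
```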
@@ -163,16 +165,6 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
# Disable config overwriting
OmegaConf.set_struct(model_cfg.preprocessor, True)

- # setup AMP (optional)
- if cfg.amp and torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and hasattr(torch.cuda.amp, 'autocast'):
-     logging.info("AMP enabled!\n")
-     autocast = torch.cuda.amp.autocast
- else:
-
-     @contextlib.contextmanager
-     def autocast():
-         yield

# Compute output filename
cfg = compute_output_filename(cfg, model_name)

@@ -214,20 +206,24 @@ def autocast():
logging.info(f"tokens_per_chunk is {tokens_per_chunk}, mid_delay is {mid_delay}")

frame_asr = FrameBatchASR(
-     asr_model=asr_model, frame_len=chunk_len, total_buffer=cfg.total_buffer_in_secs, batch_size=cfg.batch_size,
+     asr_model=asr_model,
+     frame_len=chunk_len,
+     total_buffer=cfg.total_buffer_in_secs,
+     batch_size=cfg.batch_size,
)

- hyps = get_buffered_pred_feat(
-     frame_asr,
-     chunk_len,
-     tokens_per_chunk,
-     mid_delay,
-     model_cfg.preprocessor,
-     model_stride_in_secs,
-     asr_model.device,
-     manifest,
-     filepaths,
- )
+ with torch.amp.autocast(asr_model.device.type, enabled=cfg.amp):
+     hyps = get_buffered_pred_feat(
+         frame_asr,
+         chunk_len,
+         tokens_per_chunk,
+         mid_delay,
+         model_cfg.preprocessor,
+         model_stride_in_secs,
+         asr_model.device,
+         manifest,
+         filepaths,
+     )
output_filename, pred_text_attr_name = write_transcription(
hyps, cfg, model_name, filepaths=filepaths, compute_langs=False, compute_timestamps=False
)
@@ -84,8 +84,6 @@
from nemo.core.config import hydra_runner
from nemo.utils import logging

- can_gpu = torch.cuda.is_available()
-

@dataclass
class TranscriptionConfig:
@@ -112,7 +110,9 @@ class TranscriptionConfig:
# Chunked configs
chunk_len_in_secs: float = 1.6 # Chunk length in seconds
total_buffer_in_secs: float = 4.0 # Length of buffer (chunk + left and right padding) in seconds
- model_stride: int = 8  # Model downsampling factor, 8 for Citrinet and FastConformer models and 4 for Conformer models.
+ model_stride: int = (
+     8  # Model downsampling factor, 8 for Citrinet and FastConformer models and 4 for Conformer models.
+ )

# Set `cuda` to int to define CUDA device. If 'None', will look for CUDA
# device anyway, and do inference on CPU only if CUDA device is not found.
@@ -274,6 +274,7 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
batch_size=cfg.batch_size,
manifest=manifest,
filepaths=filepaths,
+ accelerator=accelerator,
)

output_filename, pred_text_attr_name = write_transcription(