From 519947a61a435edc9604dd819fa4f9a5525dbe84 Mon Sep 17 00:00:00 2001 From: shcxlee Date: Sat, 1 Oct 2022 11:48:22 -0500 Subject: [PATCH 1/6] Modified pruned_transducer_stateless/train.py --- egs/tedlium3/ASR/pruned_transducer_stateless/train.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py index b6fc9a9265..b004785693 100755 --- a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py +++ b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py @@ -662,14 +662,6 @@ def remove_short_and_long_utt(c: Cut): train_cuts = train_cuts.filter(remove_short_and_long_utt) - num_left = len(train_cuts) - num_removed = num_in_total - num_left - removed_percent = num_removed / num_in_total * 100 - - logging.info(f"Before removing short and long utterances: {num_in_total}") - logging.info(f"After removing short and long utterances: {num_left}") - logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)") - train_dl = tedlium.train_dataloaders(train_cuts) valid_cuts = tedlium.dev_cuts() valid_dl = tedlium.valid_dataloaders(valid_cuts) From 3ebe6fc8acbf488a8a29c71edb97a7ef9a196585 Mon Sep 17 00:00:00 2001 From: shcxlee Date: Sat, 1 Oct 2022 22:57:26 -0500 Subject: [PATCH 2/6] Modified transducer_stateless/train.py --- egs/tedlium3/ASR/transducer_stateless/train.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/egs/tedlium3/ASR/transducer_stateless/train.py b/egs/tedlium3/ASR/transducer_stateless/train.py index dda6108c5e..6851a38f68 100755 --- a/egs/tedlium3/ASR/transducer_stateless/train.py +++ b/egs/tedlium3/ASR/transducer_stateless/train.py @@ -631,14 +631,6 @@ def remove_short_and_long_utt(c: Cut): train_cuts = train_cuts.filter(remove_short_and_long_utt) - num_left = len(train_cuts) - num_removed = num_in_total - num_left - removed_percent = num_removed / num_in_total * 100 - - logging.info(f"Before removing short and long utterances: {num_in_total}") - logging.info(f"After removing short and long utterances: {num_left}") - logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)") - train_dl = tedlium.train_dataloaders(train_cuts) valid_cuts = tedlium.dev_cuts() valid_dl = tedlium.valid_dataloaders(valid_cuts) From 26142356e37784f5ade847f45b567b66bedefd71 Mon Sep 17 00:00:00 2001 From: shcxlee Date: Sat, 1 Oct 2022 23:25:32 -0500 Subject: [PATCH 3/6] Deleted variable 'num_in_total' --- egs/tedlium3/ASR/transducer_stateless/train.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/egs/tedlium3/ASR/transducer_stateless/train.py b/egs/tedlium3/ASR/transducer_stateless/train.py index 6851a38f68..09cbf4a004 100755 --- a/egs/tedlium3/ASR/transducer_stateless/train.py +++ b/egs/tedlium3/ASR/transducer_stateless/train.py @@ -627,8 +627,6 @@ def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 17 seconds return 1.0 <= c.duration <= 17.0 - num_in_total = len(train_cuts) - train_cuts = train_cuts.filter(remove_short_and_long_utt) train_dl = tedlium.train_dataloaders(train_cuts) From 966b7e482edce1d68140489137584e97ce06aec5 Mon Sep 17 00:00:00 2001 From: shcxlee Date: Sat, 1 Oct 2022 23:25:39 -0500 Subject: [PATCH 4/6] Deleted variable 'num_in_total' --- egs/tedlium3/ASR/pruned_transducer_stateless/train.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py index b004785693..8d5cdf6831 100755 --- a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py +++ b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py @@ -658,8 +658,6 @@ def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 17 seconds return 1.0 <= c.duration <= 17.0 - num_in_total = len(train_cuts) - train_cuts = train_cuts.filter(remove_short_and_long_utt) train_dl = tedlium.train_dataloaders(train_cuts) From 70343b32dd93f1d990700fbc37a52ace3d88b0a4 Mon Sep 17 00:00:00 2001 From: shcxlee Date: Sat, 1 Oct 2022 23:48:20 -0500 Subject: [PATCH 5/6] Merged latest master --- egs/tedlium3/ASR/pruned_transducer_stateless/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py index 8d5cdf6831..3ea4998705 100755 --- a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py +++ b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py @@ -1,3 +1,4 @@ +# #!/usr/bin/env python3 # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, # Wei Kang From e7b2b2e4f7b8b008300f95da0932be11382f49d9 Mon Sep 17 00:00:00 2001 From: shcxlee Date: Sat, 1 Oct 2022 23:48:32 -0500 Subject: [PATCH 6/6] Merged latest master --- egs/tedlium3/ASR/pruned_transducer_stateless/train.py | 1 - 1 file changed, 1 deletion(-) diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py index 3ea4998705..8d5cdf6831 100755 --- a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py +++ b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py @@ -1,4 +1,3 @@ -# #!/usr/bin/env python3 # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, # Wei Kang