From c34189cbd8b0d6cc1f99c22256c76edb4770788c Mon Sep 17 00:00:00 2001
From: shanguanma <nanr9544@gmail.com>
Date: Tue, 23 Aug 2022 09:58:45 +0800
Subject: [PATCH 1/4] Fix quantization import: use multi_quantization package

Signed-off-by: shanguanma <nanr9544@gmail.com>
---
 egs/librispeech/ASR/distillation_with_hubert.sh              | 4 ++--
 egs/librispeech/ASR/pruned_transducer_stateless6/vq_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/egs/librispeech/ASR/distillation_with_hubert.sh b/egs/librispeech/ASR/distillation_with_hubert.sh
index 9c47e8eaeb..2a69d39211 100755
--- a/egs/librispeech/ASR/distillation_with_hubert.sh
+++ b/egs/librispeech/ASR/distillation_with_hubert.sh
@@ -81,9 +81,9 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ] && [ ! "$use_extracted_codebook" ==
   # or
   # pip install multi_quantization
 
-  has_quantization=$(python3 -c "import importlib; print(importlib.util.find_spec('quantization') is not None)")
+  has_quantization=$(python3 -c "import importlib; print(importlib.util.find_spec('multi_quantization') is not None)")
   if [ $has_quantization == 'False' ]; then
-    log "Please install quantization before running following stages"
+    log "Please install multi_quantization before running following stages"
     exit 1
   fi
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/vq_utils.py b/egs/librispeech/ASR/pruned_transducer_stateless6/vq_utils.py
index e3dcd039bb..65895c9208 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless6/vq_utils.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless6/vq_utils.py
@@ -28,7 +28,7 @@
 import numpy as np
 import torch
 import torch.multiprocessing as mp
-import quantization
+import multi_quantization as quantization
 
 from asr_datamodule import LibriSpeechAsrDataModule
 from hubert_xlarge import HubertXlargeFineTuned

From 92b2ded2a980680f2fd2fc849a0b3202b82e4cec Mon Sep 17 00:00:00 2001
From: shanguanma <nanr9544@gmail.com>
Date: Thu, 25 Aug 2022 09:52:49 +0800
Subject: [PATCH 2/4] Fix missing cut_id in decode_dataset results

Signed-off-by: shanguanma <nanr9544@gmail.com>
---
 .../pruned_transducer_stateless6/hubert_decode.py   | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
index 10b0e5edc1..38105f1312 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
@@ -81,18 +81,17 @@ def decode_dataset(
 
     results = defaultdict(list)
     for batch_idx, batch in enumerate(dl):
-
+        ## hyps is a list, every element is decode result of a sentence.
         hyps = hubert_model.ctc_greedy_search(batch)
-
+        
         texts = batch["supervisions"]["text"]
-        assert len(hyps) == len(texts)
+        cut_ids = [cut.id for cut in batch["supervisions"]["cut"]]
         this_batch = []
-
-        for hyp_text, ref_text in zip(hyps, texts):
+        assert len(hyps) == len(texts)
+        for cut_id, hyp_text, ref_text in zip(cut_ids, hyps, texts):
             ref_words = ref_text.split()
             hyp_words = hyp_text.split()
-            this_batch.append((ref_words, hyp_words))
-
+            this_batch.append((cut_id, ref_words, hyp_words))
         results["ctc_greedy_search"].extend(this_batch)
 
         num_cuts += len(texts)

From dfc45581b3ba078aaea92b38f0f9eb740a8bcdc8 Mon Sep 17 00:00:00 2001
From: shanguanma <nanr9544@gmail.com>
Date: Thu, 25 Aug 2022 10:00:19 +0800
Subject: [PATCH 3/4] Use a single "#" for the comment in decode_dataset

Signed-off-by: shanguanma <nanr9544@gmail.com>
---
 .../ASR/pruned_transducer_stateless6/hubert_decode.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
index 38105f1312..f43a23e43c 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
@@ -81,7 +81,7 @@ def decode_dataset(
 
     results = defaultdict(list)
     for batch_idx, batch in enumerate(dl):
-        ## hyps is a list, every element is decode result of a sentence.
+        # hyps is a list, every element is decode result of a sentence.
         hyps = hubert_model.ctc_greedy_search(batch)
         
         texts = batch["supervisions"]["text"]

From bd2b455f1204069cff814c38bfe2541dd79e7a0e Mon Sep 17 00:00:00 2001
From: shanguanma <nanr9544@gmail.com>
Date: Thu, 25 Aug 2022 10:47:48 +0800
Subject: [PATCH 4/4] fixed code style

Signed-off-by: shanguanma <nanr9544@gmail.com>
---
 .../ASR/pruned_transducer_stateless6/hubert_decode.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
index f43a23e43c..49b5578142 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_decode.py
@@ -83,7 +83,7 @@ def decode_dataset(
     for batch_idx, batch in enumerate(dl):
         # hyps is a list, every element is decode result of a sentence.
         hyps = hubert_model.ctc_greedy_search(batch)
-        
+
         texts = batch["supervisions"]["text"]
         cut_ids = [cut.id for cut in batch["supervisions"]["cut"]]
         this_batch = []