Skip to content

Commit

Permalink
update users/raissi
Browse files (browse the repository at this point in the history)
  • Loading branch information
Marvin84 committed Dec 11, 2024
1 parent 180646d commit aae42ae
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
10 changes: 8 additions & 2 deletions users/raissi/experiments/domain_mismtach/medline/base_args.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,6 +14,7 @@
import i6_core.corpus as corpus_recipes
import i6_core.meta as meta
from i6_core.audio import BlissChangeEncodingJob
from i6_core.lexicon import MergeLexiconJob

from i6_experiments.common.baselines.librispeech.data import CorpusData
from i6_experiments.common.datasets.librispeech import (
Expand Down Expand Up @@ -143,7 +144,7 @@ class DATASET:
),
corpus=wmt22_medline_noise07,
lm=tk.Path(f"{PREPATH_ASR3}/lm/v2/only_medline/ufal_v1_lm_3more.gz", cached=True, hash_overwrite="v22_lm"),
description="lexicon uses both LBS and medline data with words repeating 3 or more",
description="lexicon uses only medline data with words repeating 3 or more",
),
0.3: DATASET(
lexicon_with_unk=tk.Path(
Expand All @@ -158,7 +159,7 @@ class DATASET:
),
corpus=wmt22_medline_noise03,
lm=tk.Path(f"{PREPATH_ASR3}/lm/v2/only_medline/ufal_v1_lm_3more.gz", cached=True, hash_overwrite="v22_lm"),
description="lexicon uses both LBS and medline data with words repeating 3 or more",
description="lexicon uses only medline data with words repeating 3 or more",
),
}

Expand Down Expand Up @@ -295,6 +296,11 @@ def get_corpus_data_inputs(
if add_unknown_for_medline_lex
else MEDLINE_DATA["dev"][version][noise].lexicon_no_unk
)
if version > 1:
seed_lexicon = "seed_withunk.xml.gz" if add_unknown_for_medline_lex else "seed_nounk.xml.gz"
seed_lexicon_path = tk.Path(("/").join([f"{PREPATH_ASR3}", f"lexicon/seed_lbs_lexicon_nolemmata/{seed_lexicon}"]), hash_overwrite=f"seed_{seed_lexicon}")
med_lex = MergeLexiconJob([seed_lexicon_path, med_lex]).out_bliss_lexicon

oov_lexicon_medline = {
"filename": med_lex,
"normalize_pronunciation": False,
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(
gpu=gpu,
)
self.trafo_lm_config = self.get_eugen_trafo_with_quant_and_compress_config()
self.lstm_lm_config = self.get_kazuki_lstm_config()
#self.lstm_lm_config = self.get_kazuki_lstm_config()


def get_kazuki_lstm_config(
Expand All @@ -88,14 +88,13 @@ def get_kazuki_lstm_config(
) -> rasr.RasrConfig:

assert self.library_path is not None
dependency_path = Path("/work/asr4/raissi/setups/librispeech/960-ls/dependencies/trafo-lm_kazuki/IS2019", hash_overwrite="LBS_LM_KAZUKI")

trafo_config = rasr.RasrConfig()

#model and graph info
trafo_config.loader.type = "meta"
trafo_config.loader.meta_graph_file = tk.Path("/u/rossenbach/experiments/asru_ls100_full_context_transducer/work/crnn/compile/CompileTFGraphJob.0dxq1DSvOxuN/output/graph.meta", cached=True)
trafo_config.loader.saved_model_file = DelayedFormat("/u/zhou/asr-exps/librispeech/dependencies/kazuki_lstmlm_27062019/network.040")
trafo_config.loader.meta_graph_file = tk.Path("/u/raissi/Desktop/debug/lm/graph.meta", cached=True)
trafo_config.loader.saved_model_file = DelayedFormat("/work/asr4/rossenbach/custom_projects/kazuki_replicate_lm_training/net-model/network.029")
trafo_config.loader.required_libraries = self.library_path

trafo_config.type = "tfrnn"
Expand Down

0 comments on commit aae42ae

Please sign in to comment.