diff --git a/egs/aishell/asr/simple_v1/mmi_bigram_train.py b/egs/aishell/asr/simple_v1/mmi_bigram_train.py index 2ff9342d..b5371018 100644 --- a/egs/aishell/asr/simple_v1/mmi_bigram_train.py +++ b/egs/aishell/asr/simple_v1/mmi_bigram_train.py @@ -1,3 +1,4 @@ + #!/usr/bin/env python3 # Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu, Mingshuang Luo) # 2021 Pingfeng Luo diff --git a/egs/timit/asr/simple_v1/RESULTS.md b/egs/timit/asr/simple_v1/RESULTS.md new file mode 100644 index 00000000..066bcf95 --- /dev/null +++ b/egs/timit/asr/simple_v1/RESULTS.md @@ -0,0 +1,65 @@ +# TIMIT CTC Training Results + +## 2021-09-03 +(Mingshuang Luo): + +### TIMIT CTC_Train Based on 48 phones + +Testing results based on different training epochs: +``` +epoch=20 +2021-09-03 10:54:10,903 INFO [ctc_decode.py:188] %PER 30.34% [2225 / 7333, 293 ins, 441 del, 1491 sub ] + +epoch=30 +2021-09-03 10:59:10,147 INFO [ctc_decode.py:188] %PER 29.77% [2183 / 7333, 221 ins, 473 del, 1489 sub ] + +epoch=35 +2021-09-03 11:11:00,885 INFO [ctc_decode.py:188] %PER 28.94% [2122 / 7333, 266 ins, 397 del, 1459 sub ] + +epoch=40 +2021-09-03 11:12:39,029 INFO [ctc_decode.py:188] %PER 29.52% [2165 / 7333, 304 ins, 348 del, 1513 sub ] +``` + +### TIMIT CTC_Train Based on 39 phones + +Testing results based on different training epochs: +``` +epoch=40 +2021-09-13 11:02:14,793 INFO [ctc_decode.py:189] %PER 25.61% [1848 / 7215, 301 ins, 396 del, 1151 sub ] + +epoch=45 +2021-09-13 11:01:20,787 INFO [ctc_decode.py:189] %PER 25.50% [1840 / 7215, 286 ins, 386 del, 1168 sub ] + +epoch=47 +2021-09-13 11:04:05,533 INFO [ctc_decode.py:189] %PER 26.20% [1890 / 7215, 373 ins, 367 del, 1150 sub ] + +``` +### TIMIT CTC_TRAIN_with_CRDNN Based on 48 phones + +Testing results based on different training epochs: +``` +epoch=35 +2021-09-13 11:21:01,592 INFO [ctc_crdnn_decode.py:201] %PER 20.46% [1476 / 7215, 249 ins, 356 del, 871 sub ] + +epoch=45 +2021-09-13 11:22:02,221 INFO [ctc_crdnn_decode.py:201] %PER 19.75% [1425 / 7215, 239 ins, 324 del, 862 sub ] + +epoch=53 +2021-09-13 11:23:07,969 INFO [ctc_crdnn_decode.py:201] %PER 18.86% [1361 / 7215, 214 ins, 320 del, 827 sub ] + +``` + +### TIMIT CTC_TRAIN_with_CRDNN Based on 39 phones + +Testing results based on different training epochs: +``` +epoch=26 +2021-09-13 11:32:41,388 INFO [ctc_crdnn_decode.py:201] %PER 21.04% [1518 / 7215, 345 ins, 251 del, 922 sub ] + +epoch=45 +2021-09-13 11:34:27,566 INFO [ctc_crdnn_decode.py:201] %PER 18.74% [1352 / 7215, 316 ins, 239 del, 797 sub ] + +epoch=55 +2021-09-13 11:35:55,751 INFO [ctc_crdnn_decode.py:201] %PER 18.24% [1316 / 7215, 267 ins, 242 del, 807 sub ] + +``` \ No newline at end of file diff --git a/egs/timit/asr/simple_v1/ctc_crdnn_decode.py b/egs/timit/asr/simple_v1/ctc_crdnn_decode.py new file mode 100644 index 00000000..2a981f78 --- /dev/null +++ b/egs/timit/asr/simple_v1/ctc_crdnn_decode.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu) +# 2021 Xiaomi Corporation (authors: Mingshuang Luo) +# Apache 2.0 + +# Notice: before you run this script, you should install speechbrain first. +# You can install speechbrain by "pip install speechbrain". 
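+#
+# Below is a minimal availability check (an illustrative addition, assuming
+# only that speechbrain is importable once installed): it turns a missing
+# install into an actionable error instead of a bare ImportError later on.
+try:
+    import speechbrain  # noqa: F401  -- presence check only
+except ImportError as e:
+    raise ImportError(
+        'this script needs speechbrain; install it with "pip install speechbrain"'
+    ) from e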
+ +import k2 +import logging +import os +import torch +import torch.nn as nn + +from k2 import Fsa, SymbolTable +from kaldialign import edit_distance +from pathlib import Path +from typing import Union + +from lhotse import CutSet +from lhotse.dataset import K2SpeechRecognitionDataset +from lhotse.dataset import SingleCutSampler +from snowfall.common import find_first_disambig_symbol +from snowfall.common import get_phone_symbols +from snowfall.common import get_texts +from snowfall.common import load_checkpoint +from snowfall.common import setup_logger +from snowfall.decoding.graph import compile_HLG +from snowfall.models import AcousticModel +from snowfall.training.ctc_graph import build_ctc_topo + +import sys +import argparse + +from speechbrain.lobes.models.CRDNN import CRDNN +from speechbrain.nnet.linear import Linear + +class crdnn_model(nn.Module): + def __init__(self, num_features:int, + num_classes:int, + subsampling_factor: int, + crdnn, + linear): + + super(crdnn_model, self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.subsampling_factor = subsampling_factor + + self.crdnn = crdnn + self.linear=linear + + def forward(self, x): + x = self.crdnn(x) + x = self.linear(x) + x = x.transpose(1,2) + x = nn.functional.log_softmax(x, dim=1) + + return x + +def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel, + device: Union[str, torch.device], HLG: Fsa, symbols: SymbolTable): + tot_num_cuts = len(dataloader) + num_cuts = 0 + results = [] # a list of pair (ref_words, hyp_words) + for batch_idx, batch in enumerate(dataloader): + + feature = batch['inputs'] + supervisions = batch['supervisions'] + supervision_segments = torch.stack( + (supervisions['sequence_idx'], + torch.floor_divide(supervisions['start_frame'], + model.subsampling_factor), + torch.floor_divide(supervisions['num_frames'], + model.subsampling_factor)), 1).to(torch.int32) + indices = torch.argsort(supervision_segments[:, 2], descending=True) + supervision_segments = supervision_segments[indices] + texts = supervisions['text'] + assert feature.ndim == 3 + + feature = feature.to(device) + # at entry, feature is [N, T, C] + #feature = feature.permute(0, 2, 1) # now feature is [N, C, T] + with torch.no_grad(): + nnet_output = model(feature) + # nnet_output is [N, C, T] + nnet_output = nnet_output.permute(0, 2, + 1) # now nnet_output is [N, T, C] + + blank_bias = -3.0 + nnet_output[:, :, 0] += blank_bias + + dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments) + # assert HLG.is_cuda() + assert HLG.device == nnet_output.device, \ + f"Check failed: HLG.device ({HLG.device}) == nnet_output.device ({nnet_output.device})" + # TODO(haowen): with a small `beam`, we may get empty `target_graph`, + # thus `tot_scores` will be `inf`. Definitely we need to handle this later. 
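+        # The positional arguments below are, in order, search_beam=20.0,
+        # output_beam=7.0, min_active_states=30 and max_active_states=10000
+        # (following k2.intersect_dense_pruned's signature at the time of
+        # writing); wider beams prune less, so they decode more accurately
+        # but cost more memory and time.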
+ lattices = k2.intersect_dense_pruned(HLG, dense_fsa_vec, 20.0, 7.0, 30, 10000) + + # lattices = k2.intersect_dense(HLG, dense_fsa_vec, 10.0) + best_paths = k2.shortest_path(lattices, use_double_scores=True) + assert best_paths.shape[0] == len(texts) + hyps = get_texts(best_paths, indices) + assert len(hyps) == len(texts) + + for i in range(len(texts)): + hyp_words = [symbols.get(x) for x in hyps[i]] + ref_words = texts[i].split(' ') + results.append((ref_words, hyp_words)) + + if batch_idx % 10 == 0: + logging.info( + 'batch {}, cuts processed until now is {}/{} ({:.6f}%)'.format( + batch_idx, num_cuts, tot_num_cuts, + float(num_cuts) / tot_num_cuts * 100)) + + num_cuts += len(texts) + + return results + + +def main(): + import argparse + + parser = argparse.ArgumentParser(description='Process some integers.') + parser.add_argument('--epoch', type=int, default=20, + help='the checkpoint for loading.') + + parser.add_argument('--mode', type=str, default='TEST', + help='the mode to test.') + + args = parser.parse_args() + + exp_dir = Path('exp-lstm-adam-ctc-musan-with-crdnn') + setup_logger('{}/log/log-decode'.format(exp_dir), log_level='debug') + + # load L, G, symbol_table + lang_dir = Path('data/lang_nosp') + symbol_table = k2.SymbolTable.from_file(lang_dir / 'words.txt') + phone_symbol_table = k2.SymbolTable.from_file(lang_dir / 'phones.txt') + phone_ids = get_phone_symbols(phone_symbol_table) + phone_ids_with_blank = [0] + phone_ids + ctc_topo = k2.arc_sort(build_ctc_topo(phone_ids_with_blank)) + + if not os.path.exists(lang_dir / 'HLG.pt'): + print("Loading L_disambig.fst.txt") + with open(lang_dir / 'L_disambig.fst.txt') as f: + L = k2.Fsa.from_openfst(f.read(), acceptor=False) + print("Loading G.fst.txt") + with open(lang_dir / 'G.fst.txt') as f: + G = k2.Fsa.from_openfst(f.read(), acceptor=False) + first_phone_disambig_id = find_first_disambig_symbol(phone_symbol_table) + first_word_disambig_id = find_first_disambig_symbol(symbol_table) + HLG = compile_HLG(L=L, + G=G, + H=ctc_topo, + labels_disambig_id_start=first_phone_disambig_id, + aux_labels_disambig_id_start=first_word_disambig_id) + torch.save(HLG.as_dict(), lang_dir / 'HLG.pt') + else: + print("Loading pre-compiled HLG") + d = torch.load(lang_dir / 'HLG.pt') + HLG = k2.Fsa.from_dict(d) + + # load dataset + feature_dir = Path('exp/data') + print("About to get test cuts") + cuts_test = CutSet.from_json(feature_dir / 'cuts_{}.json.gz'.format(args.mode)) + + print("About to create test dataset") + test = K2SpeechRecognitionDataset(cuts_test) + sampler = SingleCutSampler(cuts_test, max_frames=100000) + print("About to create test dataloader") + test_dl = torch.utils.data.DataLoader(test, batch_size=None, sampler=sampler, num_workers=1) + + # if not torch.cuda.is_available(): + # logging.error('No GPU detected!') + # sys.exit(-1) + + print("About to load model") + # Note: Use "export CUDA_VISIBLE_DEVICES=N" to setup device id to N + # device = torch.device('cuda', 1) + device = torch.device('cuda') + + crdnn = CRDNN( + input_size=80, + time_pooling=True) + + linear = Linear( + input_size=512, + n_neurons=len(phone_ids) + 1) + + model = crdnn_model(80, len(phone_ids)+1, 2, crdnn, linear) + + checkpoint = os.path.join(exp_dir, 'epoch-{}.pt'.format(args.epoch)) + load_checkpoint(checkpoint, model) + model.to(device) + model.eval() + + print("convert HLG to device") + HLG = HLG.to(device) + HLG.aux_labels = k2.ragged.remove_values_eq(HLG.aux_labels, 0) + HLG.requires_grad_(False) + print("About to decode") + results = 
decode(dataloader=test_dl, + model=model, + device=device, + HLG=HLG, + symbols=symbol_table) + s = '' + for ref, hyp in results: + s += f'ref={ref}\n' + s += f'hyp={hyp}\n' + #logging.info(s) + results = [([one for one in n[0] if one], [one for one in n[1] if one]) for n in results] + # compute WER + dists = [edit_distance(r, h) for r, h in results] + errors = { + key: sum(dist[key] for dist in dists) + for key in ['sub', 'ins', 'del', 'total'] + } + total_words = sum(len(ref) for ref, _ in results) + + logging.info( + f'%PER {errors["total"] / total_words:.2%} ' + f'[{errors["total"]} / {total_words}, {errors["ins"]} ins, {errors["del"]} del, {errors["sub"]} sub ]' + ) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/egs/timit/asr/simple_v1/ctc_crdnn_train.py b/egs/timit/asr/simple_v1/ctc_crdnn_train.py new file mode 100644 index 00000000..109e1110 --- /dev/null +++ b/egs/timit/asr/simple_v1/ctc_crdnn_train.py @@ -0,0 +1,456 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu) +# 2021 Xiaomi Corporation (authors: Mingshuang Luo) +# Apache 2.0 + +# Notice: before you run this script, you should install speechbrain first. +# You can install speechbrain by "pip install speechbrain". + +import k2 +import logging +import math +import numpy as np +import os +import sys + +import torch +import torch.nn as nn +import torch.optim as optim +from datetime import datetime +from pathlib import Path +from torch.nn.utils import clip_grad_value_, clip_grad_norm_ +from torch.utils.tensorboard import SummaryWriter +from typing import Dict, Optional, Tuple + +from lhotse import CutSet +from lhotse.dataset import CutConcatenate, CutMix, K2SpeechRecognitionDataset, SingleCutSampler +from lhotse.utils import fix_random_seed +from snowfall.common import describe +from snowfall.common import get_phone_symbols +from snowfall.common import load_checkpoint, save_checkpoint +from snowfall.common import save_training_info +from snowfall.common import setup_logger +from snowfall.models import AcousticModel +from snowfall.training.ctc_graph import CtcTrainingGraphCompiler + +from speechbrain.lobes.models.CRDNN import CRDNN +from speechbrain.nnet.linear import Linear + +class crdnn_model(nn.Module): + def __init__(self, num_features:int, + num_classes:int, + subsampling_factor: int, + crdnn, + linear): + + super(crdnn_model, self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.subsampling_factor = subsampling_factor + + self.crdnn = crdnn + self.linear=linear + + def forward(self, x): + x = self.crdnn(x) + x = self.linear(x) + x = x.transpose(1,2) + x = nn.functional.log_softmax(x, dim=1) + + return x + +def get_tot_objf_and_num_frames(tot_scores: torch.Tensor, + frames_per_seq: torch.Tensor + ) -> Tuple[float, int, int]: + ''' Figures out the total score(log-prob) over all successful supervision segments + (i.e. 
those for which the total score wasn't -infinity), and the corresponding + number of frames of neural net output + Args: + tot_scores: a Torch tensor of shape (num_segments,) containing total scores + from forward-backward + frames_per_seq: a Torch tensor of shape (num_segments,) containing the number of + frames for each segment + Returns: + Returns a tuple of 3 scalar tensors: (tot_score, ok_frames, all_frames) + where ok_frames is the frames for successful (finite) segments, and + all_frames is the frames for all segments (finite or not). + ''' + mask = torch.ne(tot_scores, -math.inf) + # finite_indexes is a tensor containing successful segment indexes, e.g. + # [ 0 1 3 4 5 ] + finite_indexes = torch.nonzero(mask).squeeze(1) + if False: + bad_indexes = torch.nonzero(~mask).squeeze(1) + if bad_indexes.shape[0] > 0: + print("Bad indexes: ", bad_indexes, ", bad lengths: ", + frames_per_seq[bad_indexes], " vs. max length ", + torch.max(frames_per_seq), ", avg ", + (torch.sum(frames_per_seq) / frames_per_seq.numel())) + + ok_frames = frames_per_seq[finite_indexes].sum() + all_frames = frames_per_seq.sum() + return (tot_scores[finite_indexes].sum(), ok_frames, all_frames) + + +def get_objf(batch: Dict, + model: AcousticModel, + device: torch.device, + graph_compiler: CtcTrainingGraphCompiler, + training: bool, + optimizer: Optional[torch.optim.Optimizer] = None): + feature = batch['inputs'] + supervisions = batch['supervisions'] + supervision_segments = torch.stack( + (supervisions['sequence_idx'], + torch.floor_divide(supervisions['start_frame'], + 2), + torch.floor_divide(supervisions['num_frames'], + 2)), 1).to(torch.int32) + indices = torch.argsort(supervision_segments[:, 2], descending=True) + supervision_segments = supervision_segments[indices] + + texts = supervisions['text'] + texts = [texts[idx] for idx in indices] + assert feature.ndim == 3 + # print(supervision_segments[:, 1] + supervision_segments[:, 2]) + + feature = feature.to(device) + # at entry, feature is [N, T, C] + #feature = feature.permute(0, 2, 1) # now feature is [N, C, T] + if training: + nnet_output = model(feature) + else: + with torch.no_grad(): + nnet_output = model(feature) + + # nnet_output is [N, C, T] + nnet_output = nnet_output.permute(0, 2, 1) # now nnet_output is [N, T, C] + #print('nnet_output: ', nnet_output.size(), nnet_output) + decoding_graph = graph_compiler.compile(texts).to(device) + + # nnet_output2 = nnet_output.clone() + # blank_bias = -7.0 + # nnet_output2[:,:,0] += blank_bias + + dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments) + assert decoding_graph.is_cuda() + assert decoding_graph.device == device + assert nnet_output.device == device + + target_graph = k2.intersect_dense(decoding_graph, dense_fsa_vec, 10.0) + + tot_scores = target_graph.get_tot_scores( + log_semiring=True, + use_double_scores=True) + + (tot_score, tot_frames, + all_frames) = get_tot_objf_and_num_frames(tot_scores, + supervision_segments[:, 2]) + + if training: + optimizer.zero_grad() + (-tot_score).backward() + #clip_grad_value_(model.parameters(), 5.0) + clip_grad_norm_(model.parameters(), max_norm=5.0, norm_type=2.0) + optimizer.step() + + ans = -tot_score.detach().cpu().item(), tot_frames.cpu().item( + ), all_frames.cpu().item() + return ans + + +def get_validation_objf(dataloader: torch.utils.data.DataLoader, + model: AcousticModel, device: torch.device, + graph_compiler: CtcTrainingGraphCompiler): + total_objf = 0. + total_frames = 0. # for display only + total_all_frames = 0. 
# all frames including those seqs that failed. + + model.eval() + + for batch_idx, batch in enumerate(dataloader): + objf, frames, all_frames = get_objf(batch, model, device, + graph_compiler, False) + total_objf += objf + total_frames += frames + total_all_frames += all_frames + + return total_objf, total_frames, total_all_frames + + +def train_one_epoch(dataloader: torch.utils.data.DataLoader, + valid_dataloader: torch.utils.data.DataLoader, + model: AcousticModel, device: torch.device, + graph_compiler: CtcTrainingGraphCompiler, + optimizer: torch.optim.Optimizer, + current_epoch: int, + tb_writer: SummaryWriter, + num_epochs: int, + global_batch_idx_train: int): + total_objf, total_frames, total_all_frames = 0., 0., 0. + valid_average_objf = float('inf') + time_waiting_for_batch = 0 + prev_timestamp = datetime.now() + + model.train() + for batch_idx, batch in enumerate(dataloader): + global_batch_idx_train += 1 + timestamp = datetime.now() + time_waiting_for_batch += (timestamp - prev_timestamp).total_seconds() + curr_batch_objf, curr_batch_frames, curr_batch_all_frames = \ + get_objf(batch, model, device, graph_compiler, True, optimizer) + + total_objf += curr_batch_objf + total_frames += curr_batch_frames + total_all_frames += curr_batch_all_frames + + if batch_idx % 10 == 0: + logging.info( + 'batch {}, epoch {}/{} ' + 'global average objf: {:.6f} over {} ' + 'frames ({:.1f}% kept), current batch average objf: {:.6f} over {} frames ({:.1f}% kept) ' + 'avg time waiting for batch {:.3f}s'.format( + batch_idx, current_epoch, num_epochs, + total_objf / total_frames, total_frames, + 100.0 * total_frames / total_all_frames, + curr_batch_objf / (curr_batch_frames + 0.001), + curr_batch_frames, + 100.0 * curr_batch_frames / curr_batch_all_frames, + time_waiting_for_batch / max(1, batch_idx))) + + tb_writer.add_scalar('train/global_average_objf', + total_objf / total_frames, global_batch_idx_train) + + tb_writer.add_scalar('train/current_batch_average_objf', + curr_batch_objf / (curr_batch_frames + 0.001), + global_batch_idx_train) + # if batch_idx >= 10: + # print("Exiting early to get profile info") + # sys.exit(0) + + if batch_idx > 0 and batch_idx % 10 == 0: + total_valid_objf, total_valid_frames, total_valid_all_frames = get_validation_objf( + dataloader=valid_dataloader, + model=model, + device=device, + graph_compiler=graph_compiler) + valid_average_objf = total_valid_objf / total_valid_frames + model.train() + logging.info( + 'Validation average objf: {:.6f} over {} frames ({:.1f}% kept)' + .format(valid_average_objf, + total_valid_frames, + 100.0 * total_valid_frames / total_valid_all_frames)) + + tb_writer.add_scalar('train/global_valid_average_objf', + valid_average_objf, + global_batch_idx_train) + prev_timestamp = datetime.now() + if batch_idx >= 50: + break + return total_objf / total_frames, valid_average_objf, global_batch_idx_train + + +def main(): + import argparse + + parser = argparse.ArgumentParser(description='Process some integers.') + parser.add_argument('--epochs', type=int, default=100, help='the number of epoch for training.') + + parser.add_argument('--duration', type=int, default=200, help='the max duration in a batch for training.') + + args = parser.parse_args() + + fix_random_seed(42) + + start_epoch = 7 + num_epochs = args.epochs + + exp_dir = 'exp-lstm-adam-ctc-musan-with-crdnn' + setup_logger('{}/log/log-train'.format(exp_dir)) + tb_writer = SummaryWriter(log_dir=f'{exp_dir}/tensorboard') + + # load L, G, symbol_table + lang_dir = Path('data/lang_nosp') + 
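+    # data/lang_nosp is assumed to come from the recipe's data preparation
+    # stage; it provides the phone/word symbol tables read below and the
+    # lexicon FST (L.fst.txt, cached as Linv.pt) used to build the CTC
+    # training graphs.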
phone_symbol_table = k2.SymbolTable.from_file(lang_dir / 'phones.txt') + word_symbol_table = k2.SymbolTable.from_file(lang_dir / 'words.txt') + + logging.info("Loading L.fst") + if (lang_dir / 'Linv.pt').exists(): + L_inv = k2.Fsa.from_dict(torch.load(lang_dir / 'Linv.pt')) + else: + with open(lang_dir / 'L.fst.txt') as f: + L = k2.Fsa.from_openfst(f.read(), acceptor=False) + L_inv = k2.arc_sort(L.invert_()) + torch.save(L_inv.as_dict(), lang_dir / 'Linv.pt') + + graph_compiler = CtcTrainingGraphCompiler( + L_inv=L_inv, + phones=phone_symbol_table, + words=word_symbol_table + ) + phone_ids = get_phone_symbols(phone_symbol_table) + + # load dataset + feature_dir = Path('exp/data') + logging.info("About to get train cuts") + cuts_train = CutSet.from_json(feature_dir / + 'cuts_TRAIN.json.gz') + logging.info("About to get dev cuts") + cuts_dev = CutSet.from_json(feature_dir / 'cuts_TEST.json.gz') + logging.info("About to get Musan cuts") + cuts_musan = CutSet.from_json(feature_dir / 'cuts_musan.json.gz') + + logging.info("About to create train dataset") + train = K2SpeechRecognitionDataset( + cuts_train, + cut_transforms=[ + CutConcatenate(), + CutMix( + cuts=cuts_musan, + prob=0.5, + snr=(10, 20) + ) + ] + ) + train_sampler = SingleCutSampler( + cuts_train, + #max_frames=180000, + max_duration=args.duration, + shuffle=True, + ) + logging.info("About to create train dataloader") + train_dl = torch.utils.data.DataLoader( + train, + sampler=train_sampler, + batch_size=None, + num_workers=4 + ) + logging.info("About to create dev dataset") + validate = K2SpeechRecognitionDataset(cuts_dev) + valid_sampler = SingleCutSampler(cuts_dev, max_frames=90000) + logging.info("About to create dev dataloader") + valid_dl = torch.utils.data.DataLoader( + validate, + sampler=valid_sampler, + batch_size=None, + num_workers=1 + ) + + if not torch.cuda.is_available(): + logging.error('No GPU detected!') + sys.exit(-1) + + logging.info("About to create model") + device_id = 0 + device = torch.device('cuda', device_id) + crdnn = CRDNN( + input_size=80, + time_pooling=True) + + linear = Linear( + input_size=512, + n_neurons=len(phone_ids) + 1) + + model = crdnn_model(80, len(phone_ids)+1, 2, crdnn, linear) + model = model.to(device) + + describe(model) + + learning_rate = 0.7e-3 + optimizer = optim.AdamW(model.parameters(), + lr=learning_rate, + weight_decay=5e-4) + + best_objf = np.inf + best_valid_objf = np.inf + best_epoch = start_epoch + best_model_path = os.path.join(exp_dir, 'best_model.pt') + best_epoch_info_filename = os.path.join(exp_dir, 'best-epoch-info') + global_batch_idx_train = 0 # for logging only + + if start_epoch > 0: + model_path = os.path.join(exp_dir, 'epoch-{}.pt'.format(start_epoch - 1)) + ckpt = load_checkpoint(filename=model_path, model=model, optimizer=optimizer) + best_objf = ckpt['objf'] + best_valid_objf = ckpt['valid_objf'] + global_batch_idx_train = ckpt['global_batch_idx_train'] + logging.info(f"epoch = {ckpt['epoch']}, objf = {best_objf}, valid_objf = {best_valid_objf}") + + for epoch in range(start_epoch, args.epochs): + train_sampler.set_epoch(epoch) + curr_learning_rate = 1e-3 + # curr_learning_rate = learning_rate * pow(0.4, epoch) + # for param_group in optimizer.param_groups: + # param_group['lr'] = curr_learning_rate + + tb_writer.add_scalar('learning_rate', curr_learning_rate, epoch) + + logging.info('epoch {}, learning rate {}'.format( + epoch, curr_learning_rate)) + objf, valid_objf, global_batch_idx_train = train_one_epoch(dataloader=train_dl, + valid_dataloader=valid_dl, 
+ model=model, + device=device, + graph_compiler=graph_compiler, + optimizer=optimizer, + current_epoch=epoch, + tb_writer=tb_writer, + num_epochs=num_epochs, + global_batch_idx_train=global_batch_idx_train) + # the lower, the better + if valid_objf < best_valid_objf: + best_valid_objf = valid_objf + best_objf = objf + best_epoch = epoch + save_checkpoint(filename=best_model_path, + model=model, + epoch=epoch, + optimizer=None, + scheduler=None, + learning_rate=curr_learning_rate, + objf=objf, + valid_objf=valid_objf, + global_batch_idx_train=global_batch_idx_train) + save_training_info(filename=best_epoch_info_filename, + model_path=best_model_path, + current_epoch=epoch, + learning_rate=curr_learning_rate, + objf=best_objf, + best_objf=best_objf, + valid_objf=valid_objf, + best_valid_objf=best_valid_objf, + best_epoch=best_epoch) + + # we always save the model for every epoch + model_path = os.path.join(exp_dir, 'epoch-{}.pt'.format(epoch)) + save_checkpoint(filename=model_path, + model=model, + optimizer=optimizer, + scheduler=None, + epoch=epoch, + learning_rate=curr_learning_rate, + objf=objf, + valid_objf=valid_objf, + global_batch_idx_train=global_batch_idx_train) + epoch_info_filename = os.path.join(exp_dir, + 'epoch-{}-info'.format(epoch)) + save_training_info(filename=epoch_info_filename, + model_path=model_path, + current_epoch=epoch, + learning_rate=curr_learning_rate, + objf=objf, + best_objf=best_objf, + valid_objf=valid_objf, + best_valid_objf=best_valid_objf, + best_epoch=best_epoch) + + logging.warning('Done') + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/egs/timit/asr/simple_v1/ctc_decode.py b/egs/timit/asr/simple_v1/ctc_decode.py new file mode 100644 index 00000000..aff8e13e --- /dev/null +++ b/egs/timit/asr/simple_v1/ctc_decode.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu) +# Apache 2.0 + +import k2 +import logging +import os +import torch +from k2 import Fsa, SymbolTable +from kaldialign import edit_distance +from pathlib import Path +from typing import Union + +from lhotse import CutSet +from lhotse.dataset import K2SpeechRecognitionDataset +from lhotse.dataset import SingleCutSampler +from snowfall.common import find_first_disambig_symbol +from snowfall.common import get_phone_symbols +from snowfall.common import get_texts +from snowfall.common import load_checkpoint +from snowfall.common import setup_logger +from snowfall.decoding.graph import compile_HLG +from snowfall.models import AcousticModel +from snowfall.models.tdnn_lstm import TdnnLstm1b +from snowfall.training.ctc_graph import build_ctc_topo + +import sys +import argparse + +def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel, + device: Union[str, torch.device], HLG: Fsa, symbols: SymbolTable): + tot_num_cuts = len(dataloader) + num_cuts = 0 + results = [] # a list of pair (ref_words, hyp_words) + for batch_idx, batch in enumerate(dataloader): + + feature = batch['inputs'] + supervisions = batch['supervisions'] + supervision_segments = torch.stack( + (supervisions['sequence_idx'], + torch.floor_divide(supervisions['start_frame'], + model.subsampling_factor), + torch.floor_divide(supervisions['num_frames'], + model.subsampling_factor)), 1).to(torch.int32) + indices = torch.argsort(supervision_segments[:, 2], descending=True) + supervision_segments = supervision_segments[indices] + texts = supervisions['text'] + 
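+        # Each supervision_segments row is (sequence_idx, start_frame,
+        # num_frames) in subsampled frames; the argsort above puts segments
+        # in decreasing order of num_frames, the ordering k2.DenseFsaVec
+        # expects.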
assert feature.ndim == 3 + + feature = feature.to(device) + # at entry, feature is [N, T, C] + feature = feature.permute(0, 2, 1) # now feature is [N, C, T] + with torch.no_grad(): + nnet_output = model(feature) + # nnet_output is [N, C, T] + nnet_output = nnet_output.permute(0, 2, + 1) # now nnet_output is [N, T, C] + + blank_bias = -3.0 + nnet_output[:, :, 0] += blank_bias + + dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments) + # assert HLG.is_cuda() + assert HLG.device == nnet_output.device, \ + f"Check failed: HLG.device ({HLG.device}) == nnet_output.device ({nnet_output.device})" + # TODO(haowen): with a small `beam`, we may get empty `target_graph`, + # thus `tot_scores` will be `inf`. Definitely we need to handle this later. + lattices = k2.intersect_dense_pruned(HLG, dense_fsa_vec, 20.0, 7.0, 30, 10000) + + # lattices = k2.intersect_dense(HLG, dense_fsa_vec, 10.0) + best_paths = k2.shortest_path(lattices, use_double_scores=True) + assert best_paths.shape[0] == len(texts) + hyps = get_texts(best_paths, indices) + assert len(hyps) == len(texts) + + for i in range(len(texts)): + hyp_words = [symbols.get(x) for x in hyps[i]] + ref_words = texts[i].split(' ') + results.append((ref_words, hyp_words)) + + if batch_idx % 10 == 0: + logging.info( + 'batch {}, cuts processed until now is {}/{} ({:.6f}%)'.format( + batch_idx, num_cuts, tot_num_cuts, + float(num_cuts) / tot_num_cuts * 100)) + + num_cuts += len(texts) + + return results + + +def main(): + import argparse + + parser = argparse.ArgumentParser(description='Process some integers.') + parser.add_argument('--epoch', type=int, default=9, + help='the checkpoint for loading.') + + parser.add_argument('--mode', type=str, default='TEST', + help='the mode to test.') + + args = parser.parse_args() + + exp_dir = Path('exp-lstm-adam-ctc-musan') + setup_logger('{}/log/log-decode'.format(exp_dir), log_level='debug') + + # load L, G, symbol_table + lang_dir = Path('data/lang_nosp') + symbol_table = k2.SymbolTable.from_file(lang_dir / 'words.txt') + phone_symbol_table = k2.SymbolTable.from_file(lang_dir / 'phones.txt') + phone_ids = get_phone_symbols(phone_symbol_table) + phone_ids_with_blank = [0] + phone_ids + ctc_topo = k2.arc_sort(build_ctc_topo(phone_ids_with_blank)) + + if not os.path.exists(lang_dir / 'HLG.pt'): + print("Loading L_disambig.fst.txt") + with open(lang_dir / 'L_disambig.fst.txt') as f: + L = k2.Fsa.from_openfst(f.read(), acceptor=False) + print("Loading G.fst.txt") + with open(lang_dir / 'G.fst.txt') as f: + G = k2.Fsa.from_openfst(f.read(), acceptor=False) + first_phone_disambig_id = find_first_disambig_symbol(phone_symbol_table) + first_word_disambig_id = find_first_disambig_symbol(symbol_table) + HLG = compile_HLG(L=L, + G=G, + H=ctc_topo, + labels_disambig_id_start=first_phone_disambig_id, + aux_labels_disambig_id_start=first_word_disambig_id) + torch.save(HLG.as_dict(), lang_dir / 'HLG.pt') + else: + print("Loading pre-compiled HLG") + d = torch.load(lang_dir / 'HLG.pt') + HLG = k2.Fsa.from_dict(d) + + # load dataset + feature_dir = Path('exp/data') + print("About to get test cuts") + cuts_test = CutSet.from_json(feature_dir / 'cuts_{}.json.gz'.format(args.mode)) + + print("About to create test dataset") + test = K2SpeechRecognitionDataset(cuts_test) + sampler = SingleCutSampler(cuts_test, max_frames=100000) + print("About to create test dataloader") + test_dl = torch.utils.data.DataLoader(test, batch_size=None, sampler=sampler, num_workers=1) + + # if not torch.cuda.is_available(): + # logging.error('No 
GPU detected!') + # sys.exit(-1) + + print("About to load model") + # Note: Use "export CUDA_VISIBLE_DEVICES=N" to setup device id to N + # device = torch.device('cuda', 1) + device = torch.device('cuda') + model = TdnnLstm1b( + num_features=80, + num_classes=len(phone_ids) + 1, # +1 for the blank symbol + subsampling_factor=2) + + checkpoint = os.path.join(exp_dir, 'epoch-{}.pt'.format(args.epoch)) + load_checkpoint(checkpoint, model) + model.to(device) + model.eval() + + print("convert HLG to device") + HLG = HLG.to(device) + HLG.aux_labels = k2.ragged.remove_values_eq(HLG.aux_labels, 0) + HLG.requires_grad_(False) + print("About to decode") + results = decode(dataloader=test_dl, + model=model, + device=device, + HLG=HLG, + symbols=symbol_table) + s = '' + for ref, hyp in results: + s += f'ref={ref}\n' + s += f'hyp={hyp}\n' + # logging.info(s) + # compute PER + dists = [edit_distance(r, h) for r, h in results] + errors = { + key: sum(dist[key] for dist in dists) + for key in ['sub', 'ins', 'del', 'total'] + } + total_words = sum(len(ref) for ref, _ in results) + + logging.info( + f'%PER {errors["total"] / total_words:.2%} ' + f'[{errors["total"]} / {total_words}, {errors["ins"]} ins, {errors["del"]} del, {errors["sub"]} sub ]' + ) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == '__main__': + main() diff --git a/egs/timit/asr/simple_v1/ctc_train.py b/egs/timit/asr/simple_v1/ctc_train.py new file mode 100644 index 00000000..ccf47b95 --- /dev/null +++ b/egs/timit/asr/simple_v1/ctc_train.py @@ -0,0 +1,418 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu) +# Apache 2.0 + +import k2 +import logging +import math +import numpy as np +import os +import sys +import torch +import torch.optim as optim +from datetime import datetime +from pathlib import Path +from torch.nn.utils import clip_grad_value_ +from torch.utils.tensorboard import SummaryWriter +from typing import Dict, Optional, Tuple + +from lhotse import CutSet +from lhotse.dataset import CutConcatenate, CutMix, K2SpeechRecognitionDataset, SingleCutSampler +from lhotse.utils import fix_random_seed +from snowfall.common import describe +from snowfall.common import get_phone_symbols +from snowfall.common import load_checkpoint, save_checkpoint +from snowfall.common import save_training_info +from snowfall.common import setup_logger +from snowfall.models import AcousticModel +from snowfall.models.tdnn_lstm import TdnnLstm1b +from snowfall.training.ctc_graph import CtcTrainingGraphCompiler + + +def get_tot_objf_and_num_frames(tot_scores: torch.Tensor, + frames_per_seq: torch.Tensor + ) -> Tuple[float, int, int]: + ''' Figures out the total score(log-prob) over all successful supervision segments + (i.e. those for which the total score wasn't -infinity), and the corresponding + number of frames of neural net output + Args: + tot_scores: a Torch tensor of shape (num_segments,) containing total scores + from forward-backward + frames_per_seq: a Torch tensor of shape (num_segments,) containing the number of + frames for each segment + Returns: + Returns a tuple of 3 scalar tensors: (tot_score, ok_frames, all_frames) + where ok_frames is the frames for successful (finite) segments, and + all_frames is the frames for all segments (finite or not). + ''' + mask = torch.ne(tot_scores, -math.inf) + # finite_indexes is a tensor containing successful segment indexes, e.g. 
+ # [ 0 1 3 4 5 ] + finite_indexes = torch.nonzero(mask).squeeze(1) + if False: + bad_indexes = torch.nonzero(~mask).squeeze(1) + if bad_indexes.shape[0] > 0: + print("Bad indexes: ", bad_indexes, ", bad lengths: ", + frames_per_seq[bad_indexes], " vs. max length ", + torch.max(frames_per_seq), ", avg ", + (torch.sum(frames_per_seq) / frames_per_seq.numel())) + # print("finite_indexes = ", finite_indexes, ", tot_scores = ", tot_scores) + ok_frames = frames_per_seq[finite_indexes].sum() + all_frames = frames_per_seq.sum() + return (tot_scores[finite_indexes].sum(), ok_frames, all_frames) + + +def get_objf(batch: Dict, + model: AcousticModel, + device: torch.device, + graph_compiler: CtcTrainingGraphCompiler, + training: bool, + optimizer: Optional[torch.optim.Optimizer] = None): + feature = batch['inputs'] + supervisions = batch['supervisions'] + supervision_segments = torch.stack( + (supervisions['sequence_idx'], + torch.floor_divide(supervisions['start_frame'], + model.subsampling_factor), + torch.floor_divide(supervisions['num_frames'], + model.subsampling_factor)), 1).to(torch.int32) + indices = torch.argsort(supervision_segments[:, 2], descending=True) + supervision_segments = supervision_segments[indices] + + texts = supervisions['text'] + texts = [texts[idx] for idx in indices] + assert feature.ndim == 3 + # print(supervision_segments[:, 1] + supervision_segments[:, 2]) + + feature = feature.to(device) + # at entry, feature is [N, T, C] + feature = feature.permute(0, 2, 1) # now feature is [N, C, T] + if training: + nnet_output = model(feature) + else: + with torch.no_grad(): + nnet_output = model(feature) + + # nnet_output is [N, C, T] + nnet_output = nnet_output.permute(0, 2, 1) # now nnet_output is [N, T, C] + #print(nnet_output.size()) + decoding_graph = graph_compiler.compile(texts).to(device) + + # nnet_output2 = nnet_output.clone() + # blank_bias = -7.0 + # nnet_output2[:,:,0] += blank_bias + + dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments) + assert decoding_graph.is_cuda() + assert decoding_graph.device == device + assert nnet_output.device == device + + target_graph = k2.intersect_dense(decoding_graph, dense_fsa_vec, 10.0) + + tot_scores = target_graph.get_tot_scores( + log_semiring=True, + use_double_scores=True) + + (tot_score, tot_frames, + all_frames) = get_tot_objf_and_num_frames(tot_scores, + supervision_segments[:, 2]) + + if training: + optimizer.zero_grad() + (-tot_score).backward() + clip_grad_value_(model.parameters(), 5.0) + optimizer.step() + + ans = -tot_score.detach().cpu().item(), tot_frames.cpu().item( + ), all_frames.cpu().item() + return ans + + +def get_validation_objf(dataloader: torch.utils.data.DataLoader, + model: AcousticModel, device: torch.device, + graph_compiler: CtcTrainingGraphCompiler): + total_objf = 0. + total_frames = 0. # for display only + total_all_frames = 0. # all frames including those seqs that failed. 
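+    # Note: the objf returned by get_objf is the negated total log-prob, so
+    # lower is better; frame counts are accumulated alongside it so that the
+    # per-frame average stays comparable across batches of different sizes.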
+ + model.eval() + + for batch_idx, batch in enumerate(dataloader): + objf, frames, all_frames = get_objf(batch, model, device, + graph_compiler, False) + total_objf += objf + total_frames += frames + total_all_frames += all_frames + + return total_objf, total_frames, total_all_frames + + +def train_one_epoch(dataloader: torch.utils.data.DataLoader, + valid_dataloader: torch.utils.data.DataLoader, + model: AcousticModel, device: torch.device, + graph_compiler: CtcTrainingGraphCompiler, + optimizer: torch.optim.Optimizer, + current_epoch: int, + tb_writer: SummaryWriter, + num_epochs: int, + global_batch_idx_train: int): + total_objf, total_frames, total_all_frames = 0., 0., 0. + valid_average_objf = float('inf') + time_waiting_for_batch = 0 + prev_timestamp = datetime.now() + + model.train() + for batch_idx, batch in enumerate(dataloader): + global_batch_idx_train += 1 + timestamp = datetime.now() + time_waiting_for_batch += (timestamp - prev_timestamp).total_seconds() + curr_batch_objf, curr_batch_frames, curr_batch_all_frames = \ + get_objf(batch, model, device, graph_compiler, True, optimizer) + + total_objf += curr_batch_objf + total_frames += curr_batch_frames + total_all_frames += curr_batch_all_frames + + if batch_idx % 10 == 0: + logging.info( + 'batch {}, epoch {}/{} ' + 'global average objf: {:.6f} over {} ' + 'frames ({:.1f}% kept), current batch average objf: {:.6f} over {} frames ({:.1f}% kept) ' + 'avg time waiting for batch {:.3f}s'.format( + batch_idx, current_epoch, num_epochs, + total_objf / total_frames, total_frames, + 100.0 * total_frames / total_all_frames, + curr_batch_objf / (curr_batch_frames + 0.001), + curr_batch_frames, + 100.0 * curr_batch_frames / curr_batch_all_frames, + time_waiting_for_batch / max(1, batch_idx))) + + tb_writer.add_scalar('train/global_average_objf', + total_objf / total_frames, global_batch_idx_train) + + tb_writer.add_scalar('train/current_batch_average_objf', + curr_batch_objf / (curr_batch_frames + 0.001), + global_batch_idx_train) + # if batch_idx >= 10: + # print("Exiting early to get profile info") + # sys.exit(0) + + if batch_idx > 0 and batch_idx % 10 == 0: + total_valid_objf, total_valid_frames, total_valid_all_frames = get_validation_objf( + dataloader=valid_dataloader, + model=model, + device=device, + graph_compiler=graph_compiler) + valid_average_objf = total_valid_objf / total_valid_frames + model.train() + logging.info( + 'Validation average objf: {:.6f} over {} frames ({:.1f}% kept)' + .format(valid_average_objf, + total_valid_frames, + 100.0 * total_valid_frames / total_valid_all_frames)) + + tb_writer.add_scalar('train/global_valid_average_objf', + valid_average_objf, + global_batch_idx_train) + prev_timestamp = datetime.now() + return total_objf / total_frames, valid_average_objf, global_batch_idx_train + + +def main(): + import argparse + + parser = argparse.ArgumentParser(description='Process some integers.') + parser.add_argument('--epochs', type=int, default=40, help='the number of epoch for training.') + + parser.add_argument('--duration', type=int, default=200, help='the max duration in a batch for training.') + + args = parser.parse_args() + + fix_random_seed(42) + + start_epoch = 0 + num_epochs = args.epochs + + exp_dir = 'exp-lstm-adam-ctc-musan' + setup_logger('{}/log/log-train'.format(exp_dir)) + tb_writer = SummaryWriter(log_dir=f'{exp_dir}/tensorboard') + + # load L, G, symbol_table + lang_dir = Path('data/lang_nosp') + phone_symbol_table = k2.SymbolTable.from_file(lang_dir / 'phones.txt') + word_symbol_table 
= k2.SymbolTable.from_file(lang_dir / 'words.txt') + + logging.info("Loading L.fst") + if (lang_dir / 'Linv.pt').exists(): + L_inv = k2.Fsa.from_dict(torch.load(lang_dir / 'Linv.pt')) + else: + with open(lang_dir / 'L.fst.txt') as f: + L = k2.Fsa.from_openfst(f.read(), acceptor=False) + L_inv = k2.arc_sort(L.invert_()) + torch.save(L_inv.as_dict(), lang_dir / 'Linv.pt') + + graph_compiler = CtcTrainingGraphCompiler( + L_inv=L_inv, + phones=phone_symbol_table, + words=word_symbol_table + ) + phone_ids = get_phone_symbols(phone_symbol_table) + + # load dataset + feature_dir = Path('exp/data') + logging.info("About to get train cuts") + cuts_train = CutSet.from_json(feature_dir / + 'cuts_TRAIN.json.gz') + logging.info("About to get dev cuts") + cuts_dev = CutSet.from_json(feature_dir / 'cuts_TEST.json.gz') + logging.info("About to get Musan cuts") + cuts_musan = CutSet.from_json(feature_dir / 'cuts_musan.json.gz') + + logging.info("About to create train dataset") + train = K2SpeechRecognitionDataset( + cuts_train, + cut_transforms=[ + CutConcatenate(), + CutMix( + cuts=cuts_musan, + prob=0.5, + snr=(10, 20) + ) + ] + ) + train_sampler = SingleCutSampler( + cuts_train, + #max_frames=180000, + max_duration=args.duration, + shuffle=True, + ) + logging.info("About to create train dataloader") + train_dl = torch.utils.data.DataLoader( + train, + sampler=train_sampler, + batch_size=None, + num_workers=4 + ) + logging.info("About to create dev dataset") + validate = K2SpeechRecognitionDataset(cuts_dev) + valid_sampler = SingleCutSampler(cuts_dev, max_frames=90000) + logging.info("About to create dev dataloader") + valid_dl = torch.utils.data.DataLoader( + validate, + sampler=valid_sampler, + batch_size=None, + num_workers=1 + ) + + if not torch.cuda.is_available(): + logging.error('No GPU detected!') + sys.exit(-1) + + logging.info("About to create model") + device_id = 0 + device = torch.device('cuda', device_id) + model = TdnnLstm1b( + num_features=80, + num_classes=len(phone_ids) + 1, # +1 for the blank symbol + subsampling_factor=2) + + model.to(device) + describe(model) + + learning_rate = 2e-3 + optimizer = optim.AdamW(model.parameters(), + lr=learning_rate, + weight_decay=5e-4) + + best_objf = np.inf + best_valid_objf = np.inf + best_epoch = start_epoch + best_model_path = os.path.join(exp_dir, 'best_model.pt') + best_epoch_info_filename = os.path.join(exp_dir, 'best-epoch-info') + global_batch_idx_train = 0 # for logging only + + if start_epoch > 0: + model_path = os.path.join(exp_dir, 'epoch-{}.pt'.format(start_epoch - 1)) + ckpt = load_checkpoint(filename=model_path, model=model, optimizer=optimizer) + best_objf = ckpt['objf'] + best_valid_objf = ckpt['valid_objf'] + global_batch_idx_train = ckpt['global_batch_idx_train'] + logging.info(f"epoch = {ckpt['epoch']}, objf = {best_objf}, valid_objf = {best_valid_objf}") + + for epoch in range(start_epoch, args.epochs): + train_sampler.set_epoch(epoch) + curr_learning_rate = 1e-3 + # curr_learning_rate = learning_rate * pow(0.4, epoch) + # for param_group in optimizer.param_groups: + # param_group['lr'] = curr_learning_rate + + tb_writer.add_scalar('learning_rate', curr_learning_rate, epoch) + + logging.info('epoch {}, learning rate {}'.format( + epoch, curr_learning_rate)) + objf, valid_objf, global_batch_idx_train = train_one_epoch(dataloader=train_dl, + valid_dataloader=valid_dl, + model=model, + device=device, + graph_compiler=graph_compiler, + optimizer=optimizer, + current_epoch=epoch, + tb_writer=tb_writer, + num_epochs=num_epochs, + 
global_batch_idx_train=global_batch_idx_train) + # the lower, the better + if valid_objf < best_valid_objf: + best_valid_objf = valid_objf + best_objf = objf + best_epoch = epoch + save_checkpoint(filename=best_model_path, + model=model, + epoch=epoch, + optimizer=None, + scheduler=None, + learning_rate=curr_learning_rate, + objf=objf, + valid_objf=valid_objf, + global_batch_idx_train=global_batch_idx_train) + save_training_info(filename=best_epoch_info_filename, + model_path=best_model_path, + current_epoch=epoch, + learning_rate=curr_learning_rate, + objf=best_objf, + best_objf=best_objf, + valid_objf=valid_objf, + best_valid_objf=best_valid_objf, + best_epoch=best_epoch) + + # we always save the model for every epoch + model_path = os.path.join(exp_dir, 'epoch-{}.pt'.format(epoch)) + save_checkpoint(filename=model_path, + model=model, + optimizer=optimizer, + scheduler=None, + epoch=epoch, + learning_rate=curr_learning_rate, + objf=objf, + valid_objf=valid_objf, + global_batch_idx_train=global_batch_idx_train) + epoch_info_filename = os.path.join(exp_dir, + 'epoch-{}-info'.format(epoch)) + save_training_info(filename=epoch_info_filename, + model_path=model_path, + current_epoch=epoch, + learning_rate=curr_learning_rate, + objf=objf, + best_objf=best_objf, + valid_objf=valid_objf, + best_valid_objf=best_valid_objf, + best_epoch=best_epoch) + + logging.warning('Done') + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == '__main__': + main() diff --git a/egs/timit/asr/simple_v1/local/add_lex_disambig.pl b/egs/timit/asr/simple_v1/local/add_lex_disambig.pl new file mode 100644 index 00000000..c4277e8d --- /dev/null +++ b/egs/timit/asr/simple_v1/local/add_lex_disambig.pl @@ -0,0 +1,196 @@ +#!/usr/bin/env perl +# Copyright 2010-2011 Microsoft Corporation +# 2013-2016 Johns Hopkins University (author: Daniel Povey) +# 2015 Hainan Xu +# 2015 Guoguo Chen + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# Adds disambiguation symbols to a lexicon. +# Outputs still in the normal lexicon format. +# Disambig syms are numbered #1, #2, #3, etc. (#0 +# reserved for symbol in grammar). +# Outputs the number of disambig syms to the standard output. +# With the --pron-probs option, expects the second field +# of each lexicon line to be a pron-prob. +# With the --sil-probs option, expects three additional +# fields after the pron-prob, representing various components +# of the silence probability model. 
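+# A worked example (added for illustration): given the lexicon entries
+#   read  r eh d
+#   red   r eh d
+#   reads r eh d z
+# the two identical pronunciations receive distinct symbols, producing
+#   read  r eh d #1
+#   red   r eh d #2
+#   reads r eh d z
+# ("r eh d z" is unique and not a prefix of any other entry, so it is
+# left unchanged).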
+ +$pron_probs = 0; +$sil_probs = 0; +$first_allowed_disambig = 1; + +for ($n = 1; $n <= 3 && @ARGV > 0; $n++) { + if ($ARGV[0] eq "--pron-probs") { + $pron_probs = 1; + shift @ARGV; + } + if ($ARGV[0] eq "--sil-probs") { + $sil_probs = 1; + shift @ARGV; + } + if ($ARGV[0] eq "--first-allowed-disambig") { + $first_allowed_disambig = 0 + $ARGV[1]; + if ($first_allowed_disambig < 1) { + die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n"; + } + shift @ARGV; + shift @ARGV; + } +} + +if (@ARGV != 2) { + die "Usage: add_lex_disambig.pl [opts] \n" . + "This script adds disambiguation symbols to a lexicon in order to\n" . + "make decoding graphs determinizable; it adds pseudo-phone\n" . + "disambiguation symbols #1, #2 and so on at the ends of phones\n" . + "to ensure that all pronunciations are different, and that none\n" . + "is a prefix of another.\n" . + "It prints to the standard output the number of the largest-numbered" . + "disambiguation symbol that was used.\n" . + "\n" . + "Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" . + " --sil-probs [should be with --pron-probs option]\n" . + " Expect 3 extra fields after the pron-probs, for aspects of\n" . + " the silence probability model\n" . + " --first-allowed-disambig The number of the first disambiguation symbol\n" . + " that this script is allowed to add. By default this is\n" . + " #1, but you can set this to a larger value using this option.\n" . + "e.g.:\n" . + " add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" . + " add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" . + " add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n"; +} + + +$lexfn = shift @ARGV; +$lexoutfn = shift @ARGV; + +open(L, "<$lexfn") || die "Error opening lexicon $lexfn"; + +# (1) Read in the lexicon. +@L = ( ); +while() { + @A = split(" ", $_); + push @L, join(" ", @A); +} + +# (2) Work out the count of each phone-sequence in the +# lexicon. + +foreach $l (@L) { + @A = split(" ", $l); + shift @A; # Remove word. + if ($pron_probs) { + $p = shift @A; + if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; } + } + if ($sil_probs) { + $silp = shift @A; + if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; } + $correction = shift @A; + if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } + $correction = shift @A; + if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; } + } + if (!(@A)) { + die "Bad lexicon line $1, no phone in phone list"; + } + $count{join(" ",@A)}++; +} + +# (3) For each left sub-sequence of each phone-sequence, note down +# that it exists (for identifying prefixes of longer strings). + +foreach $l (@L) { + @A = split(" ", $l); + shift @A; # Remove word. + if ($pron_probs) { shift @A; } # remove pron-prob. + if ($sil_probs) { + shift @A; # Remove silprob + shift @A; # Remove silprob + shift @A; # Remove silprob, there three numbers for sil_probs + } + while(@A > 0) { + pop @A; # Remove last phone + $issubseq{join(" ",@A)} = 1; + } +} + +# (4) For each entry in the lexicon: +# if the phone sequence is unique and is not a +# prefix of another word, no diambig symbol. +# Else output #1, or #2, #3, ... if the same phone-seq +# has already been assigned a disambig symbol. 
+ + +open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n"; + +# max_disambig will always be the highest-numbered disambiguation symbol that +# has been used so far. +$max_disambig = $first_allowed_disambig - 1; + +foreach $l (@L) { + @A = split(" ", $l); + $word = shift @A; + if ($pron_probs) { + $pron_prob = shift @A; + } + if ($sil_probs) { + $sil_word_prob = shift @A; + $word_sil_correction = shift @A; + $prev_nonsil_correction = shift @A + } + $phnseq = join(" ", @A); + if (!defined $issubseq{$phnseq} + && $count{$phnseq} == 1) { + ; # Do nothing. + } else { + if ($phnseq eq "") { # need disambig symbols for the empty string + # that are not use anywhere else. + $max_disambig++; + $reserved_for_the_empty_string{$max_disambig} = 1; + $phnseq = "#$max_disambig"; + } else { + $cur_disambig = $last_used_disambig_symbol_of{$phnseq}; + if (!defined $cur_disambig) { + $cur_disambig = $first_allowed_disambig; + } else { + $cur_disambig++; # Get a number that has not been used yet for + # this phone sequence. + } + while (defined $reserved_for_the_empty_string{$cur_disambig}) { + $cur_disambig++; + } + if ($cur_disambig > $max_disambig) { + $max_disambig = $cur_disambig; + } + $last_used_disambig_symbol_of{$phnseq} = $cur_disambig; + $phnseq = $phnseq . " #" . $cur_disambig; + } + } + if ($pron_probs) { + if ($sil_probs) { + print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n"; + } else { + print O "$word\t$pron_prob\t$phnseq\n"; + } + } else { + print O "$word\t$phnseq\n"; + } +} + +print $max_disambig . "\n"; diff --git a/egs/timit/asr/simple_v1/local/add_silence_to_transcript.py b/egs/timit/asr/simple_v1/local/add_silence_to_transcript.py new file mode 100644 index 00000000..685bb2f2 --- /dev/null +++ b/egs/timit/asr/simple_v1/local/add_silence_to_transcript.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 + +# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang) +''' +Add silence with a given probability after each word in the transcript. + +If the input transcript contains: + + hello world + foo bar koo + zoo + +Then the output transcript **may** look like the following: + + !SIL hello !SIL world !SIL + foo bar !SIL koo !SIL + !SIL zoo !SIL + +(Assume !SIL represents silence.) +''' + +from pathlib import Path + +import argparse +import random + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--transcript', + type=str, + help='The input transcript file.' + 'We assume that the transcript file consists of ' + 'lines. Each line consists of space separated words.') + parser.add_argument('--sil-word', + type=str, + default='!SIL', + help='The word that represents silence.') + parser.add_argument('--sil-prob', + type=float, + default=0.5, + help='The probability for adding a ' + 'silence after each world.') + parser.add_argument('--seed', + type=int, + default=None, + help='The seed for random number generators.') + + return parser.parse_args() + + +def need_silence(sil_prob: float) -> bool: + ''' + Args: + sil_prob: + The probability to add a silence. + Returns: + Return True if a silence is needed. + Return False otherwise. + ''' + return random.uniform(0, 1) <= sil_prob + + +def process_line(line: str, sil_word: str, sil_prob: float) -> None: + '''Process a single line from the transcript. + + Args: + line: + A str containing space separated words. + sil_word: + The symbol indicating silence. + sil_prob: + The probability for adding a silence after each word. + Returns: + Return None. 
+ ''' + + words = line.strip().split(' ')[1:] + + for i, word in enumerate(words): + if i == 0: + # beginning of the line + if need_silence(sil_prob): + print(sil_word, end=' ') + + print(word, end=' ') + + if need_silence(sil_prob): + print(sil_word, end=' ') + + # end of the line, print a new line + if i == len(words) - 1: + print() + + +def main(): + args = get_args() + random.seed(args.seed) + + assert Path(args.transcript).is_file() + assert len(args.sil_word) > 0 + assert 0 < args.sil_prob < 1 + + with open(args.transcript) as f: + for line in f: + process_line(line=line, + sil_word=args.sil_word, + sil_prob=args.sil_prob) + + +if __name__ == '__main__': + main() diff --git a/egs/timit/asr/simple_v1/local/apply_map.pl b/egs/timit/asr/simple_v1/local/apply_map.pl new file mode 100644 index 00000000..725d3463 --- /dev/null +++ b/egs/timit/asr/simple_v1/local/apply_map.pl @@ -0,0 +1,97 @@ +#!/usr/bin/env perl +use warnings; #sed replacement for -w perl parameter +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +# This program is a bit like ./sym2int.pl in that it applies a map +# to things in a file, but it's a bit more general in that it doesn't +# assume the things being mapped to are single tokens, they could +# be sequences of tokens. See the usage message. + + +$permissive = 0; + +for ($x = 0; $x <= 2; $x++) { + + if (@ARGV > 0 && $ARGV[0] eq "-f") { + shift @ARGV; + $field_spec = shift @ARGV; + if ($field_spec =~ m/^\d+$/) { + $field_begin = $field_spec - 1; $field_end = $field_spec - 1; + } + if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) + if ($1 ne "") { + $field_begin = $1 - 1; # Change to zero-based indexing. + } + if ($2 ne "") { + $field_end = $2 - 1; # Change to zero-based indexing. + } + } + if (!defined $field_begin && !defined $field_end) { + die "Bad argument to -f option: $field_spec"; + } + } + + if (@ARGV > 0 && $ARGV[0] eq '--permissive') { + shift @ARGV; + # Mapping is optional (missing key is printed to output) + $permissive = 1; + } +} + +if(@ARGV != 1) { + print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n"; + print STDERR <<'EOF'; +Usage: apply_map.pl [options] map output + options: [-f ] [--permissive] + This applies a map to some specified fields of some input text: + For each line in the map file: the first field is the thing we + map from, and the remaining fields are the sequence we map it to. + The -f (field-range) option says which fields of the input file the map + map should apply to. + If the --permissive option is supplied, fields which are not present + in the map will be left as they were. + Applies the map 'map' to all input text, where each line of the map + is interpreted as a map from the first field to the list of the other fields + Note: can look like 4-5, or 4-, or 5-, or 1, it means the field + range in the input to apply the map to. 
+ e.g.: echo A B | apply_map.pl a.txt + where a.txt is: + A a1 a2 + B b + will produce: + a1 a2 b +EOF + exit(1); +} + +($map_file) = @ARGV; +open(M, "<$map_file") || die "Error opening map file $map_file: $!"; + +while () { + @A = split(" ", $_); + @A >= 1 || die "apply_map.pl: empty line."; + $i = shift @A; + $o = join(" ", @A); + $map{$i} = $o; +} + +while() { + @A = split(" ", $_); + for ($x = 0; $x < @A; $x++) { + if ( (!defined $field_begin || $x >= $field_begin) + && (!defined $field_end || $x <= $field_end)) { + $a = $A[$x]; + if (!defined $map{$a}) { + if (!$permissive) { + die "apply_map.pl: undefined key $a in $map_file\n"; + } else { + print STDERR "apply_map.pl: warning! missing key $a in $map_file\n"; + } + } else { + $A[$x] = $map{$a}; + } + } + } + print join(" ", @A) . "\n"; +} diff --git a/egs/timit/asr/simple_v1/local/arpa2fst.py b/egs/timit/asr/simple_v1/local/arpa2fst.py new file mode 100644 index 00000000..61a4167d --- /dev/null +++ b/egs/timit/asr/simple_v1/local/arpa2fst.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 + +# Copyright 2020 Xiaomi Corporation (Author: Junbo Zhang, Haowen Qiu) +# Apache 2.0 +""" +arpa2fst.py: +Similar to Kaldi's `lmbin/arpa2fst.cc`, rewritten in Python. + +Generally, we do the following. Suppose we are adding an n-gram "A B C". Then find the node for "A B", add a new node +for "A B C", and connect them with the arc accepting "C" with the specified weight. Also, add a backoff arc from the +new "A B C" node to its backoff state "B C". + +Two notable exceptions are the highest order n-grams, and final n-grams. + +When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM), the following optimization is performed. +There is no point adding a node for "A B C" with a "C" arc from "A B", since there will be no other arcs ingoing to +this node, and an epsilon backoff arc into the backoff model "B C", with the weight of \bar{1}. To save a node, +create an arc accepting "C" directly from "A B" to "B C". This saves as many nodes as there are the highest order +n-grams, which is typically about half the size of a large 3-gram model. + +Indeed, this does not apply to n-grams ending in EOS, since they do not back off. These are special, as they do not +have a back-off state, and the node for "(..anything..) " is always final. These are handled in one of the two +possible ways, If symbols and are being replaced by epsilons, neither node nor arc is created, +and the logprob of the n-gram is applied to its source node as final weight. If and are preserved, +then a special final node for is allocated and used as the destination of the "" acceptor arc. """ + +import re +import argparse +from typing import List, NamedTuple + + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script is similar to Kaldi's `lmbin/arpa2fst.cc`, + but was rewritten in Python. 
The output goes to the stdout.""")
+    parser.add_argument('arpa', type=str, help="""The arpa file.""")
+    parser.add_argument('--bos',
+                        type=str,
+                        default='<s>',
+                        help="""The beginning-of-sentence symbol.""")
+    parser.add_argument('--eos',
+                        type=str,
+                        default='</s>',
+                        help="""The end-of-sentence symbol.""")
+    parser.add_argument('--disambig_symbol',
+                        type=str,
+                        default='#0',
+                        help="""The disambiguation symbol.""")
+    parser.add_argument('--to-natural-log',
+                        # Note: argparse's type=bool would treat any non-empty
+                        # string (including 'False') as True, so parse the
+                        # value explicitly.
+                        type=lambda s: str(s).lower() not in ('false', '0', 'no'),
+                        default=True,
+                        help="""Convert log10 weights to natural log.""")
+    args = parser.parse_args()
+    return args
+
+
+class Ngram(NamedTuple):
+    logprob: float
+    prev_words: List
+    cur_word: str
+    backoff: float
+
+
+def parse_arpa_file(filename, to_natural_log=True):
+    ngrams = []
+    with open(filename) as f:
+        stage = 0  # stage 0 for header, 1 for unigrams, 2 for bigrams, ...
+        for line in f:
+            line = re.sub(r'\s+$', '', line)
+            if re.match(r'^\s*$', line) or re.match(
+                    r'\\(data|end)\\', line) or re.match(r'ngram \d+=', line):
+                continue
+            elif re.match(r'\\\d+-grams:', line):
+                stage = int(re.match(r'\\(\d+)-grams:', line).group(1))
+                ngrams.append([])
+                assert len(ngrams) == stage
+            else:
+                items = re.split(r'\s+', line)
+                assert stage + 1 <= len(
+                    items) <= stage + 2, f'Invalid arpa line: {line}'
+
+                # 2.30258... is ln(10); ARPA files store log10 probabilities.
+                logprob = float(items[0]) if not to_natural_log else float(
+                    items[0]) * 2.30258509299404568402
+                words = items[1:stage + 1]
+                backoff = float(items[stage +
+                                      1]) if len(items) == stage + 2 else 0.0
+                if to_natural_log:
+                    backoff = backoff * 2.30258509299404568402
+
+                if stage == 1:
+                    prev_words = []
+                    assert len(words) == 1
+                    cur_word = words[0]
+                else:
+                    prev_words = words[0:-1]
+                    cur_word = words[-1]
+
+                ngrams[stage - 1].append(
+                    Ngram(logprob=logprob,
+                          prev_words=prev_words,
+                          cur_word=cur_word,
+                          backoff=backoff))
+
+    return ngrams
+
+
+def create_backoff(key, state, state_id, weight, sub_eps):
+    # Walk up shorter and shorter histories until one exists in state_id,
+    # then emit a backoff arc labelled with the disambig symbol.
+    while key not in state_id:
+        key = ' '.join((key.split(' '))[1:])
+    dest = state_id[key]
+    print(f'{state} {dest} {sub_eps} {weight}')
+
+
+# TODO(haowen): refactor the code (and support case where sub_eps is empty?)
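+# A rough sketch of the output format produced below (hypothetical bigram LM
+# over {a, b}): each n-gram becomes an acceptor arc "src dest symbol weight",
+# backoff arcs are labelled with the disambig symbol (#0 by default), and
+# final states are printed as "state weight".  For instance, the unigram "a"
+# with log10 prob -0.30103 yields an arc from the empty-history state 1, e.g.
+#
+#   1 3 a 0.693...
+#
+# (the exact state numbering depends on insertion order).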
+def print_fst_from_ngrams(ngram_lm, bos='<s>', eos='</s>', sub_eps='#0'):
+    # For now this version only supports the case where the disambig symbol
+    # is not empty.
+    assert sub_eps != ''
+    highest_order = len(ngram_lm)
+    state_id = {bos: 0, '': 1, eos: 2}
+    state_count = len(state_id)
+    for order, ngrams in enumerate(ngram_lm, start=1):
+        for logprob, prev_words, cur_word, backoff in ngrams:
+            assert len(prev_words) + 1 == order
+            prev_words_str = ' '.join(prev_words)
+            whole_words_str = ' '.join(prev_words + [cur_word])
+            if prev_words_str not in state_id:
+                continue  # no parent (n-1) gram
+            source = state_id[prev_words_str]
+            weight = -logprob
+            assert cur_word != sub_eps
+            if cur_word == eos:
+                if sub_eps == '':
+                    dest = state_id[eos]
+                else:
+                    # treat it as if it were epsilon; mark source final
+                    print(f'{source} {weight}')
+                    continue
+            else:
+                key = whole_words_str if order != highest_order else ' '.join(
+                    (prev_words + [cur_word])[1:])
+                if key not in state_id:
+                    state_id[key] = state_count
+                    dest = state_count
+                    state_count += 1
+                    tails = ' '.join((key.split(' '))[1:])
+                    create_backoff(tails, dest, state_id, -backoff, sub_eps)
+                else:
+                    dest = state_id[key]
+
+            if cur_word == bos:
+                weight = 0
+                if sub_eps != '':
+                    continue
+
+            print(f'{source} {dest} {cur_word} {weight}')
+
+    if sub_eps == '':
+        print(f'{state_id[eos]} 0')
+
+
+def main():
+    args = get_args()
+    print_fst_from_ngrams(parse_arpa_file(args.arpa, args.to_natural_log),
+                          args.bos, args.eos, args.disambig_symbol)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/timit/asr/simple_v1/local/convert_transcript_to_corpus.py b/egs/timit/asr/simple_v1/local/convert_transcript_to_corpus.py
new file mode 100644
index 00000000..62c60074
--- /dev/null
+++ b/egs/timit/asr/simple_v1/local/convert_transcript_to_corpus.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang)
+'''
+Convert a transcript file to a corpus for LM training with
+the help of a lexicon. If the lexicon contains phones, the resulting
+LM will be a phone LM; if the lexicon contains word pieces,
+the resulting LM will be a word piece LM.
+
+If a word has multiple pronunciations, only the first one is used.
+
+If the input transcript is:
+
+    hello zoo world hello
+    world zoo
+    foo zoo world hellO
+
+and if the lexicon is
+
+    <UNK> SPN
+    hello h e l l o 2
+    hello h e l l o
+    world w o r l d
+    zoo z o o
+
+Then the output is
+
+    h e l l o 2 z o o w o r l d h e l l o 2
+    w o r l d z o o
+    SPN z o o w o r l d SPN
+'''
+
+from pathlib import Path
+from typing import Dict
+
+import argparse
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--transcript',
+                        type=str,
+                        help='The input transcript file. '
+                        'We assume that the transcript file consists of '
+                        'lines. Each line consists of space separated words.')
+    parser.add_argument('--lexicon', type=str, help='The input lexicon file.')
+    parser.add_argument('--oov',
+                        type=str,
+                        default='<UNK>',
+                        help='The OOV word.')
+
+    return parser.parse_args()
+
+
+def read_lexicon(filename: str) -> Dict[str, str]:
+    '''
+    Args:
+      filename:
+        Filename of the lexicon. Each line in the lexicon
+        has the following format:
+
+            word p1 p2 p3
+
+        where the first field is a word and the remaining fields
+        are the pronunciations of the word. Fields are separated
+        by spaces.
+    Returns:
+      Return a dict whose keys are words and values are the pronunciations.
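+
+      Example (with the toy lexicon from the module docstring; only the
+      first pronunciation of 'hello' is kept):
+
+          {'<UNK>': 'SPN', 'hello': 'h e l l o 2',
+           'world': 'w o r l d', 'zoo': 'z o o'}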
+ ''' + ans = dict() + with open(filename) as f: + for line in f: + line = line.strip() + + if len(line) == 0: + # skip empty lines + continue + + fields = line.split() + assert len(fields) >= 2 + + word = fields[0] + pron = ' '.join(fields[1:]) + + if word not in ans: + # In case a word has multiple pronunciations, + # we only use the first one + ans[word] = pron + return ans + + +def process_line(lexicon: Dict[str, str], line: str, oov_pron: str) -> None: + ''' + Args: + lexicon: + A dict containing pronunciations. Its keys are words and values + are pronunciations. + line: + A line of transcript consisting of space separated words. + oov_pron: + The pronunciation of the oov word if a word in line is not present + in the lexicon. + Returns: + Return None. + ''' + words = line.strip().split() + for i, w in enumerate(words): + pron = lexicon.get(w, oov_pron) + print(pron, end=' ') + if i == len(words) - 1: + # end of the line, prints a new line + print() + + +def main(): + args = get_args() + assert Path(args.lexicon).is_file() + assert Path(args.transcript).is_file() + assert len(args.oov) > 0 + + lexicon = read_lexicon(args.lexicon) + assert args.oov in lexicon + + oov_pron = lexicon[args.oov] + + with open(args.transcript) as f: + for line in f: + process_line(lexicon=lexicon, line=line, oov_pron=oov_pron) + + +if __name__ == '__main__': + main() diff --git a/egs/timit/asr/simple_v1/local/filter_scp.pl b/egs/timit/asr/simple_v1/local/filter_scp.pl new file mode 100644 index 00000000..b76d37f4 --- /dev/null +++ b/egs/timit/asr/simple_v1/local/filter_scp.pl @@ -0,0 +1,87 @@ +#!/usr/bin/env perl +# Copyright 2010-2012 Microsoft Corporation +# Johns Hopkins University (author: Daniel Povey) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# This script takes a list of utterance-ids or any file whose first field +# of each line is an utterance-id, and filters an scp +# file (or any file whose "n-th" field is an utterance id), printing +# out only those lines whose "n-th" field is in id_list. The index of +# the "n-th" field is 1, by default, but can be changed by using +# the -f switch + +$exclude = 0; +$field = 1; +$shifted = 0; + +do { + $shifted=0; + if ($ARGV[0] eq "--exclude") { + $exclude = 1; + shift @ARGV; + $shifted=1; + } + if ($ARGV[0] eq "-f") { + $field = $ARGV[1]; + shift @ARGV; shift @ARGV; + $shifted=1 + } +} while ($shifted); + +if(@ARGV < 1 || @ARGV > 2) { + die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . + "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . + "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . + "only the lines that were *not* in id_list.\n" . + "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . + "If your older scripts (written before Oct 2014) stopped working and you used the\n" . + "-f option, add 1 to the argument.\n" . 
+ "See also: utils/filter_scp.pl .\n"; +} + + +$idlist = shift @ARGV; +open(F, "<$idlist") || die "Could not open id-list file $idlist"; +while() { + @A = split; + @A>=1 || die "Invalid id-list file line $_"; + $seen{$A[0]} = 1; +} + +if ($field == 1) { # Treat this as special case, since it is common. + while(<>) { + $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; + # $1 is what we filter on. + if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { + print $_; + } + } +} else { + while(<>) { + @A = split; + @A > 0 || die "Invalid scp file line $_"; + @A >= $field || die "Invalid scp file line $_"; + if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { + print $_; + } + } +} + +# tests: +# the following should print "foo 1" +# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) +# the following should print "bar 2". +# ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) diff --git a/egs/timit/asr/simple_v1/local/fstaddselfloops.pl b/egs/timit/asr/simple_v1/local/fstaddselfloops.pl new file mode 100644 index 00000000..133a518e --- /dev/null +++ b/egs/timit/asr/simple_v1/local/fstaddselfloops.pl @@ -0,0 +1,44 @@ +#!/usr/bin/env perl + +# Copyright 2020 Xiaomi Corporation (Author: Junbo Zhang) +# Apache 2.0 + +use strict; +use warnings; + +my $Usage = < < + e.g.: cat L_disambig.txt | local/fstaddselfloops.pl 347 200004 > L_disambig_with_loop.txt +EOU + +if (@ARGV != 2) { + die $Usage; +} + +my $wdisambig_phone = shift @ARGV; +my $wdisambig_word = shift @ARGV; + +my %states_needs_self_loops; +while (<>) { + print $_; + + my @items = split(/\s+/); + if (@items == 2) { + # it is a final state + $states_needs_self_loops{$items[0]} = 1; + } elsif (@items == 5) { + my ($src, $dst, $inlabel, $outlabel, $score) = @items; + $states_needs_self_loops{$src} = 1 if ($outlabel != 0); + } else { + die "Invalid openfst line."; + } +} + +foreach (keys %states_needs_self_loops) { + print "$_ $_ $wdisambig_phone $wdisambig_word 0.0\n" +} diff --git a/egs/timit/asr/simple_v1/local/make_kn_lm.py b/egs/timit/asr/simple_v1/local/make_kn_lm.py new file mode 100644 index 00000000..58b721d2 --- /dev/null +++ b/egs/timit/asr/simple_v1/local/make_kn_lm.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 + +# Copyright 2016 Johns Hopkins University (Author: Daniel Povey) +# 2018 Ruizhe Huang +# Apache 2.0. + +# This is an implementation of computing Kneser-Ney smoothed language model +# in the same way as srilm. This is a back-off, unmodified version of +# Kneser-Ney smoothing, which produces the same results as the following +# command (as an example) of srilm: +# +# $ ngram-count -order 4 -kn-modify-counts-at-end -ukndiscount -gt1min 0 -gt2min 0 -gt3min 0 -gt4min 0 \ +# -text corpus.txt -lm lm.arpa +# +# The data structure is based on: kaldi/egs/wsj/s5/utils/lang/make_phone_lm.py +# The smoothing algorithm is based on: http://www.speech.sri.com/projects/srilm/manpages/ngram-discount.7.html + +import sys +import os +import re +import io +import math +import argparse +from collections import Counter, defaultdict + + +parser = argparse.ArgumentParser(description=""" + Generate kneser-ney language model as arpa format. By default, + it will read the corpus from standard input, and output to standard output. 
+ """) +parser.add_argument("-ngram-order", type=int, default=4, choices=[2, 3, 4, 5, 6, 7], help="Order of n-gram") +parser.add_argument("-text", type=str, default=None, help="Path to the corpus file") +parser.add_argument("-lm", type=str, default=None, help="Path to output arpa file for language models") +parser.add_argument("-verbose", type=int, default=0, choices=[0, 1, 2, 3, 4, 5], help="Verbose level") +args = parser.parse_args() + +default_encoding = "latin-1" # For encoding-agnostic scripts, we assume byte stream as input. + # Need to be very careful about the use of strip() and split() + # in this case, because there is a latin-1 whitespace character + # (nbsp) which is part of the unicode encoding range. + # Ref: kaldi/egs/wsj/s5/utils/lang/bpe/prepend_words.py @ 69cd717 +strip_chars = " \t\r\n" +whitespace = re.compile("[ \t]+") + + +class CountsForHistory: + # This class (which is more like a struct) stores the counts seen in a + # particular history-state. It is used inside class NgramCounts. + # It really does the job of a dict from int to float, but it also + # keeps track of the total count. + def __init__(self): + # The 'lambda: defaultdict(float)' is an anonymous function taking no + # arguments that returns a new defaultdict(float). + self.word_to_count = defaultdict(int) + self.word_to_context = defaultdict(set) # using a set to count the number of unique contexts + self.word_to_f = dict() # discounted probability + self.word_to_bow = dict() # back-off weight + self.total_count = 0 + + def words(self): + return self.word_to_count.keys() + + def __str__(self): + # e.g. returns ' total=12: 3->4, 4->6, -1->2' + return ' total={0}: {1}'.format( + str(self.total_count), + ', '.join(['{0} -> {1}'.format(word, count) + for word, count in self.word_to_count.items()])) + + def add_count(self, predicted_word, context_word, count): + assert count >= 0 + + self.total_count += count + self.word_to_count[predicted_word] += count + if context_word is not None: + self.word_to_context[predicted_word].add(context_word) + + +class NgramCounts: + # A note on data-structure. Firstly, all words are represented as + # integers. We store n-gram counts as an array, indexed by (history-length + # == n-gram order minus one) (note: python calls arrays "lists") of dicts + # from histories to counts, where histories are arrays of integers and + # "counts" are dicts from integer to float. For instance, when + # accumulating the 4-gram count for the '8' in the sequence '5 6 7 8', we'd + # do as follows: self.counts[3][[5,6,7]][8] += 1.0 where the [3] indexes an + # array, the [[5,6,7]] indexes a dict, and the [8] indexes a dict. + def __init__(self, ngram_order, bos_symbol='', eos_symbol=''): + assert ngram_order >= 2 + + self.ngram_order = ngram_order + self.bos_symbol = bos_symbol + self.eos_symbol = eos_symbol + + self.counts = [] + for n in range(ngram_order): + self.counts.append(defaultdict(lambda: CountsForHistory())) + + self.d = [] # list of discounting factor for each order of ngram + + # adds a raw count (called while processing input data). + # Suppose we see the sequence '6 7 8 9' and ngram_order=4, 'history' + # would be (6,7,8) and 'predicted_word' would be 9; 'count' would be + # 1. + def add_count(self, history, predicted_word, context_word, count): + self.counts[len(history)][history].add_count(predicted_word, context_word, count) + + # 'line' is a string containing a sequence of integer word-ids. + # This function adds the un-smoothed counts from this line of text. 
+ def add_raw_counts_from_line(self, line): + if line == '': + words = [self.bos_symbol, self.eos_symbol] + else: + words = [self.bos_symbol] + whitespace.split(line) + [self.eos_symbol] + + for i in range(len(words)): + for n in range(1, self.ngram_order+1): + if i + n > len(words): + break + ngram = words[i: i + n] + predicted_word = ngram[-1] + history = tuple(ngram[: -1]) + if i == 0 or n == self.ngram_order: + context_word = None + else: + context_word = words[i-1] + + self.add_count(history, predicted_word, context_word, 1) + + def add_raw_counts_from_standard_input(self): + lines_processed = 0 + infile = io.TextIOWrapper(sys.stdin.buffer, encoding=default_encoding) # byte stream as input + for line in infile: + line = line.strip(strip_chars) + self.add_raw_counts_from_line(line) + lines_processed += 1 + if lines_processed == 0 or args.verbose > 0: + print("make_phone_lm.py: processed {0} lines of input".format(lines_processed), file=sys.stderr) + + def add_raw_counts_from_file(self, filename): + lines_processed = 0 + with open(filename, encoding=default_encoding) as fp: + for line in fp: + line = line.strip(strip_chars) + self.add_raw_counts_from_line(line) + lines_processed += 1 + if lines_processed == 0 or args.verbose > 0: + print("make_phone_lm.py: processed {0} lines of input".format(lines_processed), file=sys.stderr) + + def cal_discounting_constants(self): + # For each order N of N-grams, we calculate discounting constant D_N = n1_N / (n1_N + 2 * n2_N), + # where n1_N is the number of unique N-grams with count = 1 (counts-of-counts). + # This constant is used similarly to absolute discounting. + # Return value: d is a list of floats, where d[N+1] = D_N + + self.d = [0] # for the lowest order, i.e., 1-gram, we do not need to discount, thus the constant is 0 + # This is a special case: as we currently assumed having seen all vocabularies in the dictionary, + # but perhaps this is not the case for some other scenarios. + for n in range(1, self.ngram_order): + this_order_counts = self.counts[n] + n1 = 0 + n2 = 0 + for hist, counts_for_hist in this_order_counts.items(): + stat = Counter(counts_for_hist.word_to_count.values()) + n1 += stat[1] + n2 += stat[2] + assert n1 + 2 * n2 > 0 + self.d.append(n1 * 1.0 / (n1 + 2 * n2)) + + def cal_f(self): + # f(a_z) is a probability distribution of word sequence a_z. + # Typically f(a_z) is discounted to be less than the ML estimate so we have + # some leftover probability for the z words unseen in the context (a_). 
+ # + # f(a_z) = (c(a_z) - D0) / c(a_) ;; for highest order N-grams + # f(_z) = (n(*_z) - D1) / n(*_*) ;; for lower order N-grams + + # highest order N-grams + n = self.ngram_order - 1 + this_order_counts = self.counts[n] + for hist, counts_for_hist in this_order_counts.items(): + for w, c in counts_for_hist.word_to_count.items(): + counts_for_hist.word_to_f[w] = max((c - self.d[n]), 0) * 1.0 / counts_for_hist.total_count + + # lower order N-grams + for n in range(0, self.ngram_order - 1): + this_order_counts = self.counts[n] + for hist, counts_for_hist in this_order_counts.items(): + + n_star_star = 0 + for w in counts_for_hist.word_to_count.keys(): + n_star_star += len(counts_for_hist.word_to_context[w]) + + if n_star_star != 0: + for w in counts_for_hist.word_to_count.keys(): + n_star_z = len(counts_for_hist.word_to_context[w]) + counts_for_hist.word_to_f[w] = max((n_star_z - self.d[n]), 0) * 1.0 / n_star_star + else: # patterns begin with , they do not have "modified count", so use raw count instead + for w in counts_for_hist.word_to_count.keys(): + n_star_z = counts_for_hist.word_to_count[w] + counts_for_hist.word_to_f[w] = max((n_star_z - self.d[n]), 0) * 1.0 / counts_for_hist.total_count + + def cal_bow(self): + # Backoff weights are only necessary for ngrams which form a prefix of a longer ngram. + # Thus, two sorts of ngrams do not have a bow: + # 1) highest order ngram + # 2) ngrams ending in + # + # bow(a_) = (1 - Sum_Z1 f(a_z)) / (1 - Sum_Z1 f(_z)) + # Note that Z1 is the set of all words with c(a_z) > 0 + + # highest order N-grams + n = self.ngram_order - 1 + this_order_counts = self.counts[n] + for hist, counts_for_hist in this_order_counts.items(): + for w in counts_for_hist.word_to_count.keys(): + counts_for_hist.word_to_bow[w] = None + + # lower order N-grams + for n in range(0, self.ngram_order - 1): + this_order_counts = self.counts[n] + for hist, counts_for_hist in this_order_counts.items(): + for w in counts_for_hist.word_to_count.keys(): + if w == self.eos_symbol: + counts_for_hist.word_to_bow[w] = None + else: + a_ = hist + (w,) + + assert len(a_) < self.ngram_order + assert a_ in self.counts[len(a_)].keys() + + a_counts_for_hist = self.counts[len(a_)][a_] + + sum_z1_f_a_z = 0 + for u in a_counts_for_hist.word_to_count.keys(): + sum_z1_f_a_z += a_counts_for_hist.word_to_f[u] + + sum_z1_f_z = 0 + _ = a_[1:] + _counts_for_hist = self.counts[len(_)][_] + for u in a_counts_for_hist.word_to_count.keys(): # Should be careful here: what is Z1 + sum_z1_f_z += _counts_for_hist.word_to_f[u] + + counts_for_hist.word_to_bow[w] = (1.0 - sum_z1_f_a_z) / (1.0 - sum_z1_f_z) + + def print_raw_counts(self, info_string): + # these are useful for debug. + print(info_string) + res = [] + for this_order_counts in self.counts: + for hist, counts_for_hist in this_order_counts.items(): + for w in counts_for_hist.word_to_count.keys(): + ngram = " ".join(hist) + " " + w + ngram = ngram.strip(strip_chars) + + res.append("{0}\t{1}".format(ngram, counts_for_hist.word_to_count[w])) + res.sort(reverse=True) + for r in res: + print(r) + + def print_modified_counts(self, info_string): + # these are useful for debug. 
+ print(info_string) + res = [] + for this_order_counts in self.counts: + for hist, counts_for_hist in this_order_counts.items(): + for w in counts_for_hist.word_to_count.keys(): + ngram = " ".join(hist) + " " + w + ngram = ngram.strip(strip_chars) + + modified_count = len(counts_for_hist.word_to_context[w]) + raw_count = counts_for_hist.word_to_count[w] + + if modified_count == 0: + res.append("{0}\t{1}".format(ngram, raw_count)) + else: + res.append("{0}\t{1}".format(ngram, modified_count)) + res.sort(reverse=True) + for r in res: + print(r) + + def print_f(self, info_string): + # these are useful for debug. + print(info_string) + res = [] + for this_order_counts in self.counts: + for hist, counts_for_hist in this_order_counts.items(): + for w in counts_for_hist.word_to_count.keys(): + ngram = " ".join(hist) + " " + w + ngram = ngram.strip(strip_chars) + + f = counts_for_hist.word_to_f[w] + if f == 0: # f() is always 0 + f = 1e-99 + + res.append("{0}\t{1}".format(ngram, math.log(f, 10))) + res.sort(reverse=True) + for r in res: + print(r) + + def print_f_and_bow(self, info_string): + # these are useful for debug. + print(info_string) + res = [] + for this_order_counts in self.counts: + for hist, counts_for_hist in this_order_counts.items(): + for w in counts_for_hist.word_to_count.keys(): + ngram = " ".join(hist) + " " + w + ngram = ngram.strip(strip_chars) + + f = counts_for_hist.word_to_f[w] + if f == 0: # f() is always 0 + f = 1e-99 + + bow = counts_for_hist.word_to_bow[w] + if bow is None: + res.append("{1}\t{0}".format(ngram, math.log(f, 10))) + else: + res.append("{1}\t{0}\t{2}".format(ngram, math.log(f, 10), math.log(bow, 10))) + res.sort(reverse=True) + for r in res: + print(r) + + def print_as_arpa(self, fout=io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1')): + # print as ARPA format. + + print('\\data\\', file=fout) + for hist_len in range(self.ngram_order): + # print the number of n-grams. 
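+            # The resulting header looks like this (counts are illustrative):
+            #   \data\
+            #   ngram 1=42
+            #   ngram 2=317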
+ print('ngram {0}={1}'.format( + hist_len + 1, + sum([len(counts_for_hist.word_to_f) for counts_for_hist in self.counts[hist_len].values()])), + file=fout + ) + + print('', file=fout) + + for hist_len in range(self.ngram_order): + print('\\{0}-grams:'.format(hist_len + 1), file=fout) + + this_order_counts = self.counts[hist_len] + for hist, counts_for_hist in this_order_counts.items(): + for word in counts_for_hist.word_to_count.keys(): + ngram = hist + (word,) + prob = counts_for_hist.word_to_f[word] + bow = counts_for_hist.word_to_bow[word] + + if prob == 0: # f() is always 0 + prob = 1e-99 + + line = '{0}\t{1}'.format('%.7f' % math.log10(prob), ' '.join(ngram)) + if bow is not None: + line += '\t{0}'.format('%.7f' % math.log10(bow)) + print(line, file=fout) + print('', file=fout) + print('\\end\\', file=fout) + + +if __name__ == "__main__": + + ngram_counts = NgramCounts(args.ngram_order) + + if args.text is None: + ngram_counts.add_raw_counts_from_standard_input() + else: + assert os.path.isfile(args.text) + ngram_counts.add_raw_counts_from_file(args.text) + + ngram_counts.cal_discounting_constants() + ngram_counts.cal_f() + ngram_counts.cal_bow() + + if args.lm is None: + ngram_counts.print_as_arpa() + else: + with open(args.lm, 'w', encoding=default_encoding) as f: + ngram_counts.print_as_arpa(fout=f) diff --git a/egs/timit/asr/simple_v1/local/make_lexicon_fst.py b/egs/timit/asr/simple_v1/local/make_lexicon_fst.py new file mode 100644 index 00000000..e22222db --- /dev/null +++ b/egs/timit/asr/simple_v1/local/make_lexicon_fst.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0. + +# see get_args() below for usage message. +import argparse +import os +import sys +import math +import re + +# The use of latin-1 encoding does not preclude reading utf-8. latin-1 +# encoding means "treat words as sequences of bytes", and it is compatible +# with utf-8 encoding as well as other encodings such as gbk, as long as the +# spaces are also spaces in ascii (which we check). It is basically how we +# emulate the behavior of python before python3. +sys.stdout = open(1, 'w', encoding='latin-1', closefd=False) +sys.stderr = open(2, 'w', encoding='latin-1', closefd=False) + +def get_args(): + parser = argparse.ArgumentParser(description="""This script creates the + text form of a lexicon FST, to be compiled by fstcompile using the + appropriate symbol tables (phones.txt and words.txt) . It will mostly + be invoked indirectly via utils/prepare_lang.sh. The output goes to + the stdout.""") + + parser.add_argument('--sil-phone', dest='sil_phone', type=str, + help="""Text form of optional-silence phone, e.g. 'SIL'. See also + the --silprob option.""") + parser.add_argument('--sil-prob', dest='sil_prob', type=float, default=0.0, + help="""Probability of silence between words (including at the + beginning and end of word sequences). Must be in the range [0.0, 1.0]. + This refers to the optional silence inserted by the lexicon; see + the --silphone option.""") + parser.add_argument('--sil-disambig', dest='sil_disambig', type=str, + help="""Disambiguation symbol to disambiguate silence, e.g. #5. + Will only be supplied if you are creating the version of L.fst + with disambiguation symbols, intended for use with cyclic G.fst. 
+                        This symbol was introduced to fix a rather obscure source of
+                        nondeterminism of CLG.fst, that has to do with reordering of
+                        disambiguation symbols and phone symbols.""")
+    parser.add_argument('--left-context-phones', dest='left_context_phones', type=str,
+                        help="""Only relevant if --nonterminals is also supplied; this relates
+                        to grammar decoding (see http://kaldi-asr.org/doc/grammar.html or
+                        src/doc/grammar.dox). Format is a list of left-context phones,
+                        in text form, one per line. E.g. data/lang/phones/left_context_phones.txt""")
+    parser.add_argument('--nonterminals', type=str,
+                        help="""If supplied, --left-context-phones must also be supplied.
+                        List of user-defined nonterminal symbols such as #nonterm:contact_list,
+                        one per line. E.g. data/local/dict/nonterminals.txt.""")
+    parser.add_argument('lexiconp', type=str,
+                        help="""Filename of lexicon with pronunciation probabilities
+                        (normally lexiconp.txt), with lines of the form 'word prob p1 p2...',
+                        e.g. 'a 1.0 ay'""")
+    args = parser.parse_args()
+    return args
+
+
+def read_lexiconp(filename):
+    """Reads the lexiconp.txt file in 'filename', with lines like 'word pron-prob p1 p2 ...'.
+    Returns a list of tuples (word, pron_prob, pron), where 'word' is a string,
+    'pron_prob', a float, is the pronunciation probability (which must be >0.0
+    and would normally be <=1.0), and 'pron' is a list of strings representing phones.
+    An element in the returned list might be ('hello', 1.0, ['h', 'eh', 'l', 'ow']).
+    """
+
+    ans = []
+    found_empty_prons = False
+    found_large_pronprobs = False
+    # See the comment near the top of this file, RE why we use latin-1.
+    with open(filename, 'r', encoding='latin-1') as f:
+        whitespace = re.compile("[ \t]+")
+        for line in f:
+            a = whitespace.split(line.strip(" \t\r\n"))
+            if len(a) < 2:
+                print("{0}: error: found bad line '{1}' in lexicon file {2} ".format(
+                    sys.argv[0], line.strip(" \t\r\n"), filename), file=sys.stderr)
+                sys.exit(1)
+            word = a[0]
+            if word == "<eps>":
+                # This would clash with the epsilon symbol normally used in OpenFst.
+                print("{0}: error: found <eps> as a word in lexicon file "
+                      "{1}".format(line.strip(" \t\r\n"), filename), file=sys.stderr)
+                sys.exit(1)
+            try:
+                pron_prob = float(a[1])
+            except ValueError:
+                print("{0}: error: found bad line '{1}' in lexicon file {2}, 2nd field "
+                      "should be pron-prob".format(sys.argv[0], line.strip(" \t\r\n"), filename),
+                      file=sys.stderr)
+                sys.exit(1)
+            prons = a[2:]
+            if pron_prob <= 0.0:
+                print("{0}: error: invalid pron-prob in line '{1}' of lexicon file {2} ".format(
+                    sys.argv[0], line.strip(" \t\r\n"), filename), file=sys.stderr)
+                sys.exit(1)
+            if len(prons) == 0:
+                found_empty_prons = True
+            ans.append( (word, pron_prob, prons) )
+            if pron_prob > 1.0:
+                found_large_pronprobs = True
+    if found_empty_prons:
+        print("{0}: warning: found at least one word with an empty pronunciation "
+              "in lexicon file {1}.".format(sys.argv[0], filename),
+              file=sys.stderr)
+    if found_large_pronprobs:
+        print("{0}: warning: found at least one word with pron-prob >1.0 "
+              "in {1}".format(sys.argv[0], filename), file=sys.stderr)
+
+
+    if len(ans) == 0:
+        print("{0}: error: found no pronunciations in lexicon file {1}".format(
+            sys.argv[0], filename), file=sys.stderr)
+        sys.exit(1)
+    return ans
+
+
+def write_nonterminal_arcs(start_state, loop_state, next_state,
+                           nonterminals, left_context_phones):
+    """This function relates to the grammar-decoding setup, see
+    kaldi-asr.org/doc/grammar.html.
It is called from write_fst_no_silence + and write_fst_silence, and writes to the stdout some extra arcs + in the lexicon FST that relate to nonterminal symbols. + See the section "Special symbols in L.fst, + kaldi-asr.org/doc/grammar.html#grammar_special_l. + start_state: the start-state of L.fst. + loop_state: the state of high out-degree in L.fst where words leave + and enter. + next_state: the number from which this function can start allocating its + own states. the updated value of next_state will be returned. + nonterminals: the user-defined nonterminal symbols as a list of + strings, e.g. ['#nonterm:contact_list', ... ]. + left_context_phones: a list of phones that may appear as left-context, + e.g. ['a', 'ah', ... '#nonterm_bos']. + """ + shared_state = next_state + next_state += 1 + final_state = next_state + next_state += 1 + + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=start_state, dest=shared_state, + phone='#nonterm_begin', word='#nonterm_begin', + cost=0.0)) + + for nonterminal in nonterminals: + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=loop_state, dest=shared_state, + phone=nonterminal, word=nonterminal, + cost=0.0)) + # this_cost equals log(len(left_context_phones)) but the expression below + # better captures the meaning. Applying this cost to arcs keeps the FST + # stochatic (sum-to-one, like an HMM), so that if we do weight pushing + # things won't get weird. In the grammar-FST code when we splice things + # together we will cancel out this cost, see the function CombineArcs(). + this_cost = -math.log(1.0 / len(left_context_phones)) + + for left_context_phone in left_context_phones: + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=shared_state, dest=loop_state, + phone=left_context_phone, word='', cost=this_cost)) + # arc from loop-state to a final-state with #nonterm_end as ilabel and olabel + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=loop_state, dest=final_state, + phone='#nonterm_end', word='#nonterm_end', cost=0.0)) + print("{state}\t{final_cost}".format( + state=final_state, final_cost=0.0)) + return next_state + + + +def write_fst_no_silence(lexicon, nonterminals=None, left_context_phones=None): + """Writes the text format of L.fst to the standard output. This version is for + when --sil-prob=0.0, meaning there is no optional silence allowed. + + 'lexicon' is a list of 3-tuples (word, pron-prob, prons) as returned by + read_lexiconp(). + 'nonterminals', which relates to grammar decoding (see kaldi-asr.org/doc/grammar.html), + is either None, or the user-defined nonterminal symbols as a list of + strings, e.g. ['#nonterm:contact_list', ... ]. + 'left_context_phones', which also relates to grammar decoding, and must be + supplied if 'nonterminals' is supplied is either None or a list of + phones that may appear as left-context, e.g. ['a', 'ah', ... '#nonterm_bos']. + """ + + loop_state = 0 + next_state = 1 # the next un-allocated state, will be incremented as we go. + for (word, pronprob, pron) in lexicon: + cost = -math.log(pronprob) + cur_state = loop_state + for i in range(len(pron) - 1): + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=cur_state, + dest=next_state, + phone=pron[i], + word=(word if i == 0 else ''), + cost=(cost if i == 0 else 0.0))) + cur_state = next_state + next_state += 1 + + i = len(pron) - 1 # note: i == -1 if pron is empty. 
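+        # For a single-phone entry such as ('a', 1.0, ['ay']) the loop above
+        # is skipped entirely and the print below emits one self-arc on the
+        # loop state, e.g. "0 0 ay a 0.0" (tab-separated).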
+ print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=cur_state, + dest=loop_state, + phone=(pron[i] if i >= 0 else ''), + word=(word if i <= 0 else ''), + cost=(cost if i <= 0 else 0.0))) + + if nonterminals is not None: + next_state = write_nonterminal_arcs( + loop_state, loop_state, next_state, + nonterminals, left_context_phones) + + print("{state}\t{final_cost}".format( + state=loop_state, + final_cost=0.0)) + + +def write_fst_with_silence(lexicon, sil_prob, sil_phone, sil_disambig, + nonterminals=None, left_context_phones=None): + """Writes the text format of L.fst to the standard output. This version is for + when --sil-prob != 0.0, meaning there is optional silence + 'lexicon' is a list of 3-tuples (word, pron-prob, prons) + as returned by read_lexiconp(). + 'sil_prob', which is expected to be strictly between 0.. and 1.0, is the + probability of silence + 'sil_phone' is the silence phone, e.g. "SIL". + 'sil_disambig' is either None, or the silence disambiguation symbol, e.g. "#5". + 'nonterminals', which relates to grammar decoding (see kaldi-asr.org/doc/grammar.html), + is either None, or the user-defined nonterminal symbols as a list of + strings, e.g. ['#nonterm:contact_list', ... ]. + 'left_context_phones', which also relates to grammar decoding, and must be + supplied if 'nonterminals' is supplied is either None or a list of + phones that may appear as left-context, e.g. ['a', 'ah', ... '#nonterm_bos']. + """ + + assert sil_prob > 0.0 and sil_prob < 1.0 + sil_cost = -math.log(sil_prob) + no_sil_cost = -math.log(1.0 - sil_prob); + + start_state = 0 + loop_state = 1 # words enter and leave from here + sil_state = 2 # words terminate here when followed by silence; this state + # has a silence transition to loop_state. + next_state = 3 # the next un-allocated state, will be incremented as we go. + + + print('{src}\t{dest}\t{phone}\t{word}\t{cost}'.format( + src=start_state, dest=loop_state, + phone='', word='', cost=no_sil_cost)) + print('{src}\t{dest}\t{phone}\t{word}\t{cost}'.format( + src=start_state, dest=sil_state, + phone='', word='', cost=sil_cost)) + if sil_disambig is None: + print('{src}\t{dest}\t{phone}\t{word}\t{cost}'.format( + src=sil_state, dest=loop_state, + phone=sil_phone, word='', cost=0.0)) + else: + sil_disambig_state = next_state + next_state += 1 + print('{src}\t{dest}\t{phone}\t{word}\t{cost}'.format( + src=sil_state, dest=sil_disambig_state, + phone=sil_phone, word='', cost=0.0)) + print('{src}\t{dest}\t{phone}\t{word}\t{cost}'.format( + src=sil_disambig_state, dest=loop_state, + phone=sil_disambig, word='', cost=0.0)) + + + for (word, pronprob, pron) in lexicon: + pron_cost = -math.log(pronprob) + cur_state = loop_state + for i in range(len(pron) - 1): + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=cur_state, dest=next_state, + phone=pron[i], + word=(word if i == 0 else ''), + cost=(pron_cost if i == 0 else 0.0))) + cur_state = next_state + next_state += 1 + + i = len(pron) - 1 # note: i == -1 if pron is empty. 
+ print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=cur_state, + dest=loop_state, + phone=(pron[i] if i >= 0 else ''), + word=(word if i <= 0 else ''), + cost=no_sil_cost + (pron_cost if i <= 0 else 0.0))) + print("{src}\t{dest}\t{phone}\t{word}\t{cost}".format( + src=cur_state, + dest=sil_state, + phone=(pron[i] if i >= 0 else ''), + word=(word if i <= 0 else ''), + cost=sil_cost + (pron_cost if i <= 0 else 0.0))) + + if nonterminals is not None: + next_state = write_nonterminal_arcs( + start_state, loop_state, next_state, + nonterminals, left_context_phones) + + print("{state}\t{final_cost}".format( + state=loop_state, + final_cost=0.0)) + + + + +def write_words_txt(orig_lines, highest_numbered_symbol, nonterminals, filename): + """Writes updated words.txt to 'filename'. 'orig_lines' is the original lines + in the words.txt file as a list of strings (without the newlines); + highest_numbered_symbol is the highest numbered symbol in the original + words.txt; nonterminals is a list of strings like '#nonterm:foo'.""" + with open(filename, 'w', encoding='latin-1') as f: + for l in orig_lines: + print(l, file=f) + cur_symbol = highest_numbered_symbol + 1 + for n in [ '#nonterm_begin', '#nonterm_end' ] + nonterminals: + print("{0} {1}".format(n, cur_symbol), file=f) + cur_symbol = cur_symbol + 1 + + +def read_nonterminals(filename): + """Reads the user-defined nonterminal symbols in 'filename', checks that + it has the expected format and has no duplicates, and returns the nonterminal + symbols as a list of strings, e.g. + ['#nonterm:contact_list', '#nonterm:phone_number', ... ]. """ + ans = [line.strip(" \t\r\n") for line in open(filename, 'r', encoding='latin-1')] + if len(ans) == 0: + raise RuntimeError("The file {0} contains no nonterminals symbols.".format(filename)) + for nonterm in ans: + if nonterm[:9] != '#nonterm:': + raise RuntimeError("In file '{0}', expected nonterminal symbols to start with '#nonterm:', found '{1}'" + .format(filename, nonterm)) + if len(set(ans)) != len(ans): + raise RuntimeError("Duplicate nonterminal symbols are present in file {0}".format(filename)) + return ans + +def read_left_context_phones(filename): + """Reads, checks, and returns a list of left-context phones, in text form, one + per line. Returns a list of strings, e.g. 
['a', 'ah', ..., '#nonterm_bos' ]""" + ans = [line.strip(" \t\r\n") for line in open(filename, 'r', encoding='latin-1')] + if len(ans) == 0: + raise RuntimeError("The file {0} contains no left-context phones.".format(filename)) + whitespace = re.compile("[ \t]+") + for s in ans: + if len(whitespace.split(s)) != 1: + raise RuntimeError("The file {0} contains an invalid line '{1}'".format(filename, s) ) + + if len(set(ans)) != len(ans): + raise RuntimeError("Duplicate nonterminal symbols are present in file {0}".format(filename)) + return ans + + +def is_token(s): + """Returns true if s is a string and is space-free.""" + if not isinstance(s, str): + return False + whitespace = re.compile("[ \t\r\n]+") + split_str = whitespace.split(s); + return len(split_str) == 1 and s == split_str[0] + + +def main(): + args = get_args() + + lexicon = read_lexiconp(args.lexiconp) + + if args.nonterminals is None: + nonterminals, left_context_phones = None, None + else: + if args.left_context_phones is None: + print("{0}: if --nonterminals is specified, --left-context-phones must also " + "be specified".format(sys.argv[0])) + sys.exit(1) + nonterminals = read_nonterminals(args.nonterminals) + left_context_phones = read_left_context_phones(args.left_context_phones) + + if args.sil_prob == 0.0: + write_fst_no_silence(lexicon, + nonterminals=nonterminals, + left_context_phones=left_context_phones) + else: + # Do some checking that the options make sense. + if args.sil_prob < 0.0 or args.sil_prob >= 1.0: + print("{0}: invalid value specified --sil-prob={1}".format( + sys.argv[0], args.sil_prob), file=sys.stderr) + sys.exit(1) + + if not is_token(args.sil_phone): + print("{0}: you specified --sil-prob={1} but --sil-phone is set " + "to '{2}'".format(sys.argv[0], args.sil_prob, args.sil_phone), + file=sys.stderr) + sys.exit(1) + if args.sil_disambig is not None and not is_token(args.sil_disambig): + print("{0}: invalid value --sil-disambig='{1}' was specified." + "".format(sys.argv[0], args.sil_disambig), file=sys.stderr) + sys.exit(1) + write_fst_with_silence(lexicon, args.sil_prob, args.sil_phone, + args.sil_disambig, + nonterminals=nonterminals, + left_context_phones=left_context_phones) + + + +# (lines, highest_symbol) = read_words_txt(args.input_words_txt) +# nonterminals = read_nonterminals(args.nonterminal_symbols_list) +# write_words_txt(lines, highest_symbol, nonterminals, args.output_words_txt) + + +if __name__ == '__main__': + main() diff --git a/egs/timit/asr/simple_v1/local/parse_options.sh b/egs/timit/asr/simple_v1/local/parse_options.sh new file mode 100644 index 00000000..71fb9e5e --- /dev/null +++ b/egs/timit/asr/simple_v1/local/parse_options.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); +# Arnab Ghoshal, Karel Vesely + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# Parse command-line options. +# To be sourced by another script (as in ". parse_options.sh"). 
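+#
+# A minimal (hypothetical) example of sourcing it from an enclosing script:
+#
+#   stage=0                  # default value, may be overridden below
+#   . local/parse_options.sh
+#
+# after which running the enclosing script with "--stage 2" leaves $stage=2.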
+# Option format is: --option-name arg +# and shell variable "option_name" gets set to value "arg." +# The exception is --help, which takes no arguments, but prints the +# $help_message variable (if defined). + + +### +### The --config file options have lower priority to command line +### options, so we need to import them first... +### + +# Now import all the configs specified by command-line, in left-to-right order +for ((argpos=1; argpos<$#; argpos++)); do + if [ "${!argpos}" == "--config" ]; then + argpos_plus1=$((argpos+1)) + config=${!argpos_plus1} + [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 + . $config # source the config file. + fi +done + + +### +### Now we process the command line options +### +while true; do + [ -z "${1:-}" ] && break; # break if there are no arguments + case "$1" in + # If the enclosing script is called with --help option, print the help + # message and exit. Scripts should put help messages in $help_message + --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; + else printf "$help_message\n" 1>&2 ; fi; + exit 0 ;; + --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" + exit 1 ;; + # If the first command-line argument begins with "--" (e.g. --foo-bar), + # then work out the variable name as $name, which will equal "foo_bar". + --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; + # Next we test whether the variable in question is undefned-- if so it's + # an invalid option and we die. Note: $0 evaluates to the name of the + # enclosing script. + # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar + # is undefined. We then have to wrap this test inside "eval" because + # foo_bar is itself inside a variable ($name). + eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; + + oldval="`eval echo \\$$name`"; + # Work out whether we seem to be expecting a Boolean argument. + if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then + was_bool=true; + else + was_bool=false; + fi + + # Set the variable to the right value-- the escaped quotes make it work if + # the option had spaces, like --cmd "queue.pl -sync y" + eval $name=\"$2\"; + + # Check that Boolean-valued arguments are really Boolean. + if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then + echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 + exit 1; + fi + shift 2; + ;; + *) break; + esac +done + + +# Check for an empty argument to the --cmd option, which can easily occur as a +# result of scripting errors. +[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; + + +true; # so this script returns exit code 0. diff --git a/egs/timit/asr/simple_v1/local/prepare_lang.sh b/egs/timit/asr/simple_v1/local/prepare_lang.sh new file mode 100644 index 00000000..7faf63fc --- /dev/null +++ b/egs/timit/asr/simple_v1/local/prepare_lang.sh @@ -0,0 +1,382 @@ +#!/usr/bin/env bash +# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey); +# Arnab Ghoshal +# 2014 Guoguo Chen +# 2015 Hainan Xu +# 2016 FAU Erlangen (Author: Axel Horndasch) +# 2020 Xiaomi Corporation (Author: Junbo Zhang) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This script prepares a directory such as data/lang/, in the standard format, +# given a source directory containing a dictionary lexicon.txt in a form like: +# word phone1 phone2 ... phoneN +# per line (alternate prons would be separate lines), or a dictionary with probabilities +# called lexiconp.txt in a form: +# word pron-prob phone1 phone2 ... phoneN +# (with 0.0 < pron-prob <= 1.0); note: if lexiconp.txt exists, we use it even if +# lexicon.txt exists. +# and also files silence_phones.txt, nonsilence_phones.txt, optional_silence.txt +# and extra_questions.txt +# Here, silence_phones.txt and nonsilence_phones.txt are lists of silence and +# non-silence phones respectively (where silence includes various kinds of +# noise, laugh, cough, filled pauses etc., and nonsilence phones includes the +# "real" phones.) +# In each line of those files is a list of phones, and the phones on each line +# are assumed to correspond to the same "base phone", i.e. they will be +# different stress or tone variations of the same basic phone. +# The file "optional_silence.txt" contains just a single phone (typically SIL) +# which is used for optional silence in the lexicon. +# extra_questions.txt might be empty; typically will consist of lists of phones, +# all members of each list with the same stress or tone; and also possibly a +# list for the silence phones. This will augment the automatically generated +# questions (note: the automatically generated ones will treat all the +# stress/tone versions of a phone the same, so will not "get to ask" about +# stress or tone). +# + +# This script adds word-position-dependent phones and constructs a host of other +# derived files, that go in data/lang/. + +# Begin configuration section. +num_sil_states=5 +num_nonsil_states=3 +position_dependent_phones=true +# position_dependent_phones is false also when position dependent phones and word_boundary.txt +# have been generated by another source +share_silence_phones=false # if true, then share pdfs of different silence + # phones together. +sil_prob=0.5 +num_extra_phone_disambig_syms=1 # Standard one phone disambiguation symbol is used for optional silence. + # Increasing this number does not harm, but is only useful if you later + # want to introduce this labels to L_disambig.fst + + +# end configuration sections + +echo "$0 $@" # Print the command line for logging + +. local/parse_options.sh + +if [ $# -ne 4 ]; then + echo "Usage: local/prepare_lang.sh " + echo "e.g.: local/prepare_lang.sh data/local/dict data/local/lang data/lang" + echo " should contain the following files:" + echo " extra_questions.txt lexicon.txt nonsilence_phones.txt optional_silence.txt silence_phones.txt" + echo "See http://kaldi-asr.org/doc/data_prep.html#data_prep_lang_creating for more info." + echo "options: " + echo " may also, for the grammar-decoding case (see http://kaldi-asr.org/doc/grammar.html)" + echo "contain a file nonterminals.txt containing symbols like #nonterm:contact_list, one per line." 
+ echo " --num-sil-states # default: 5, #states in silence models." + echo " --num-nonsil-states # default: 3, #states in non-silence models." + echo " --position-dependent-phones (true|false) # default: true; if true, use _B, _E, _S & _I" + echo " # markers on phones to indicate word-internal positions. " + echo " --share-silence-phones (true|false) # default: false; if true, share pdfs of " + echo " # all silence phones. " + echo " --sil-prob # default: 0.5 [must have 0 <= silprob < 1]" + exit 1; +fi + +srcdir=$1 +oov_word=$2 +tmpdir=$3 +dir=$4 + + +if [ -d $dir/phones ]; then + rm -r $dir/phones +fi +mkdir -p $dir $tmpdir $dir/phones + +silprob=false +[ -f $srcdir/lexiconp_silprob.txt ] && silprob=true + +[ -f path.sh ] && . ./path.sh + +if [[ ! -f $srcdir/lexicon.txt ]]; then + echo "**Creating $srcdir/lexicon.txt from $srcdir/lexiconp.txt" + perl -ape 's/(\S+\s+)\S+\s+(.+)/$1$2/;' < $srcdir/lexiconp.txt > $srcdir/lexicon.txt || exit 1; +fi +if [[ ! -f $srcdir/lexiconp.txt ]]; then + echo "**Creating $srcdir/lexiconp.txt from $srcdir/lexicon.txt" + perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $srcdir/lexiconp.txt || exit 1; +fi + +if [ ! -z "$unk_fst" ] && [ ! -f "$unk_fst" ]; then + echo "$0: expected --unk-fst $unk_fst to exist as a file" + exit 1 +fi + +if $position_dependent_phones; then + # Create $tmpdir/lexiconp.txt from $srcdir/lexiconp.txt (or + # $tmpdir/lexiconp_silprob.txt from $srcdir/lexiconp_silprob.txt) by + # adding the markers _B, _E, _S, _I depending on word position. + # In this recipe, these markers apply to silence also. + # Do this starting from lexiconp.txt only. + if "$silprob"; then + perl -ane '@A=split(" ",$_); $w = shift @A; $p = shift @A; $silword_p = shift @A; + $wordsil_f = shift @A; $wordnonsil_f = shift @A; @A>0||die; + if(@A==1) { print "$w $p $silword_p $wordsil_f $wordnonsil_f $A[0]_S\n"; } + else { print "$w $p $silword_p $wordsil_f $wordnonsil_f $A[0]_B "; + for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \ + < $srcdir/lexiconp_silprob.txt > $tmpdir/lexiconp_silprob.txt + else + perl -ane '@A=split(" ",$_); $w = shift @A; $p = shift @A; @A>0||die; + if(@A==1) { print "$w $p $A[0]_S\n"; } else { print "$w $p $A[0]_B "; + for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \ + < $srcdir/lexiconp.txt > $tmpdir/lexiconp.txt || exit 1; + fi + + # create $tmpdir/phone_map.txt + # this has the format (on each line) + # ... + # where the versions depend on the position of the phone within a word. + # For instance, we'd have: + # AA AA_B AA_E AA_I AA_S + # for (B)egin, (E)nd, (I)nternal and (S)ingleton + # and in the case of silence + # SIL SIL SIL_B SIL_E SIL_I SIL_S + # [because SIL on its own is one of the variants; this is for when it doesn't + # occur inside a word but as an option in the lexicon.] + + # This phone map expands the phone lists into all the word-position-dependent + # versions of the phone lists. 
+ cat <(set -f; for x in `cat $srcdir/silence_phones.txt`; do for y in "" "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \ + <(set -f; for x in `cat $srcdir/nonsilence_phones.txt`; do for y in "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \ + > $tmpdir/phone_map.txt +else + if "$silprob"; then + cp $srcdir/lexiconp_silprob.txt $tmpdir/lexiconp_silprob.txt + else + cp $srcdir/lexiconp.txt $tmpdir/lexiconp.txt + fi + + cat $srcdir/silence_phones.txt $srcdir/nonsilence_phones.txt | \ + awk '{for(n=1;n<=NF;n++) print $n; }' > $tmpdir/phones + paste -d' ' $tmpdir/phones $tmpdir/phones > $tmpdir/phone_map.txt +fi + + +# Making monophone systems. +cat $srcdir/silence_phones.txt | local/apply_map.pl $tmpdir/phone_map.txt | \ + awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/silence.txt +cat $srcdir/nonsilence_phones.txt | local/apply_map.pl $tmpdir/phone_map.txt | \ + awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/nonsilence.txt +cp $srcdir/optional_silence.txt $dir/phones/optional_silence.txt + +# if extra_questions.txt is empty, it's OK. +cat $srcdir/extra_questions.txt 2>/dev/null | local/apply_map.pl $tmpdir/phone_map.txt \ + >$dir/phones/extra_questions.txt + +# Want extra questions about the word-start/word-end stuff. Make it separate for +# silence and non-silence. Probably doesn't matter, as silence will rarely +# be inside a word. +if $position_dependent_phones; then + for suffix in _B _E _I _S; do + (set -f; for x in `cat $srcdir/nonsilence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt + done + for suffix in "" _B _E _I _S; do + (set -f; for x in `cat $srcdir/silence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt + done +fi + +# add_lex_disambig.pl is responsible for adding disambiguation symbols to +# the lexicon, for telling us how many disambiguation symbols it used, +# and also for modifying the unknown-word's pronunciation (if the +# --unk-fst was provided) to the sequence "#1 #2 #3", and reserving those +# disambig symbols for that purpose. +# The #2 will later be replaced with the actual unk model. The reason +# for the #1 and the #3 is for disambiguation and also to keep the +# FST compact. If we didn't have the #1, we might have a different copy of +# the unk-model FST, or at least some of its arcs, for each start-state from +# which an transition comes (instead of per end-state, which is more compact); +# and adding the #3 prevents us from potentially having 2 copies of the unk-model +# FST due to the optional-silence [the last phone of any word gets 2 arcs]. +if [ ! -z "$unk_fst" ]; then # if the --unk-fst option was provided... + if "$silprob"; then + local/lang/internal/modify_unk_pron.py $tmpdir/lexiconp_silprob.txt "$oov_word" || exit 1 + else + local/lang/internal/modify_unk_pron.py $tmpdir/lexiconp.txt "$oov_word" || exit 1 + fi + unk_opt="--first-allowed-disambig 4" +else + unk_opt= +fi + +if "$silprob"; then + ndisambig=$(local/add_lex_disambig.pl $unk_opt --pron-probs --sil-probs $tmpdir/lexiconp_silprob.txt $tmpdir/lexiconp_silprob_disambig.txt) +else + ndisambig=$(local/add_lex_disambig.pl $unk_opt --pron-probs $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt) +fi +ndisambig=$[$ndisambig+$num_extra_phone_disambig_syms]; # add (at least) one disambig symbol for silence in lexicon FST. 
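+# For example, if add_lex_disambig.pl reported 2 disambiguation symbols and
+# num_extra_phone_disambig_syms=1, then ndisambig=3 and the loop below writes
+# #0 #1 #2 #3 to phones/disambig.txt.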
+echo $ndisambig > $tmpdir/lex_ndisambig
+
+# Format of lexiconp_disambig.txt:
+# !SIL	1.0	SIL_S
+# <SPOKEN_NOISE>	1.0	SPN_S #1
+# <UNK>	1.0	SPN_S #2
+# <NOISE>	1.0	NSN_S
+# !EXCLAMATION-POINT	1.0	EH2_B K_I S_I K_I L_I AH0_I M_I EY1_I SH_I AH0_I N_I P_I OY2_I N_I T_E
+
+( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) >$dir/phones/disambig.txt
+
+# Create phone symbol table.
+echo "<eps>" | cat - $dir/phones/{silence,nonsilence,disambig}.txt | \
+  awk '{n=NR-1; print $1, n;}' > $dir/phones.txt
+
+# Create a file that describes the word-boundary information for
+# each phone. 5 categories.
+if $position_dependent_phones; then
+  cat $dir/phones/{silence,nonsilence}.txt | \
+    awk '/_I$/{print $1, "internal"; next;} /_B$/{print $1, "begin"; next; }
+         /_S$/{print $1, "singleton"; next;} /_E$/{print $1, "end"; next; }
+         {print $1, "nonword";} ' > $dir/phones/word_boundary.txt
+else
+  # word_boundary.txt might have been generated by another source
+  [ -f $srcdir/word_boundary.txt ] && cp $srcdir/word_boundary.txt $dir/phones/word_boundary.txt
+fi
+
+# Create word symbol table.
+# <s> and </s> are only needed due to the need to rescore lattices with
+# ConstArpaLm format language model. They do not normally appear in G.fst or
+# L.fst.
+
+if "$silprob"; then
+  # remove the silprob
+  cat $tmpdir/lexiconp_silprob.txt |\
+    awk '{
+      for(i=1; i<=NF; i++) {
+        if(i!=3 && i!=4 && i!=5) printf("%s\t", $i); if(i==NF) print "";
+      }
+    }' > $tmpdir/lexiconp.txt
+fi
+
+cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | uniq | awk '
+  BEGIN {
+    print "<eps> 0";
+  }
+  {
+    if ($1 == "<s>") {
+      print "<s> is in the vocabulary!" | "cat 1>&2"
+      exit 1;
+    }
+    if ($1 == "</s>") {
+      print "</s> is in the vocabulary!" | "cat 1>&2"
+      exit 1;
+    }
+    printf("%s %d\n", $1, NR);
+  }
+  END {
+    printf("#0 %d\n", NR+1);
+    printf("<s> %d\n", NR+2);
+    printf("</s> %d\n", NR+3);
+  }' > $dir/words.txt || exit 1;
+
+# format of $dir/words.txt:
+#<eps> 0
+#a 1
+#aa 2
+#aardvark 3
+#...
+
+silphone=`cat $srcdir/optional_silence.txt` || exit 1;
+[ -z "$silphone" ] && \
+  ( echo "You have no optional-silence phone; it is required in the current scripts"
+    echo "but you may use the option --sil-prob 0.0 to stop it being used." ) && \
+   exit 1;
+
+grammar_opts=
+
+# Create the basic L.fst without disambiguation symbols, for use
+# in training.
+
+if $silprob; then
+  # Add silence probabilities (models the prob. of silence before and after each
+  # word). On some setups this helps a bit. See local/dict_dir_add_pronprobs.sh
+  # and where it's called in the example scripts (run.sh).
+  local/make_lexicon_fst_silprob.py $grammar_opts --sil-phone=$silphone \
+    $tmpdir/lexiconp_silprob.txt $srcdir/silprob.txt | \
+    local/sym2int.pl -f 3 $dir/phones.txt | \
+    local/sym2int.pl -f 4 $dir/words.txt > $dir/L.fst.txt || exit 1;
+
+  # fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+  #   --keep_isymbols=false --keep_osymbols=false | \
+  #   fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
+else
+  local/make_lexicon_fst.py $grammar_opts --sil-prob=$sil_prob --sil-phone=$silphone \
+    $tmpdir/lexiconp.txt | \
+    local/sym2int.pl -f 3 $dir/phones.txt | \
+    local/sym2int.pl -f 4 $dir/words.txt > $dir/L.fst.txt || exit 1;
+
+  # fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+  #   --keep_isymbols=false --keep_osymbols=false | \
+  #   fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
+fi
+
+# The file oov.txt contains a word that we will map any OOVs to during
+# training.
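+# (For instance, if $oov_word is <UNK> and words.txt assigns it id 3, then
+# oov.int will simply contain "3".)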
+echo "$oov_word" > $dir/oov.txt || exit 1; +cat $dir/oov.txt | local/sym2int.pl $dir/words.txt >$dir/oov.int || exit 1; +# integer version of oov symbol, used in some scripts. + + +# the file wdisambig.txt contains a (line-by-line) list of the text-form of the +# disambiguation symbols that are used in the grammar and passed through by the +# lexicon. At this stage it's hardcoded as '#0', but we're laying the groundwork +# for more generality (which probably would be added by another script). +# wdisambig_words.int contains the corresponding list interpreted by the +# symbol table words.txt, and wdisambig_phones.int contains the corresponding +# list interpreted by the symbol table phones.txt. +echo '#0' >$dir/phones/wdisambig.txt + +wdisambig_phone=`local/sym2int.pl $dir/phones.txt <$dir/phones/wdisambig.txt` +wdisambig_word=`local/sym2int.pl $dir/words.txt <$dir/phones/wdisambig.txt` + +# Create these lists of phones in colon-separated integer list form too, +# for purposes of being given to programs as command-line options. +for f in silence nonsilence optional_silence disambig; do + local/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt >$dir/phones/$f.int + local/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt | \ + awk '{printf(":%d", $1);} END{printf "\n"}' | sed s/:// > $dir/phones/$f.csl || exit 1; +done + +if [ -f $dir/phones/word_boundary.txt ]; then + local/sym2int.pl -f 1 $dir/phones.txt <$dir/phones/word_boundary.txt \ + > $dir/phones/word_boundary.int || exit 1; +fi + +silphonelist=`cat $dir/phones/silence.csl` +nonsilphonelist=`cat $dir/phones/nonsilence.csl` + +# Create the lexicon FST with disambiguation symbols, and put it in lang_test. +# There is an extra step where we create a loop to "pass through" the +# disambiguation symbols from G.fst. + +if $silprob; then + local/make_lexicon_fst_silprob.py $grammar_opts \ + --sil-phone=$silphone --sil-disambig='#'$ndisambig \ + $tmpdir/lexiconp_silprob_disambig.txt $srcdir/silprob.txt | \ + local/sym2int.pl -f 3 $dir/phones.txt | \ + local/sym2int.pl -f 4 $dir/words.txt | \ + local/fstaddselfloops.pl $wdisambig_phone $wdisambig_word > $dir/L_disambig.fst.txt || exit 1; +else + local/make_lexicon_fst.py $grammar_opts \ + --sil-prob=$sil_prob --sil-phone=$silphone --sil-disambig='#'$ndisambig \ + $tmpdir/lexiconp_disambig.txt | \ + local/sym2int.pl -f 3 $dir/phones.txt | \ + local/sym2int.pl -f 4 $dir/words.txt | \ + local/fstaddselfloops.pl $wdisambig_phone $wdisambig_word > $dir/L_disambig.fst.txt || exit 1; +fi + +exit 0; diff --git a/egs/timit/asr/simple_v1/local/sym2int.pl b/egs/timit/asr/simple_v1/local/sym2int.pl new file mode 100644 index 00000000..58167200 --- /dev/null +++ b/egs/timit/asr/simple_v1/local/sym2int.pl @@ -0,0 +1,101 @@ +#!/usr/bin/env perl +# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
+
+
+$ignore_oov = 0;
+
+for($x = 0; $x < 2; $x++) {
+  if ($ARGV[0] eq "--map-oov") {
+    shift @ARGV;
+    $map_oov = shift @ARGV;
+    if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") {
+      # disallow '-f', the empty string and anything ending in words.txt as the
+      # OOV symbol because these are likely command-line errors.
+      die "the --map-oov option requires an argument";
+    }
+  }
+  if ($ARGV[0] eq "-f") {
+    shift @ARGV;
+    $field_spec = shift @ARGV;
+    if ($field_spec =~ m/^\d+$/) {
+      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+    }
+    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+      if ($1 ne "") {
+        $field_begin = $1 - 1;  # Change to zero-based indexing.
+      }
+      if ($2 ne "") {
+        $field_end = $2 - 1;    # Change to zero-based indexing.
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+  exit(1);
+}
+open(F, "<$symtab") || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+  $sym2int{$A[0]} = $A[1] + 0;
+}
+
+if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric -> look it up
+  if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
+  $map_oov = $sym2int{$map_oov};
+}
+
+$num_warning = 0;
+$max_warning = 20;
+
+while (<>) {
+  @A = split(" ", $_);
+  @B = ();
+  for ($n = 0; $n < @A; $n++) {
+    $a = $A[$n];
+    if ( (!defined $field_begin || $n >= $field_begin)
+         && (!defined $field_end || $n <= $field_end)) {
+      $i = $sym2int{$a};
+      if (!defined ($i)) {
+        if (defined $map_oov) {
+          if ($num_warning++ < $max_warning) {
+            print STDERR "sym2int.pl: replacing $a with $map_oov\n";
+            if ($num_warning == $max_warning) {
+              print STDERR "sym2int.pl: not warning for OOVs any more times\n";
+            }
+          }
+          $i = $map_oov;
+        } else {
+          die "sym2int.pl: undefined symbol $a\n";
+        }
+      }
+      $a = $i;
+    }
+    push @B, $a;
+  }
+  print join(" ", @B);
+  print "\n";
+}
+if ($num_warning > 0) {
+  print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n";
+}
+
+exit(0);
diff --git a/egs/timit/asr/simple_v1/local2/phones.60-48-39.map b/egs/timit/asr/simple_v1/local2/phones.60-48-39.map
new file mode 100644
index 00000000..4ebcc140
--- /dev/null
+++ b/egs/timit/asr/simple_v1/local2/phones.60-48-39.map
@@ -0,0 +1,61 @@
+aa aa aa
+ae ae ae
+ah ah ah
+ao ao aa
+aw aw aw
+ax ax ah
+ax-h ax ah
+axr er er
+ay ay ay
+b b b
+bcl vcl sil
+ch ch ch
+d d d
+dcl vcl sil
+dh dh dh
+dx dx dx
+eh eh eh
+el el l
+em m m
+en en n
+eng ng ng
+epi epi sil
+er er er
+ey ey ey
+f f f
+g g g
+gcl vcl sil
+h# sil sil
+hh hh hh
+hv hh hh
+ih ih ih
+ix ix ih
+iy iy iy
+jh jh jh
+k k k
+kcl cl sil
+l l l
+m m m
+n n n
+ng ng ng
+nx n n
+ow ow ow
+oy oy oy
+p p p
+pau sil sil
+pcl cl sil
+q
+r r r
+s s s
+sh sh sh
+t t t
+tcl cl sil
+th th th
+uh uh uh
+uw uw uw
+ux uw uw
+v v v
+w w w
+y y y
+z z z
+zh zh sh
diff --git a/egs/timit/asr/simple_v1/local2/timit_data_prep.sh b/egs/timit/asr/simple_v1/local2/timit_data_prep.sh
new file mode 100644
index 00000000..c5ca25c7
--- /dev/null
+++ b/egs/timit/asr/simple_v1/local2/timit_data_prep.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+
+# Copyright 2013   (Authors: Bagher BabaAli, Daniel Povey, Arnab Ghoshal)
+#           2014   Brno University of Technology (Author: Karel Vesely)
+# Apache 2.0.
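The three columns of phones.60-48-39.map above are the 60-, 48- and 39-phone inventories. A hedged sketch of how one might read it; the lone `q` line has no mapped form, which is how the glottal stop gets deleted:

```python
def load_phone_map(path='local2/phones.60-48-39.map'):
    to48, to39 = {}, {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) == 3:            # the 'q' line has one field: skip
                p60, p48, p39 = fields
                to48[p60], to39[p60] = p48, p39
    return to48, to39

# e.g. to48['bcl'] == 'vcl' and to39['bcl'] == 'sil'; 'q' is in neither map.
```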
+
+if [ $# -ne 2 ]; then
+   echo "Usage: local2/timit_data_prep.sh <TIMIT-dir> <num-phones>"
+   echo "e.g.: local2/timit_data_prep.sh data 48"
+   exit 1;
+fi
+
+dir=`pwd`/data/local/data
+mkdir -p $dir
+local=`pwd`/local
+utils=`pwd`/utils
+conf=`pwd`/local2
+
+. ./path.sh
+
+# First check if the train & test directories exist (these can either be upper-
+# or lower-cased).
+if [ ! -d $1/TRAIN -o ! -d $1/TEST ] && [ ! -d $1/train -o ! -d $1/test ]; then
+   echo "timit_data_prep.sh: Spot check of command line argument failed"
+   echo "Command line argument must be absolute pathname to TIMIT directory"
+   echo "with name like /export/corpora5/LDC/LDC93S1/timit/TIMIT"
+   exit 1;
+fi
+
+# Now check what case the directory structure is
+uppercased=false
+train_dir=train
+test_dir=test
+if [ -d $1/TRAIN ]; then
+  uppercased=true
+  train_dir=TRAIN
+  test_dir=TEST
+fi
+
+tmpdir=$(mktemp -d /tmp/kaldi.XXXX);
+trap 'rm -rf "$tmpdir"' EXIT
+
+# Get the list of speakers. All speakers in the 'train' directory are used
+# for training; this script only prepares the training transcripts (the
+# dev/test splits are handled elsewhere in this recipe).
+if $uppercased; then
+  ls -d "$1"/TRAIN/DR*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
+else
+  ls -d "$1"/train/dr*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
+fi
+
+cd $dir
+for x in train; do
+  # First, find the list of audio files (use only si & sx utterances).
+  # Note: train & test sets are under different directories, but doing find on
+  # both and grepping for the speakers will work correctly.
+  find $1/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.WAV' \
+    | grep -f $tmpdir/${x}_spk > ${x}_sph.flist
+
+  sed -e 's:.*/\(.*\)/\(.*\).\(WAV\|wav\)$:\1_\2:' ${x}_sph.flist \
+    > $tmpdir/${x}_sph.uttids
+  paste $tmpdir/${x}_sph.uttids ${x}_sph.flist \
+    | sort -k1,1 > ${x}_sph.scp
+
+  cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids
+
+  # Now, convert the transcripts into our format (no normalization yet).
+  # Get the transcripts: each line of the output contains an utterance
+  # ID followed by the transcript.
+  find $1/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.PHN' \
+    | grep -f $tmpdir/${x}_spk > $tmpdir/${x}_phn.flist
+  sed -e 's:.*/\(.*\)/\(.*\).\(PHN\|phn\)$:\1_\2:' $tmpdir/${x}_phn.flist \
+    > $tmpdir/${x}_phn.uttids
+  while read line; do
+    [ -f $line ] || { echo "Cannot find transcription file '$line'" >&2; exit 1; }
+    cut -f3 -d' ' "$line" | tr '\n' ' ' | perl -ape 's: *$:\n:;'
+  done < $tmpdir/${x}_phn.flist > $tmpdir/${x}_phn.trans
+  paste $tmpdir/${x}_phn.uttids $tmpdir/${x}_phn.trans \
+    | sort -k1,1 > ${x}.trans
+
+  # Do normalization steps.
+  cat ${x}.trans | $conf/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to $2 | sort > $x.text || exit 1;
+
+done
+
+echo "Data preparation succeeded"
+
diff --git a/egs/timit/asr/simple_v1/local2/timit_norm_trans.pl b/egs/timit/asr/simple_v1/local2/timit_norm_trans.pl
new file mode 100644
index 00000000..7394323c
--- /dev/null
+++ b/egs/timit/asr/simple_v1/local2/timit_norm_trans.pl
@@ -0,0 +1,92 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+
+# Copyright 2012  Arnab Ghoshal
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script normalizes the TIMIT phonetic transcripts that have been
+# extracted in a format where each line contains an utterance ID followed by
+# the transcript, e.g.:
+# fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h#
+
+my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n
+Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a
+smaller set defined by the -m option. This script assumes that the mapping is
+done in the \"standard\" fashion, i.e. to 48 or 39 phones. The input is
+assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can
+be changed using the -from option. The input format is assumed to be utterance
+ID followed by transcript on the same line.\n";

+use strict;
+use Getopt::Long;
+die "$usage" unless(@ARGV >= 1);
+my ($in_trans, $phone_map, $num_phones_out);
+my $num_phones_in = 60;
+GetOptions ("i=s" => \$in_trans,         # Input transcription
+            "m=s" => \$phone_map,        # File containing phone mappings
+            "from=i" => \$num_phones_in,    # Input #phones: must be 60 or 48
+            "to=i" => \$num_phones_out );   # Output #phones: must be 48 or 39
+
+die $usage unless(defined($in_trans) && defined($phone_map) &&
+                  defined($num_phones_out));
+if ($num_phones_in != 60 && $num_phones_in != 48) {
+  die "Can only use 60 or 48 for -from (used $num_phones_in)."
+}
+if ($num_phones_out != 48 && $num_phones_out != 39) {
+  die "Can only use 48 or 39 for -to (used $num_phones_out)."
+}
+unless ($num_phones_out < $num_phones_in) {
+  die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)."
+}
+
+
+open(M, "<$phone_map") or die "Cannot open mappings file '$phone_map': $!";
+my (%phonemap, %seen_phones);
+my $num_seen_phones = 0;
+while (<M>) {
+  chomp;
+  next if ($_ =~ /^q\s*.*$/);  # Ignore glottal stops.
+  m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $_";
+  my $mapped_from = ($num_phones_in == 60)? $1 : $2;
+  my $mapped_to = ($num_phones_out == 48)? $2 : $3;
+  if (!defined($seen_phones{$mapped_to})) {
+    $seen_phones{$mapped_to} = 1;
+    $num_seen_phones += 1;
+  }
+  $phonemap{$mapped_from} = $mapped_to;
+}
+if ($num_seen_phones != $num_phones_out) {
+  die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones";
+}
+
+open(T, "<$in_trans") or die "Cannot open transcription file '$in_trans': $!";
+while (<T>) {
+  chomp;
+  $_ =~ m:^(\S+)\s+(.+): or die "Bad line: $_";
+  my $utt_id = $1;
+  my $trans = $2;
+
+  $trans =~ s/q//g;  # Remove glottal stops.
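+  # (The map-loading loop above skipped the 'q' line, so 'q' never enters
+  #  %phonemap and would pass through the printing loop below unchanged;
+  #  hence the explicit deletion here.)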
+  $trans =~ s/^\s*//; $trans =~ s/\s*$//;  # Normalize spaces
+
+  print $utt_id;
+  for my $phone (split(/\s+/, $trans)) {
+    if (exists $phonemap{$phone}) { print " $phonemap{$phone}"; }
+    else { print " $phone"; }
+  }
+  print "\n";
+}
+
diff --git a/egs/timit/asr/simple_v1/local2/timit_prepare_dict.sh b/egs/timit/asr/simple_v1/local2/timit_prepare_dict.sh
new file mode 100644
index 00000000..5540ee13
--- /dev/null
+++ b/egs/timit/asr/simple_v1/local2/timit_prepare_dict.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+# Copyright 2013  (Authors: Daniel Povey, Bagher BabaAli)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# Call this script from one level above, e.g. from the simple_v1/ directory.
+# It puts its output in data/local/.
+
+# The parts of the output of this that will be needed are
+# [in data/local/dict/ ]
+# lexicon.txt
+# extra_questions.txt
+# nonsilence_phones.txt
+# optional_silence.txt
+# silence_phones.txt
+
+# run this from ../
+srcdir=data/local/data
+dir=data/local/dict
+
+mkdir -p $dir
+
+[ -f path.sh ] && . ./path.sh
+
+#(1) Dictionary preparation:
+
+# Make the phone set. In this setup the "words" are the phones themselves
+# and the lexicon is an identity mapping; position-dependent suffixes
+# (_B, _E, _I, _S) are not used (see --position-dependent-phones in run.sh).
+
+# silence phones, one per line.
+echo sil > $dir/silence_phones.txt
+echo sil > $dir/optional_silence.txt
+
+# nonsilence phones; on each line is a list of phones that correspond
+# really to the same base phone.
+
+# Create the lexicon, which is just an identity mapping
+cut -d' ' -f2- $srcdir/train.text | tr ' ' '\n' | sort -u > $dir/phones.txt
+echo "<UNK>" >> $dir/phones.txt
+paste $dir/phones.txt $dir/phones.txt > $dir/lexicon.txt || exit 1;
+grep -v -F -f $dir/silence_phones.txt $dir/phones.txt > $dir/nonsilence_phones.txt
+
+# A few extra questions that will be added to those obtained by automatically clustering
+# the "real" phones. These ask about stress; there's also one for silence.
+cat $dir/silence_phones.txt | awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1;
+cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) {
+  $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
+  >> $dir/extra_questions.txt || exit 1;
+
+echo "Dict preparation succeeded"
diff --git a/egs/timit/asr/simple_v1/local2/train_lms.sh b/egs/timit/asr/simple_v1/local2/train_lms.sh
new file mode 100644
index 00000000..8bf9221d
--- /dev/null
+++ b/egs/timit/asr/simple_v1/local2/train_lms.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+
+# To be run from one directory above this script.
+. ./path.sh
+
+text=data/local/data/train.text
+lexicon=data/local/dict/lexicon.txt
+
+for f in "$text" "$lexicon"; do
+  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1
+done
+
+# This script takes no arguments. It assumes you have already run
+# timit_data_prep.sh.
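In short, timit_prepare_dict.sh builds an identity lexicon over the phone inventory. Sketched in Python (file layout as in this recipe; the `<UNK>` entry is the one reconstructed above):

```python
def build_identity_lexicon(train_text='data/local/data/train.text',
                           lexicon='data/local/dict/lexicon.txt'):
    phones = set()
    with open(train_text) as f:
        for line in f:
            phones.update(line.split()[1:])   # field 1 is the utterance ID
    phones.add('<UNK>')                       # OOV entry, as appended above
    with open(lexicon, 'w') as out:
        for p in sorted(phones):
            print(f'{p}\t{p}', file=out)      # each "word" is its own pron
```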
+# It takes as input the files
+# data/local/data/train.text
+# data/local/dict/lexicon.txt
+dir=data/local/lm
+mkdir -p $dir
+
+kaldi_lm=$(which train_lm.sh)
+if [ -z "$kaldi_lm" ]; then
+  echo "$0: train_lm.sh is not found. That might mean it's not installed"
+  echo "$0: or it is not added to PATH"
+  echo "$0: Please use the following commands to install it"
+  echo "  git clone https://github.com/danpovey/kaldi_lm.git"
+  echo "  cd kaldi_lm"
+  echo "  make -j"
+  echo "Then add the path of kaldi_lm to PATH and rerun $0"
+  exit 1
+fi
+
+cleantext=$dir/text.no_oov
+
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex)>0){ seen[$1]=1; } }
+  {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<UNK> ");} } printf("\n");}' \
+  >$cleantext || exit 1
+
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+  sort -nr >$dir/word.counts || exit 1
+
+# Get counts from acoustic training transcripts, and add one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+  sort | uniq -c | sort -nr >$dir/unigram.counts || exit 1
+
+# note: we probably won't really make use of <UNK> as there aren't any OOVs
+cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<UNK>" >$dir/word_map || \
+  exit 1
+
+# note: ignore 1st field of train.txt, it's the utterance-id.
+cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;}
+  { for(n=2;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz || \
+  exit 1
+
+train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1
+
+# LM is small enough that we don't need to prune it (only about 0.7M N-grams).
+# Perplexity over 128254.000000 words is 90.446690
+
+# note: output is
+# data/local/lm/3gram-mincount/lm_unpruned.gz
+
+exit 0
+
+# From here is some commands to do a baseline with SRILM (assuming
+# you have it installed).
+heldout_sent=10000 # Don't change this if you want result to be comparable with
+                   # kaldi_lm results
+sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities.
+mkdir -p $sdir
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  head -$heldout_sent > $sdir/heldout
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  tail -n +$heldout_sent > $sdir/train
+
+cat $dir/word_map | awk '{print $1}' | cat - <(echo "<s>"; echo "</s>") >$sdir/wordlist
+
+ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \
+  -map-unk "<UNK>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz
+ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout
+# 0 zeroprobs, logprob= -250954 ppl= 90.5091 ppl1= 132.482
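The reported perplexity can be sanity-checked from the logprob line, assuming SRILM's usual definition ppl = 10^(-logprob/(words + sentences)), where ppl1 excludes the sentence-end tokens. The word count below is the one implied by the ppl1 figure, not taken from an actual run:

```python
logprob = -250954.0
sentences = 10_000      # heldout_sent above
words = 118_254         # hypothetical: back-solved from ppl1 = 132.482

ppl = 10 ** (-logprob / (words + sentences))
ppl1 = 10 ** (-logprob / words)
print(f'{ppl:.1f} {ppl1:.1f}')   # ~90.5 ~132.5, matching the log line above
```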
+# Note: perplexity SRILM gives to Kaldi-LM model is same as kaldi-lm reports above.
+# Difference in WSJ must have been due to different treatment of <UNK>.
+ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout
+# 0 zeroprobs, logprob= -250913 ppl= 90.4439 ppl1= 132.379
diff --git a/egs/timit/asr/simple_v1/prepare.py b/egs/timit/asr/simple_v1/prepare.py
new file mode 100644
index 00000000..310d4b08
--- /dev/null
+++ b/egs/timit/asr/simple_v1/prepare.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2020 Xiaomi Corporation (authors: Junbo Zhang, Haowen Qiu)
+# Copyright (c) 2021 Xiaomi Corporation (authors: Mingshuang Luo)
+# Apache 2.0
+
+import argparse
+import os
+import subprocess
+import sys
+from contextlib import contextmanager
+from pathlib import Path
+
+import torch
+from lhotse import CutSet, Fbank, FbankConfig, LilcomHdf5Writer, combine
+from lhotse.recipes import download_timit, prepare_timit, prepare_musan
+
+from snowfall.common import str2bool
+
+# Torch's multithreaded behavior needs to be disabled, or it wastes a lot of
+# CPU and slows things down. Do this outside of main() in case it needs to
+# take effect even when we are not invoking the main (e.g. when spawning
+# subprocesses).
+torch.set_num_threads(1)
+torch.set_num_interop_threads(1)
+num_jobs = min(16, os.cpu_count())
+
+@contextmanager
+def get_executor():
+    # We'll either return a process pool or a distributed worker pool.
+    # Note that this has to be a context manager because we might use multiple
+    # context manager ("with" clauses) inside, and this way everything will
+    # free up the resources at the right time.
+    try:
+        # If this is executed on the CLSP grid, we will try to use the
+        # Grid Engine to distribute the tasks.
+        # Other clusters can also benefit from that, provided a
+        # cluster-specific wrapper.
+        # (see https://github.com/pzelasko/plz for reference)
+        #
+        # The following must be installed:
+        # $ pip install dask distributed
+        # $ pip install git+https://github.com/pzelasko/plz
+        name = subprocess.check_output('hostname -f', shell=True, text=True)
+        if name.strip().endswith('.clsp.jhu.edu'):
+            import plz
+            from distributed import Client
+            with plz.setup_cluster() as cluster:
+                cluster.scale(80)
+                yield Client(cluster)
+            return
+    except:
+        pass
+    # No need to return anything - compute_and_store_features
+    # will just instantiate the pool itself.
+    yield None
+
+
+def locate_corpus(*corpus_dirs):
+    for d in corpus_dirs:
+        if os.path.exists(d):
+            return d
+    print("Please create a place on your system to put the downloaded timit data "
+          "and add it to `corpus_dirs`")
+    sys.exit(1)
+
+def get_parser():
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        '--num-jobs',
+        type=int,
+        default=min(15, os.cpu_count()),
+        help='the number of parallel jobs; a larger value processes the data faster.')
+
+    parser.add_argument(
+        '--num-phones',
+        type=int,
+        default=48,
+        help='the number of phones for modeling; must be one of 60, 48 or 39.')
+
+    return parser
+
+def main():
+    args = get_parser().parse_args()
+    corpus_dir = locate_corpus(
+        Path('/ceph-meixu/luomingshuang/audio-data/timit'),
+        Path('/export/common/data/corpora/timit'),
+        Path('/ceph-fj/data/timit'),
+    )
+
+    musan_dir = locate_corpus(
+        Path('/ceph-meixu/luomingshuang/audio-data/musan'),
+        Path('/export/common/data/corpora/MUSAN/musan'),
+        Path('/ceph-fj/data/musan'),
+    )
+
+    output_dir = Path('exp/data')
+    splits_dir = Path('splits_dir')
+
+    print('Timit manifest preparation:')
+    timit_manifests = prepare_timit(
+        corpus_dir=corpus_dir,
+        splits_dir=splits_dir,
+        output_dir=output_dir,
+        num_phones=args.num_phones,
+        num_jobs=args.num_jobs)
+
+    print('Musan manifest preparation:')
+    musan_cuts_path = output_dir / 'cuts_musan.json.gz'
+    musan_manifests = prepare_musan(
+        corpus_dir=musan_dir,
+        output_dir=output_dir,
+        parts=('music', 'speech', 'noise')
+    )
+
+    print('Feature extraction:')
+    extractor = Fbank(FbankConfig(num_mel_bins=80))
+    with get_executor() as ex:  # Initialize the executor only once.
+        for partition, manifests in timit_manifests.items():
+            if (output_dir / f'cuts_{partition}.json.gz').is_file():
+                print(f'{partition} already exists - skipping.')
+                continue
+            print('Processing', partition)
+            cut_set = CutSet.from_manifests(
+                recordings=manifests['recordings'],
+                supervisions=manifests['supervisions']
+            )
+            if 'train' in partition:
+                cut_set = cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
+            cut_set = cut_set.compute_and_store_features(
+                extractor=extractor,
+                storage_path=f'{output_dir}/feats_{partition}',
+                # when an executor is specified, make more partitions
+                num_jobs=num_jobs if ex is None else 80,
+                executor=ex,
+                storage_type=LilcomHdf5Writer
+            )
+            timit_manifests[partition]['cuts'] = cut_set
+            cut_set.to_json(output_dir / f'cuts_{partition}.json.gz')
+    # Now onto Musan
+    if not musan_cuts_path.is_file():
+        print('Extracting features for Musan')
+        # create chunks of Musan with duration 5 - 10 seconds
+        musan_cuts = CutSet.from_manifests(
+            recordings=combine(part['recordings'] for part in musan_manifests.values())
+        ).cut_into_windows(10.0).filter(lambda c: c.duration > 5).compute_and_store_features(
+            extractor=extractor,
+            storage_path=f'{output_dir}/feats_musan',
+            num_jobs=num_jobs if ex is None else 80,
+            executor=ex,
+            storage_type=LilcomHdf5Writer
+        )
+        musan_cuts.to_json(musan_cuts_path)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/timit/asr/simple_v1/run.sh b/egs/timit/asr/simple_v1/run.sh
new file mode 100644
index 00000000..ed9da367
--- /dev/null
+++ b/egs/timit/asr/simple_v1/run.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+# Copyright 2020 Xiaomi Corporation (Author: Junbo Zhang
+#                                            Mingshuang Luo)
+#           2021 Pingfeng Luo
+# Apache 2.0
+
+# Example of how to build L and G FSTs for k2. Most scripts of this example are copied from Kaldi.
+
+set -eou pipefail
+
+dataset_path=(
+  /ceph-meixu/luomingshuang/audio-data/timit
+)
+
+data=${dataset_path[0]}
+for d in ${dataset_path[@]}; do
+  if [ -d $d ]; then
+    data=$d
+    break
+  fi
+done
+
+num_phones=48  # Choose 48 or 39 (local2/timit_data_prep.sh only supports mapping to 48 or 39 phones)
+
+if [ ! -d $data ]; then
+  echo "$data does not exist"
+  exit 1
+fi
+
+stage=1
+
+if [ $stage -le 1 ]; then
+  echo "Data preparation"
+  local2/timit_data_prep.sh $data $num_phones
+fi
+
+
+if [ $stage -le 2 ]; then
+  echo "Dict preparation"
+  local2/timit_prepare_dict.sh
+fi
+
+
+if [ $stage -le 3 ]; then
+  echo "Lang preparation"
+  local/prepare_lang.sh \
+    --sil-prob 0.0 \
+    --position-dependent-phones false \
+    --num-sil-states 3 \
+    data/local/dict \
+    "sil" \
+    data/local/lang_tmp_nosp \
+    data/lang_nosp
+
+  echo "To load L:"
+  echo "Use::"
+  echo "  with open('data/lang_nosp/L.fst.txt') as f:"
+  echo "    Lfst = k2.Fsa.from_openfst(f.read(), acceptor=False)"
+  echo ""
+fi
+
+
+if [ $stage -le 4 ]; then
+  echo "LM preparation"
+  local2/train_lms.sh
+  gunzip -c data/local/lm/3gram-mincount/lm_unpruned.gz >data/local/lm/lm_tgmed.arpa
+  # Note: you need to install kaldilm using `pip install kaldilm`
+  # Build G
+  python3 -m kaldilm \
+    --read-symbol-table="data/lang_nosp/words.txt" \
+    --disambig-symbol='#0' \
+    --max-order=1 \
+    data/local/lm/lm_tgmed.arpa >data/lang_nosp/G_uni.fst.txt
+
+  python3 -m kaldilm \
+    --read-symbol-table="data/lang_nosp/words.txt" \
+    --disambig-symbol='#0' \
+    --max-order=3 \
+    data/local/lm/lm_tgmed.arpa >data/lang_nosp/G.fst.txt
+
+  echo ""
+  echo "To load G:"
+  echo "Use::"
+  echo "  with open('data/lang_nosp/G.fst.txt') as f:"
+  echo "    G = k2.Fsa.from_openfst(f.read(), acceptor=False)"
+  echo ""
+fi
+
+
+if [ $stage -le 5 ]; then
+  echo "Feature preparation"
+  python3 ./prepare.py \
+    --num-phones=$num_phones
+fi
+
+if [ $stage -le 6 ]; then
+  echo "Training"
+  python3 ./ctc_train.py
+fi
+
+if [ $stage -le 7 ]; then
+  echo "Decoding"
+  python3 ./ctc_decode.py
+fi
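Finally, the loading instructions that stages 3 and 4 echo, collected into one sketch; snowfall's compile_HLG is what ultimately composes these graphs for decoding:

```python
import k2

# Load the grammar FSTs kaldilm wrote in stage 4 (OpenFst text format over
# words.txt, with '#0' as the disambiguation symbol), per the instructions
# run.sh itself prints.
with open('data/lang_nosp/G.fst.txt') as f:
    G = k2.Fsa.from_openfst(f.read(), acceptor=False)
with open('data/lang_nosp/G_uni.fst.txt') as f:
    G_uni = k2.Fsa.from_openfst(f.read(), acceptor=False)
```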