diff --git a/leviosam-test.py b/leviosam-test.py
index b23cb49..4fb1cfa 100644
--- a/leviosam-test.py
+++ b/leviosam-test.py
@@ -49,7 +49,8 @@ def setUpClass(cls):
         for param in params:
             process = subprocess.Popen(
                 ['./leviosam', 
-                'lift', '-l', 'testdata/wg-maj.lft', '-a', param['sam'], '-p', param['out_prefix']],
+                'lift', '-l', 'testdata/wg-maj.lft', '-a', param['sam'],
+                '-p', param['out_prefix']],
                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             stdout, stderr = process.communicate()
             print(f'Lifted {param["out_prefix"]}.sam')
@@ -82,12 +83,12 @@ def test_identical_fields(self):
             assert param['mode'] in ['SE', 'PE']
             
             # Each subtest here is one parameter set (e.g. BWA-SE, BT2-PE)
-            with self.subTest(aln_param_idx=i):
+            with self.subTest(aln_param_idx=param):
                 dict_lifted = self.read_sam_file_as_dict(f'{param["out_prefix"]}.sam')
                 dict_gold = self.read_sam_file_as_dict(f'{param["gold"]}')
 
                 for test_idx, test_field in enumerate(dict_test_field.keys()):
-                    with self.subTest(test_idx=test_idx):
+                    with self.subTest(test_idx=test_field):
                         for k in dict_lifted.keys():
                             # Single-end files.
                             if param['mode'] == 'SE':
diff --git a/leviosam.cpp b/leviosam.cpp
index cbfb069..b5fb90a 100644
--- a/leviosam.cpp
+++ b/leviosam.cpp
@@ -9,19 +9,17 @@
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 
-using NameMap = std::vector<std::pair<std::string,std::string>>;
-using LengthMap = std::unordered_map<std::string,size_t>;
-
 struct lift_opts {
     std::string vcf_fname = "";
     std::string sample = "";
     std::string outpre = "";
+    std::string out_format = "sam";
     std::string lift_fname = "";
     std::string sam_fname = "";
     std::string cmd = "";
     std::string haplotype = "0";
     int threads = 1;
-    int chunk_size = 64;
+    int chunk_size = 256;
     int verbose = 0;
     NameMap name_map;
     LengthMap length_map;
@@ -61,10 +59,6 @@ LengthMap parse_length_map(const char* fname) {
     return lengths;
 }
 
-lift::LiftMap lift_from_vcf(std::string fname, 
-                            std::string sample, 
-                            std::string haplotype, 
-                            NameMap names, LengthMap lengths);
 
 void serialize_run(lift_opts args) {
     lift::LiftMap l(lift_from_vcf(args.vcf_fname, args.sample, args.haplotype, args.name_map, args.length_map));
@@ -79,58 +73,24 @@ void serialize_run(lift_opts args) {
 }
 
 
-char* get_PG(bam_hdr_t* hdr) {
-    char* hdr_txt = (char*) malloc(sizeof(char) * hdr->l_text + 1);
-    char* buf = (char*) malloc(sizeof(char) * hdr->l_text);
-    buf[0] = '\0';
-    std::strcpy(hdr_txt, hdr->text);
-    char* token = std::strtok(hdr_txt, "\n");
-    while (token != NULL) {
-        if (token[0] == '@' && token[1] == 'P' && token[2] == 'G') {
-            strcat(buf, token);
-        }
-        token = std::strtok(NULL, "\n");
-    }
-    free(hdr_txt);
-    return buf;
-}
-
-/* just gets AS for now till I get around to iterating through all the tags 
- */
-std::string tag_to_string(const bam1_t* rec) {
-    std::string tag_string("");
-    uint8_t* aux = bam_aux_get(rec, "AS");
-    if (aux != NULL) {
-        tag_string += "AS:i:";
-        tag_string += std::to_string(bam_aux2i(aux));
-    }
-    aux = bam_aux_get(rec, "NM");
-    if (aux != NULL) {
-        tag_string += "\tNM:i:";
-        tag_string += std::to_string(bam_aux2i(aux));
-    }
-    return tag_string;
-}
-
 void read_and_lift(
     std::mutex* mutex_fread,
     std::mutex* mutex_fwrite,
     std::mutex* mutex_vec,
     samFile* sam_fp,
-    FILE* out_sam_fp,
+    samFile* out_sam_fp,
     bam_hdr_t* hdr,
     int chunk_size,
     lift::LiftMap* l,
     std::vector<std::string>* chroms_not_found
 ){
     std::vector<bam1_t*> aln_vec;
-    std::vector<std::string> tmp_write_buffer(chunk_size);
     for (int i = 0; i < chunk_size; i++){
         bam1_t* aln = bam_init1();
         aln_vec.push_back(aln);
     }
     int read = 1;
-    while(read >= 0){
+    while (read >= 0){
         int num_actual_reads = chunk_size;
         {
             // read from SAM, protected by mutex
@@ -144,112 +104,54 @@ void read_and_lift(
             }
         }
         for (int i = 0; i < num_actual_reads; i++){
-            std::string sam_out;
             bam1_t* aln = aln_vec[i];
             bam1_core_t c = aln->core;
-            sam_out += bam_get_qname(aln);
-            sam_out += "\t";
-            sam_out += std::to_string(c.flag);
-            sam_out += "\t";
             std::string s1_name, s2_name;
             size_t pos;
-            if (c.flag & BAM_FUNMAP){ // If unmapped
-                if (((c.flag & BAM_FPAIRED) && (c.flag & BAM_FMUNMAP)) || // paired-end, and mate unmapped
-                    !(c.flag & BAM_FPAIRED) // single-end
-                ){
-                    sam_out += "*\t0\t0\t*\t";
-                }
-                else {
-                    s2_name = hdr->target_name[c.mtid];
-                    s1_name = l->get_other_name(s2_name);
-                    // chroms_not_found needs to be protected because chroms_not_found is shared
-                    pos = l->s2_to_s1(s2_name, c.mpos, chroms_not_found, mutex_vec) + 1;
-                    // REF
-                    sam_out += s1_name.data();
-                    sam_out += "\t";
-                    // POS
-                    sam_out += std::to_string(pos);
-                    sam_out += "\t";
-                    // QUAL & CIGAR
-                    sam_out += "0\t*\t";
-                }
-            }
-            else {
+            // If a read is mapped, lift its position.
+            // If a read is unmapped, but its mate is mapped, lift its mates position.
+            // Otherwise leave it unchanged.
+            if (!(c.flag & BAM_FUNMAP)) {
                 s2_name = hdr->target_name[c.tid];
                 s1_name = l->get_other_name(s2_name);
                 // chroms_not_found needs to be protected because chroms_not_found is shared
-                pos = l->s2_to_s1(s2_name, c.pos, chroms_not_found, mutex_vec) + 1;
-                // REF
-                sam_out += s1_name.data();
-                sam_out += "\t";
-                // POS
-                sam_out += std::to_string(pos);
-                sam_out += "\t";
-                // QUAL
-                sam_out += std::to_string(c.qual); 
-                sam_out += "\t";
+                pos = l->s2_to_s1(s2_name, c.pos, chroms_not_found, mutex_vec);
                 // CIGAR
-                sam_out += l->cigar_s2_to_s1(s2_name, aln).data();
-                sam_out += "\t";
+                l->cigar_s2_to_s1(s2_name, aln);
+                aln->core.pos = pos;
+            } else if ((c.flag & BAM_FPAIRED) && !(c.flag & BAM_FMUNMAP)) {
+                s2_name = hdr->target_name[c.mtid];
+                s1_name = l->get_other_name(s2_name);
+                // chroms_not_found needs to be protected because chroms_not_found is shared
+                pos = l->s2_to_s1(s2_name, c.mpos, chroms_not_found, mutex_vec);
+                aln->core.pos = pos;
             }
-            if (c.flag & BAM_FPAIRED) { // mate information
-                if (c.flag & BAM_FMUNMAP){ // mate is unmapped
-                    size_t mpos = l->s2_to_s1(s2_name, c.pos, chroms_not_found, mutex_vec) + 1;
-                    sam_out += "=\t";
-                    sam_out += std::to_string(mpos);
-                    sam_out += "\t0\t";
-                }
-                else{ // mate is mapped
+            // Lift mate position.
+            if (c.flag & BAM_FPAIRED) {
+                // If the mate is unmapped, use the lifted position of the read.
+                // If the mate is mapped, lift its position.
+                if (c.flag & BAM_FMUNMAP){
+                    aln->core.mpos = aln->core.pos;
+                } else {
                     std::string ms2_name(hdr->target_name[c.mtid]);
                     std::string ms1_name(l->get_other_name(ms2_name));
-                    size_t mpos = l->s2_to_s1(ms2_name, c.mpos, chroms_not_found, mutex_vec) + 1;
-                    // RNEXT
-                    // If IDs match, print "="; else, print RNAME for the mate
-                    sam_out += (c.tid == c.mtid)? "=" : ms1_name.data();
-                    sam_out += "\t";
-                    sam_out += std::to_string(mpos); // PNEXT
-                    size_t pos_5, mpos_5; // 5'-end position
+                    size_t mpos = l->s2_to_s1(ms2_name, c.mpos, chroms_not_found, mutex_vec);
+                    aln->core.mpos = mpos;
                     int isize = (c.isize == 0)? 0 : c.isize + (mpos - c.mpos) - (pos - c.pos);
-                    sam_out += "\t";
-                    sam_out += std::to_string(isize); // TLEN (or ISIZE)
-                    sam_out += "\t";
-                }
-            }
-            else { // single-end
-                sam_out += "*\t0\t0\t";
-            }
-            // get query sequence
-            std::string query_seq("");
-            uint8_t* seq = bam_get_seq(aln);
-            for (auto i = 0; i < c.l_qseq; ++i) {
-                query_seq += seq_nt16_str[bam_seqi(seq, i)];
-            }
-            sam_out += query_seq.data();
-            sam_out += "\t";
-            // get quality
-            std::string qual_seq("");
-            uint8_t* qual = bam_get_qual(aln);
-            if (qual[0] == 255) qual_seq = "*";
-            else {
-                for (auto i = 0; i < c.l_qseq; ++i) {
-                    qual_seq += (char) (qual[i] + 33);
+                    aln->core.isize = isize;
                 }
             }
-            sam_out += qual_seq.data();
-            sam_out += "\t";
-            // TODO reconcile any tags that also need to be added.
-            sam_out += tag_to_string(aln).data();
-            sam_out += "\n";
-
-            tmp_write_buffer[i] = sam_out;
-            // fprintf(out_sam_fp, "%s", sam_out.c_str());
         }
         {
             // write to file, thread corruption protected by lock_guard
             std::lock_guard<std::mutex> g(*mutex_fwrite);
             // std::thread::id this_id = std::this_thread::get_id();
             for (int i = 0; i < num_actual_reads; i++){
-                fprintf(out_sam_fp, "%s", tmp_write_buffer[i].c_str());
+                auto flag_write = sam_write1(out_sam_fp, hdr, aln_vec[i]);
+                if (flag_write < 0){
+                    std::cerr << "[Error] Failed to write record " << bam_get_qname(aln_vec[i]) << "\n";
+                    exit(1);
+                }
             }
         }
     }
@@ -276,60 +178,46 @@ void lift_run(lift_opts args) {
 
     fprintf(stderr, "loaded liftmap!\n");
 
-    samFile* sam_fp;
-    // read sam file here
-    if (args.sam_fname == "") {
-        sam_fp = sam_open("-", "r");
-    }
-    else
-        sam_fp = sam_open(args.sam_fname.data(), "r");
+    samFile* sam_fp = (args.sam_fname == "")?
+        sam_open("-", "r") : sam_open(args.sam_fname.data(), "r");
     bam_hdr_t* hdr = sam_hdr_read(sam_fp);
 
     // the "ref" lengths are all stored in the levio structure. How do we loop over it?
     std::vector<std::string> contig_names;
     std::vector<size_t> contig_reflens;
     std::tie(contig_names, contig_reflens) = l.get_s1_lens();
-    // for now we'll just write out the samfile raw
-    FILE* out_sam_fp;
-    if (args.outpre == "") 
-        out_sam_fp = stdout;
-    else 
-        out_sam_fp = fopen((args.outpre + ".sam").data(), "w");
-    fprintf(out_sam_fp, "@HD\tVN:1.6\tSO:unknown\n");
-    for (auto i = 0; i < hdr->n_targets; i++){
+
+    std::string out_mode = (args.out_format == "sam")? "w" : "wb";
+    samFile* out_sam_fp = (args.outpre == "-" || args.outpre == "")?
+        sam_open("-", out_mode.data()) :
+        sam_open((args.outpre + "." + args.out_format).data(), out_mode.data());
+
+    // Write SAM headers. We update contig lengths if needed.
+    sam_hdr_add_pg(hdr, "leviosam", "VN", VERSION, "CL", args.cmd.data(), NULL);
+    for (auto i = 0; i < hdr->n_targets; i++) {
         auto contig_itr = std::find(contig_names.begin(), contig_names.end(), hdr->target_name[i]);
         // If a contig is in contig_names, look up the associated length.
-        if (contig_itr != contig_names.end()){
-            fprintf(out_sam_fp, "@SQ\tSN:%s\tLN:%ld\n",
-                    contig_names[contig_itr - contig_names.begin()].data(),
-                    contig_reflens[contig_itr - contig_names.begin()]);
-        }
-        // if a contig is not found in vcf/lft, print it as it was before levio
-        else{
-            fprintf(out_sam_fp, "@SQ\tSN:%s\tLN:%u\n", hdr->target_name[i], hdr->target_len[i]);
+        if (contig_itr != contig_names.end()) {
+            auto len_str = std::to_string(contig_reflens[contig_itr - contig_names.begin()]);
+            if (sam_hdr_update_line(
+                    hdr, "SQ",
+                    "SN", contig_names[contig_itr - contig_names.begin()].data(),
+                    "LN", len_str.data(), NULL) < 0) {
+                std::cerr << "[Error] failed when converting contig length for "
+                          << hdr->target_name[i] << "\n";
+                exit(1);
+            }
         }
     }
-    /* This is only supported in the latest dev release of htslib as of July 22. 
-     * add back in at next htslib release
-    kstring_t s = KS_INITIALIZE;
-    sam_hdr_find_line_id(hdr, "PG", NULL, NULL, &s);
-    fprintf(out_sam_fp, "%s\n", s.s);
-    ks_free(&s);
-    */
-    char* prev_pg = get_PG(hdr);
-    fprintf(out_sam_fp, "%s\n", prev_pg);
-    free(prev_pg);
-    fprintf(out_sam_fp, "@PG\tID:leviosam\tPN:leviosam\tCL:\"%s\"\n", args.cmd.data());
+    auto write_hdr = sam_hdr_write(out_sam_fp, hdr);
     
-    // store chromosomes found in SAM but not in lft
-    // use a vector to avoid printing out the same warning msg multiple times
+    // Store chromosomes found in SAM but not in the VCF.
+    // We use a vector to avoid printing out the same warning msg multiple times.
     std::vector<std::string> chroms_not_found;
     const int num_threads = args.threads;
     const int chunk_size = args.chunk_size;
     std::vector<std::thread> threads;
-    std::mutex mutex_fread;
-    std::mutex mutex_fwrite;
-    std::mutex mutex_vec;
+    std::mutex mutex_fread, mutex_fwrite, mutex_vec;
     for (int j = 0; j < num_threads; j++){
         threads.push_back(
             std::thread(
@@ -351,9 +239,10 @@ void lift_run(lift_opts args) {
             threads[j].join();
     }
     threads.clear();
-    fclose(out_sam_fp);
+    sam_close(out_sam_fp);
 }
 
+
 lift::LiftMap lift_from_vcf(std::string fname, 
                             std::string sample, 
                             std::string haplotype, 
@@ -376,13 +265,16 @@ std::string make_cmd(int argc, char** argv) {
     }
     return cmd;
 }
+
+
 void print_serialize_help_msg(){
     fprintf(stderr, "\n");
     fprintf(stderr, "Usage:   leviosam serialize [options]\n");
     fprintf(stderr, "Options:\n");
     fprintf(stderr, "         -v string Build a leviosam index using a VCF file.\n");
     fprintf(stderr, "         -s string The sample used to build leviosam index (-v needs to be set).\n");
-    fprintf(stderr, "         -p string The prefix of the output files.\n");
+    fprintf(stderr, "         -p string The prefix of the output file.\n");
+    fprintf(stderr, "         -O format Output file format, can be sam or bam. [sam]\n");
     fprintf(stderr, "         -g 0/1    The haplotype used to index leviosam. [0] \n");
     fprintf(stderr, "         -n string Path to a name map file.\n");
     fprintf(stderr, "                   This can be used to map '1' to 'chr1', or vice versa.\n");
@@ -394,22 +286,24 @@ void print_lift_help_msg(){
     fprintf(stderr, "\n");
     fprintf(stderr, "Usage:   leviosam lift [options]\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "         -a string The SAM file to be lifted. \n");
+    fprintf(stderr, "         -a string Path to the SAM/BAM file to be lifted. \n");
     fprintf(stderr, "                   Leave empty or set to \"-\" to read from stdin.\n");
-    fprintf(stderr, "         -l string Path to a pre-built leviosam index.\n");
+    fprintf(stderr, "         -l string Path to a leviosam index.\n");
     fprintf(stderr, "         -v string If -l is not specified, can build indexes using a VCF file.\n");
     fprintf(stderr, "         -t INT    Number of threads used. [1] \n");
-    fprintf(stderr, "         -T INT    Size of a processing chunk. [64] \n");
+    fprintf(stderr, "         -T INT    Chunk size for each thread. [256] \n");
+    fprintf(stderr, "                   Each thread queries <-T> reads, lifts, and writes.\n");
+    fprintf(stderr, "                   Setting a higher <-T> uses slightly more memory but might benefit thread scaling.\n");
     fprintf(stderr, "         The options for serialize can also be used here, if -v is set.\n");
     print_serialize_help_msg();
 }
 
 void print_main_help_msg(){
     fprintf(stderr, "\n");
-    fprintf(stderr, "Program: leviosam (lift over SAM based on VCF)\n");
+    fprintf(stderr, "Program: leviosam (lift over SAM/BAM based on VCF)\n");
     fprintf(stderr, "Usage:   leviosam <command> [options]\n\n");
     fprintf(stderr, "Commands:serialize build leviosam indexes\n");
-    fprintf(stderr, "         lift      perform lifting\n");
+    fprintf(stderr, "         lift      lift alignments\n");
     fprintf(stderr, "Options: -h        Print detailed usage.\n");
     fprintf(stderr, "\n");
 }
@@ -424,13 +318,14 @@ int main(int argc, char** argv) {
         {"prefix", required_argument, 0, 'p'},
         {"leviosam", required_argument, 0, 'l'},
         {"sam", required_argument, 0, 'a'},
+        {"out_format", required_argument, 0, 'O'},
         {"haplotype", required_argument, 0, 'g'},
         {"threads", required_argument, 0, 't'},
         {"chunk_size", required_argument, 0, 'T'},
         {"verbose", no_argument, &args.verbose, 1},
     };
     int long_index = 0;
-    while((c = getopt_long(argc, argv, "hv:s:p:l:a:g:n:k:t:T:", long_options, &long_index)) != -1) {
+    while((c = getopt_long(argc, argv, "hv:s:p:l:a:O:g:n:k:t:T:", long_options, &long_index)) != -1) {
         switch (c) {
             case 'v':
                 args.vcf_fname = optarg;
@@ -447,6 +342,9 @@ int main(int argc, char** argv) {
             case 'a':
                 args.sam_fname = optarg;
                 break;
+            case 'O':
+                args.out_format = optarg;
+                break;
             case 'g':
                 args.haplotype = optarg;
                 break;
@@ -480,6 +378,12 @@ int main(int argc, char** argv) {
         fprintf(stderr, "invalid haplotype %s\n", args.haplotype.c_str());
         exit(1);
     }
+
+    if (args.out_format != "sam" && args.out_format != "bam") {
+        fprintf(stderr, "Not supported extension format %s\n", args.out_format.c_str());
+        exit(1);
+    }
+
     if (!strcmp(argv[optind], "lift")) {
         lift_run(args);
     } else if (!strcmp(argv[optind], "serialize")) {
diff --git a/leviosam.hpp b/leviosam.hpp
index ce5f784..08ceb04 100644
--- a/leviosam.hpp
+++ b/leviosam.hpp
@@ -20,6 +20,12 @@
  * Created: July 2019
  */
 
+const char* VERSION("0.2");
+
+using NameMap = std::vector<std::pair<std::string,std::string>>;
+using LengthMap = std::unordered_map<std::string,size_t>;
+
+
 static inline void die(std::string msg) {
     fprintf(stderr, "%s\n", msg.data());
     exit(1);
@@ -83,15 +89,22 @@ class Lift {
         return del_rs0(ins_sls0(p+1));
     }
 
-    // convert a CIGAR string in an s2 alignment to the corresponding CIGAR string in the s1 alignment
-    // input: bam1_t alignment record against s2 sequence
-    // output: CIGAR string (as std::string) wrt s1 sequence
-    std::string cigar_s2_to_s1(bam1_t* b) {
-        auto x = del_sls0(b->core.pos+1);
+
+    /* Core function to lift a CIGAR string from s1 to s2 space.
+     * 
+     * Input:
+     *   An alignment struct we'd like to update.
+     *   This function is not going to update the bam1_t stuct.
+     *
+     * Output:
+     *   A vector, each element is an unit32_t corresponding to htslib CIGAR value
+     */
+    std::vector<uint32_t> cigar_s2_to_s1_core(bam1_t* b){
+        auto x = del_sls0(b->core.pos + 1);
         int y = 0; // read2alt cigar pointer
         uint32_t* cigar = bam_get_cigar(b);
-        std::string out_cigar = "";
         std::vector<uint32_t> cigar_ops;
+        std::vector<uint32_t> new_cigar_ops;
         for (int i = 0; i < b->core.n_cigar; ++i) {
             for (int j = 0; j < bam_cigar_oplen(cigar[i]); ++j) {
                 cigar_ops.push_back(bam_cigar_op(cigar[i]));
@@ -101,24 +114,30 @@ class Lift {
         while (y < iters) {
             int cop = cigar_ops[y];
             if (del[x]) { // skip ahead in alt2ref cigar
-                if (out_cigar[out_cigar.length()-1] == 'I'){
-                    out_cigar.pop_back();
-                    out_cigar += "M";
+                if (new_cigar_ops.empty()){
+                    new_cigar_ops.push_back(BAM_CDEL);
+                } else {
+                    if (new_cigar_ops.back() == BAM_CINS){
+                        new_cigar_ops[new_cigar_ops.size() - 1] = BAM_CMATCH;
+                    } else {
+                        new_cigar_ops.push_back(BAM_CDEL);
+                    }
                 }
-                else
-                    out_cigar += "D";
                 ++x;
             } else if (cop == BAM_CINS) { // skip ahead in read2alt cigar
-                if (out_cigar[out_cigar.length()-1] == 'D'){
-                    out_cigar.pop_back();
-                    out_cigar += "M";
+                if (!new_cigar_ops.empty()){
+                    if (new_cigar_ops.back() == BAM_CDEL){
+                        new_cigar_ops[new_cigar_ops.size() - 1] = BAM_CMATCH;
+                    } else{
+                        new_cigar_ops.push_back(BAM_CINS);
+                    }
+                } else {
+                    new_cigar_ops.push_back(BAM_CINS);
                 }
-                else
-                    out_cigar += "I";
                 ++y;
             } else if (cop == BAM_CSOFT_CLIP || cop == BAM_CHARD_CLIP || cop == BAM_CPAD){
-                out_cigar += (cop == BAM_CSOFT_CLIP)? "S" :
-                    (cop == BAM_CHARD_CLIP)? "H" : "P";
+                // TODO: check the case where S is followed by D
+                new_cigar_ops.push_back(cop);
                 ++y;
             } else if (cop == BAM_CBACK) {
                 // skip. We don't support B Ops.
@@ -126,43 +145,114 @@ class Lift {
                 ++y;
             } else if (cop == BAM_CMATCH || cop == BAM_CDIFF || cop == BAM_CEQUAL) { // M
                 if (ins[x]){
-                    if (out_cigar[out_cigar.length()-1] == 'D'){
-                        out_cigar.pop_back();
-                        out_cigar += "M";
+                    if (new_cigar_ops.empty()){
+                        new_cigar_ops.push_back(BAM_CINS);
+                    } else if (new_cigar_ops.back() == BAM_CDEL){
+                        new_cigar_ops[new_cigar_ops.size() - 1] = BAM_CMATCH;
+                    } else {
+                        new_cigar_ops.push_back(BAM_CINS);
                     }
-                    else
-                        out_cigar += "I"; // IM
+                } else {
+                    new_cigar_ops.push_back(BAM_CMATCH);
                 }
-                else out_cigar += "M"; // MM
                 ++x; ++y;
             } else if (cop == BAM_CDEL || cop == BAM_CREF_SKIP) { // D
-                if (ins[x]) out_cigar += ""; // ID - insertion is invalidated
-                else{
-                    if (out_cigar[out_cigar.length()-1] == 'I'){
-                        out_cigar.pop_back();
-                        out_cigar += "M";
+                if (!ins[x]) {
+                    if (new_cigar_ops.empty()){
+                        new_cigar_ops.push_back(BAM_CDEL);
+                    } else if (new_cigar_ops.back() == BAM_CINS){
+                        new_cigar_ops[new_cigar_ops.size() - 1] = BAM_CMATCH;
+                    } else {
+                        new_cigar_ops.push_back(BAM_CDEL);
                     }
-                    else
-                        out_cigar += "D"; // MD
                 }
                 ++x; ++y;
             }
         }
+        return new_cigar_ops;
+    }
 
-        std::string out = "";
+
+    /* Convert a CIGAR string in an s2 alignment to the corresponding CIGAR string in the s1 alignment
+     *
+     * Input: bam1_t alignment record against s2 sequence
+     *
+     * Output:
+     *   The bam1_t struct is updated if there's a change in its CIGAR.
+     *   bam1_t->core.n_cigar specifies numbers of operators in a CIGAR;
+     *   bam_get_cigar(bam1_t) points to the occurrences.
+     */
+    void cigar_s2_to_s1(bam1_t* b) {
+        uint32_t* cigar = bam_get_cigar(b);
+        // Get lifted CIGAR.
+        auto new_cigar_ops = cigar_s2_to_s1_core(b);
+        // Prepare to update CIGAR in the bam1_t struct.
+        std::vector<int> cigar_op_len;
+        std::vector<char> cigar_op;
         int z = 1;
-        for (size_t i = 1; i < out_cigar.size(); ++i) {
-            if (out_cigar[i] == out_cigar[i-1]) {
+        for (int i = 1; i < new_cigar_ops.size(); i++){
+            if (new_cigar_ops[i] == new_cigar_ops[i-1]){
                 z++;
             } else {
-                out += std::to_string(z);
-                out += out_cigar[i-1];
+                cigar_op_len.push_back(z);
+                cigar_op.push_back(new_cigar_ops[i - 1]);
                 z = 1;
             }
         }
-        out += std::to_string(z);
-        out += out_cigar[out_cigar.size() - 1];
-        return out;
+        cigar_op_len.push_back(z);
+        cigar_op.push_back(new_cigar_ops[new_cigar_ops.size() - 1]);
+        std::vector<uint32_t> new_cigar;
+        for (int i = 0; i < cigar_op_len.size(); i++){
+            new_cigar.push_back(bam_cigar_gen(cigar_op_len[i], cigar_op[i]));
+        }
+        // Adjust data position when n_cigar is changed by levioSAM. n_cigar could either be
+        // increased or decreased.
+        //
+        // Adapted from samtools/sam.c
+        // https://github.com/samtools/htslib/blob/2264113e5df1946210828e45d29c605915bd3733/sam.c#L515
+        if (b->core.n_cigar != cigar_op_len.size()){
+            auto cigar_st = (uint8_t*)bam_get_cigar(b) - b->data;
+            auto fake_bytes = b->core.n_cigar * 4;
+            b->core.n_cigar = (uint32_t)cigar_op_len.size();
+            auto n_cigar4 = b->core.n_cigar * 4;
+            auto orig_len = b->l_data;
+            if (n_cigar4 > fake_bytes){
+                // Check if we need to update `b->m_data`.
+                //
+                // Adapted from htslib/sam_internal.h
+                // https://github.com/samtools/htslib/blob/31f0a76d338c9bf3a6893b71dd437ef5fcaaea0e/sam_internal.h#L48
+                auto new_m_data = (size_t) b->l_data + n_cigar4 - fake_bytes;
+                kroundup32(new_m_data);
+                if (new_m_data > b->m_data){
+                    auto new_data = static_cast<uint8_t *>(realloc(b->data, new_m_data));
+                    if (!new_data){
+                        std::cerr << "[Error] Failed to expand a bam1_t struct for " << bam_get_qname(b) << "\n";
+                        std::cerr << "This is likely due to out of memory\n";
+                        exit(1);
+                    }
+                    b->data = new_data;
+                    b->m_data = new_m_data;
+                    cigar = bam_get_cigar(b);
+                }
+            }
+            // Update memory usage of data.
+            b->l_data = b->l_data - fake_bytes + n_cigar4;
+            // Move data to the correct place.
+            memmove(b->data + cigar_st + n_cigar4,
+                    b->data + cigar_st + fake_bytes,
+                    orig_len - (cigar_st + fake_bytes));
+            // If new n_cigar is greater, copy the real CIGAR to the right place.
+            // Skipped this if new n_cigar is smaller than the original value.
+            if (n_cigar4 > fake_bytes){
+                memcpy(b->data + cigar_st,
+                       b->data + (n_cigar4 - fake_bytes) + 8,
+                       n_cigar4);
+            }
+            b->core.n_cigar = cigar_op_len.size();
+        }
+        for (int i = 0; i < b->core.n_cigar; i++){
+            *(cigar + i) = new_cigar[i];
+        }
     }
 
     // returns size of s1 sequence
@@ -413,48 +503,22 @@ class LiftMap {
             return n;
     }
 
-    // converts CIGAR string of s2 alignment to corresponding CIGAR string for the s1 alignment
-    // input: sequence name, bam1_t alignment record object (via htslib)
-    // ouput: CIGAR string of s1 alignment (as std::string)
-    // if liftover is not defined, then returns empty string
-    std::string cigar_s2_to_s1(std::string n, bam1_t* b) {
+    /* Convert a CIGAR string in an s2 alignment to the corresponding CIGAR string in the s1 alignment
+     *
+     * Input: bam1_t alignment record against s2 sequence
+     *
+     * The bam1_t struct is updated if there's a change in its CIGAR.
+     * bam1_t->core.n_cigar specifies numbers of operators in a CIGAR;
+     * bam_get_cigar(bam1_t) points to the occurrences.
+     *
+     * If liftover is not defined, do not update the bam1_t struct.
+     */
+    void cigar_s2_to_s1(const std::string& n, bam1_t* b) {
         auto it = s2_map.find(n);
         if (it != s2_map.end()) {
-            return lmap[it->second].cigar_s2_to_s1(b);
-        } else { // returns original cigar string
-            std::string out_cigar = "";
-            auto cigar = bam_get_cigar(b);
-            for (int i = 0; i < b->core.n_cigar; ++i) {
-                for (int j = 0; j < bam_cigar_oplen(cigar[i]); ++j) {
-                    if (bam_cigar_op(cigar[i]) == BAM_CINS)
-                        out_cigar += "I";
-                    else if (bam_cigar_op(cigar[i]) == BAM_CDEL)
-                        out_cigar += "D";
-                    else if (bam_cigar_op(cigar[i]) == BAM_CMATCH)
-                        out_cigar += "M";
-                    else if (bam_cigar_op(cigar[i]) == BAM_CSOFT_CLIP)
-                        out_cigar += "S";
-                    else if (bam_cigar_op(cigar[i]) == BAM_CHARD_CLIP)
-                        out_cigar += "H";
-                    else if (bam_cigar_op(cigar[i]) == BAM_CPAD)
-                        out_cigar += "P";
-                }
-            }
-            std::string out = "";
-            int z = 1;
-            for (size_t i = 1; i < out_cigar.size(); ++i) {
-                if (out_cigar[i] == out_cigar[i-1]) {
-                    z++;
-                } else {
-                    out += std::to_string(z);
-                    out += out_cigar[i-1];
-                    z = 1;
-                }
-            }
-            out += std::to_string(z);
-            out += out_cigar[out_cigar.size() - 1];
-            return out;
+            lmap[it->second].cigar_s2_to_s1(b);
         }
+        // If not found, no update.
     }
 
     // saves to stream
@@ -588,6 +652,12 @@ class LiftMap {
 };
 };
 
+lift::LiftMap lift_from_vcf(std::string fname, 
+                            std::string sample, 
+                            std::string haplotype, 
+                            NameMap names, LengthMap lengths);
+
+
 void print_main_help_msg();
 void print_lift_help_msg();
 void print_serialize_help_msg();
diff --git a/scripts/chain_utils.py b/scripts/chain_utils.py
new file mode 100644
index 0000000..8e43b5f
--- /dev/null
+++ b/scripts/chain_utils.py
@@ -0,0 +1,315 @@
+'''
+This script includes two functionalities: chain2vcf and chain2bed
+
+chain2vcf: Converts a chain file to a VCF file.
+
+    This conversion is not capble of handling translocations and inversions. Users should specify
+    a list of "chain ids" that contain only indels.
+
+Example: 
+    # This output VCF is used to convert h38 coordinates to T2T coordinates.
+    python chain_utils.py chain2vcf -i hg38.t2t-chm13-v1.0.over.chain -c 1-23 -o main_chain-h38.t2t_chm13.vcf
+
+
+chain2bed: Converts a chain file to a BED file.
+
+    The conversion only supports one chain per contig (we currently don't support translocations).
+    Users need to provide a list of chain ids for conversion.
+    The output BED file will contain the "chain block" regions.
+'''
+
+import argparse
+import sys
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    subparsers = parser.add_subparsers()
+    #subparsers = parser.add_subparsers(dest='subparser')
+    parser_c2v = subparsers.add_parser('chain2vcf')
+    parser_c2v.add_argument(
+        '-i', '--in_chain', required=True,
+        help='Path to the chain file to be converted.'
+    )
+    parser_c2v.add_argument(
+        '-c', '--chain_ids', required=True,
+        help='Chain ids to be converted, separated by commas and hyphens. E.g., "1-9", "1,5-8".'
+    )
+    parser_c2v.add_argument(
+        '-o', '--out_vcf',
+        help='Path to the output VCF file.'
+    )
+    parser_c2v.set_defaults(func=chain2vcf)
+
+    parser_c2b = subparsers.add_parser('chain2bed')
+    parser_c2b.add_argument(
+        '-i', '--in_chain', required=True,
+        help='Path to the chain file to be converted.'
+    )
+    parser_c2b.add_argument(
+        '-c', '--chain_ids', required=True,
+        help='Chain ids to be converted, separated by commas and hyphens. E.g., "1-9", "1,5-8".'
+    )
+    parser_c2b.add_argument(
+        '-o', '--out_bed',
+        help='Path to the output BED file.'
+    )
+    parser_c2b.set_defaults(func=chain2bed)
+
+    args = parser.parse_args()
+    args.func(args)
+
+    #kwargs = vars(parser.parse_args())
+    #globals()[kwargs.pop('subparser')](**kwargs)
+
+
+class Chain_Fields():
+    HEADER = 0
+    SCORE = 1
+    TNAME = 2
+    TSIZE = 3
+    TSTRAND = 4
+    TSTART = 5
+    TEND = 6
+    QNAME = 7
+    QSIZE = 8
+    QSTRAND = 9
+    QSTART = 10
+    QEND = 11
+    CID = 12
+    ALN_SIZE = 0
+    ALN_DT = 1
+    ALN_DQ = 2
+
+
+class Chain2Bed():
+    def __init__(self, out_bed=sys.stdout, chain_hdr='', CF=None):
+        assert(chain_hdr[CF.HEADER] == 'chain')
+        assert(chain_hdr[CF.TNAME] == chain_hdr[CF.QNAME])
+        self.out_bed = out_bed
+        self.CF = CF
+        self.contig = chain_hdr[self.CF.QNAME]
+        self.qstart = int(chain_hdr[self.CF.QSTART])
+        self.tstart = int(chain_hdr[self.CF.TSTART])
+        self.bed_records = []
+
+    def add(self, chain_aln):
+        if len(self.bed_records) == 0 or self.tstart != self.bed_records[-1][1]:
+            self.bed_records.append([self.tstart, self.tstart + int(chain_aln[self.CF.ALN_SIZE])])
+        else:
+            self.bed_records[-1][1] = self.tstart + int(chain_aln[self.CF.ALN_SIZE])
+
+        self.tstart += int(chain_aln[self.CF.ALN_SIZE]) + int(chain_aln[self.CF.ALN_DT])
+        self.qstart += int(chain_aln[self.CF.ALN_SIZE]) + int(chain_aln[self.CF.ALN_DQ])
+
+    def write(self):
+        for record in self.bed_records:
+            print(f'{self.contig}\t{record[0]}\t{record[1]}', file=self.out_bed)
+
+
+
+class Chain2Vcf():
+    def __init__(self, out_vcf=sys.stdout, chain_hdr='', CF=None):
+        assert(chain_hdr[CF.HEADER] == 'chain')
+        # assert(chain_hdr[CF.TNAME] == chain_hdr[CF.QNAME])
+
+        self.out_vcf = out_vcf
+        self.CF = CF
+
+        self.contig = chain_hdr[self.CF.QNAME]
+        # Add one b/c VCF format is one-based.
+        self.qstart = int(chain_hdr[self.CF.QSTART]) + 1
+        self.tstart = int(chain_hdr[self.CF.TSTART]) + 1
+
+        if chain_hdr[self.CF.TNAME] == chain_hdr[self.CF.QNAME]:
+            # INS w.r.t the query sequence.
+            if self.qstart < self.tstart:
+                ref = 'A'
+                qry = 'A' * (self.tstart - self.qstart + 1)
+                print(f'{self.contig}\t{self.qstart}\t.\t{ref}\t{qry}\t.\tAUTO\tTPOS={self.tstart}',
+                      file=self.out_vcf)
+            # DEL
+            elif self.qstart > self.tstart:
+                ref = 'A' * (self.qstart - self.tstart + 1)
+                qry = 'A'
+                print(f'{self.contig}\t{self.qstart}\t.\t{ref}\t{qry}\t.\tAUTO\tTPOS={self.tstart}',
+                      file=self.out_vcf)
+        else:
+            len_t = int(chain_hdr[self.CF.TEND]) - int(chain_hdr[self.CF.TSTART])
+            len_q = int(chain_hdr[self.CF.QEND]) - int(chain_hdr[self.CF.QSTART])
+            if len_t > len_q:
+                ref = 'A'
+                qry = 'A' * (len_t - len_q + 1)
+                print(f'{self.contig}\t{self.qstart}\t.\t{ref}\t{qry}\t.\tAUTO\tTPOS={self.tstart}',
+                      file=self.out_vcf)
+            elif len_q > len_t:
+                ref = 'A' * (len_q - len_t + 1)
+                qry = 'A'
+                print(f'{self.contig}\t{self.qstart - len_q}\t.\t{ref}\t{qry}\t.\tAUTO\tTPOS={self.tstart}',
+                      file=self.out_vcf)
+
+
+
+
+    def add(self, chain_aln):
+        self.tstart += int(chain_aln[self.CF.ALN_SIZE])
+        self.qstart += int(chain_aln[self.CF.ALN_SIZE])
+
+        len_t = int(chain_aln[self.CF.ALN_DT])
+        len_q = int(chain_aln[self.CF.ALN_DQ])
+
+        if (len_t > len_q):
+            # Insertion wrt the query seq
+            len_t -= len_q
+            len_q = 0
+            seq_t = 'A' * (len_t + 1)
+            seq_q = 'A' * (len_q + 1)
+            print(f'{self.contig}\t{self.qstart}\t.\t{seq_q}\t{seq_t}\t.\tAUTO\t' +
+                  f'TPOS={self.tstart};ALN_DT={int(chain_aln[self.CF.ALN_DT])};ALN_DQ={int(chain_aln[self.CF.ALN_DQ])}',
+                  file=self.out_vcf)
+        elif (len_t < len_q):
+            # Deletion wrt the query seq
+            len_q -= len_t
+            len_t = 0
+            seq_t = 'A' * (len_t + 1)
+            seq_q = 'A' * (len_q + 1)
+            print(f'{self.contig}\t{self.qstart}\t.\t{seq_q}\t{seq_t}\t.\tAUTO\t' +
+                  f'TPOS={self.tstart};ALN_DT={int(chain_aln[self.CF.ALN_DT])};ALN_DQ={int(chain_aln[self.CF.ALN_DQ])}',
+                  file=self.out_vcf)
+        self.tstart += int(chain_aln[self.CF.ALN_DT])
+        self.qstart += int(chain_aln[self.CF.ALN_DQ])
+
+
+
+# Parse raw `chain_ids` and return a list of chain ids.
+#
+# Input:
+#   chain_ids: A string of chain_ids that are separated by commas and hyphens.
+# Output:
+#   A list containing individual chain ids.
+def parse_chain_id(chain_ids):
+    list_chain = []
+    chain_ids = chain_ids.split(',')
+    for c in chain_ids:
+        c = c.split('-')
+        if len(c) == 1:
+            list_chain.append(c[0])
+        elif len(c) == 2:
+            for i in range(int(c[0]), int(c[1]) + 1):
+                list_chain.append(str(i))
+    return list_chain
+
+
+def get_contig_name(fn, chain_ids, CF):
+    dict_cid_contig = {}
+    with open(fn, 'r') as f:
+        for line in f:
+            line = line.split()
+            if len(line) == 13:
+                if line[CF.CID] in chain_ids:
+                    dict_cid_contig[line[CF.CID]] = line[CF.TNAME]
+    return dict_cid_contig
+
+
+def write_vcf_hdr(f_out, chain_ids, dict_cid_contig):
+    print('##fileformat=VCFv4.3', file=f_out)
+    print('##FILTER=<ID=AUTO,Description="Generated automatically.">', file=f_out)
+    print(dict_cid_contig)
+    for cid, contig in dict_cid_contig.items():
+        print(f'##contig=<ID={contig}>', file=f_out)
+    print('##INFO=<ID=TPOS,Number=A,Type=Integer,Description="Variant position on SEQ_T.">',
+          file=f_out)
+    print('##INFO=<ID=ALN_DT,Number=A,Type=Integer,Description="Length of gap on SEQ_T.">',
+          file=f_out)
+    print('##INFO=<ID=ALN_DQ,Number=A,Type=Integer,Description="Length of gap on SEQ_Q.">',
+          file=f_out)
+    print('#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO', file=f_out)
+
+
+def chain2bed(args):
+    print('chain2bed', file=sys.stderr)
+    # Constants.
+    CF = Chain_Fields()
+
+    chain_ids = parse_chain_id(args.chain_ids)
+    print('Chain IDs to be processed', chain_ids, file=sys.stderr)
+
+    if not args.out_bed:
+        f_out = sys.stdout
+    else:
+        f_out = open(args.out_bed, 'w')
+
+    f = open(args.in_chain, 'r')
+    current_id = ''
+    chain = None
+    for line in f:
+        line = line.split()
+        # Chain header
+        if len(line) == 13:
+            if line[CF.CID] in chain_ids:
+                chain = Chain2Bed(out_bed=f_out, chain_hdr=line, CF=CF)
+                if line[CF.TNAME] != line[CF.QNAME]:
+                    del chain
+                    chain = None
+        elif len(line) == 0:
+            current_id = ''
+            if chain:
+                del chain
+                chain = None
+        elif len(line) == 3:
+            if chain:
+                chain.add(line)
+        elif len(line) == 1:
+            if chain:
+                chain.write()
+                del chain
+                chain = None
+
+
+
+def chain2vcf(args):
+    # Constants.
+    CF = Chain_Fields()
+
+    chain_ids = parse_chain_id(args.chain_ids)
+    if not args.out_vcf:
+        f_out = sys.stdout
+    else:
+        f_out = open(args.out_vcf, 'w')
+
+    write_vcf_hdr(f_out, chain_ids,
+                  get_contig_name(args.in_chain, chain_ids, CF))
+
+    f = open(args.in_chain, 'r')
+    current_id = ''
+    chain = None
+    for line in f:
+        line = line.split()
+        # Chain header
+        if len(line) == 13:
+            if line[CF.CID] in chain_ids:
+            # if line[CF.QNAME] in ['chr2']:
+                chain = Chain2Vcf(out_vcf=f_out, chain_hdr=line, CF=CF)
+                if line[CF.TNAME] != line[CF.QNAME]:
+                    print('[Error] Contig names mismatch',
+                          line[CF.TNAME], line[CF.QNAME], file=sys.stderr)
+                    exit()
+                    del chain
+                    chain = None
+        elif len(line) == 0:
+            current_id = ''
+            if chain:
+                del chain
+                chain = None
+        elif len(line) == 3:
+            if chain:
+                chain.add(line)
+        elif len(line) == 1:
+            if chain:
+                del chain
+                chain = None
+
+
+if __name__ == '__main__':
+    parse_args()
+
diff --git a/scripts/compare_sam.py b/scripts/compare_sam.py
new file mode 100644
index 0000000..cb4dc31
--- /dev/null
+++ b/scripts/compare_sam.py
@@ -0,0 +1,103 @@
+"""Compares two SAM files and report a summary.
+"""
+import argparse
+import sys
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-q', '--input_query',
+        help='Path to a query SAM file.'
+    )
+    parser.add_argument(
+        '-b', '--input_baseline',
+        help='Path to a baseline SAM file.'
+    )
+    parser.add_argument(
+        '-o', '--out', default=sys.stdout,
+        help='Path to the ouput report. [stdout]'
+    )
+    args = parser.parse_args()
+    return args
+
+
+class Summary():
+    def __init__(self):
+        self.pos_diff = []
+        self.mapq_diff = []
+        self.cigar_diff = []
+        self.pos_diff_records = []
+
+    def update(self, query, baseline):
+        if len(query) == 0 or len(baseline) == 0:
+            return
+        if query[0] == baseline[0]: # contig
+            self.pos_diff.append(abs(query[1] - baseline[1]))
+        else:
+            self.pos_diff.append(-1)
+        if not (query[0] == baseline[0] and query[1] == baseline[1]):
+            self.pos_diff_records.append([query, baseline])
+        self.mapq_diff.append(query[2] - baseline[2])
+        self.cigar_diff.append(query[3] == baseline[3])
+
+    def report(self, f_out):
+        print('## Position', file=f_out)
+        print(f'{self.pos_diff.count(0) / len(self.pos_diff)} ({self.pos_diff.count(0)}/{len(self.pos_diff)})', file=f_out)
+        for i in range(30):
+            print(self.pos_diff_records[i], file=f_out)
+
+        print('## MAPQ', file=f_out)
+        print(f'{self.mapq_diff.count(0) / len(self.mapq_diff)} ({self.mapq_diff.count(0)}/{len(self.mapq_diff)})', file=f_out)
+
+        print('## CIGAR', file=f_out)
+        print(f'{self.cigar_diff.count(True) / len(self.cigar_diff)} ({self.cigar_diff.count(True)}/{len(self.cigar_diff)})', file=f_out)
+
+
+def process_sam_line(line, dict_sam):
+    line = line.split()
+    name = line[0]
+    flag = int(line[1])
+    contig = line[2]
+    pos = int(line[3])
+    mapq = int(line[4])
+    cigar = line[5]
+
+    # Ignore unmapped reads.
+    if flag & 4:
+        return
+
+    if flag & 64:
+        if dict_sam.setdefault(name, [[], []]):
+            dict_sam[name][0] = [contig, pos, mapq, cigar]
+    elif flag & 128:
+        if dict_sam.setdefault(name, [[], []]):
+            dict_sam[name][1] = [contig, pos, mapq, cigar]
+    else:
+        print(line)
+
+
+def read_sam_as_dict(fn):
+    dict_sam = {}
+    with open(fn, 'r') as f:
+        for line in f:
+            if line[0] != '@':
+                process_sam_line(line, dict_sam)
+    return dict_sam
+
+
+def compare_sam(args):
+    summary = Summary()
+    dict_query = read_sam_as_dict(args.input_query)
+    dict_baseline = read_sam_as_dict(args.input_baseline)
+    for i_q, [name, [first_seg, second_seg]] in enumerate(dict_query.items()):
+        if dict_baseline.get(name):
+            summary.update(first_seg, dict_baseline[name][0])
+            summary.update(second_seg, dict_baseline[name][1])
+    summary.report(args.out)
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    compare_sam(args)
+
+
diff --git a/scripts/gen_length_map.py b/scripts/gen_length_map.py
new file mode 100644
index 0000000..7df83a2
--- /dev/null
+++ b/scripts/gen_length_map.py
@@ -0,0 +1,32 @@
+import argparse
+import sys
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-f', '--faidx',
+        help='Path to the .fa.idx file.'
+    )
+    parser.add_argument(
+        '-o', '--out',
+        help='Path to the output length map.'
+    )
+    args = parser.parse_args()
+    return args
+
+
+def gen_length_map(args):
+    f = open(args.faidx, 'r')
+    if not args.out:
+        f_out = sys.stdout
+    else:
+        f_out = open(args.out, 'w')
+    for line in f:
+        line = line.split()
+        print(f'{line[0]}\t{line[1]}', file=f_out)
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    gen_length_map(args)
+
diff --git a/testdata/toy.sam b/testdata/toy.sam
new file mode 100644
index 0000000..3c9dba4
--- /dev/null
+++ b/testdata/toy.sam
@@ -0,0 +1,37 @@
+@HD	VN:1.0	SO:unsorted
+@SQ	SN:chr1	LN:248954750
+@SQ	SN:chr2	LN:242192349
+@SQ	SN:chr3	LN:198294095
+@SQ	SN:chr4	LN:190213022
+@SQ	SN:chr5	LN:181538717
+@SQ	SN:chr6	LN:170806452
+@SQ	SN:chr7	LN:159345940
+@SQ	SN:chr8	LN:145138783
+@SQ	SN:chr9	LN:138394715
+@SQ	SN:chr10	LN:133796194
+@SQ	SN:chr11	LN:135085765
+@SQ	SN:chr12	LN:133274462
+@SQ	SN:chr13	LN:114364425
+@SQ	SN:chr14	LN:107043763
+@SQ	SN:chr15	LN:101991347
+@SQ	SN:chr16	LN:90337982
+@SQ	SN:chr17	LN:83257045
+@SQ	SN:chr18	LN:80372693
+@SQ	SN:chr19	LN:58617405
+@SQ	SN:chr20	LN:64444158
+@SQ	SN:chr21	LN:46709851
+@SQ	SN:chr22	LN:50818266
+@SQ	SN:chrX	LN:156040881
+@SQ	SN:chrY	LN:57227415
+@SQ	SN:chrM	LN:16569
+@PG	ID:bowtie2	PN:bowtie2	VN:2.3.4.3	CL:"/home-1/cnaechy1@jhu.edu/miniconda3/bin/bowtie2-align-s --wrapper basic-0 -p 16 -x /net/langmead-bigmem-ib.bluecrab.cluster/storage2/naechyun/refflow-exp/snakemake/SRR622457_se/major/indexes/wg-maj -U ../../data/NA12878-nygc_1-sorted-10M.fq"
+toy1	0	chr1	949000	42	71M	*	0	0	AATGCCACATGGGGCTCCGTCCCGAGCAACCAGAAGACGCAGCAGCAACAGCAAAGACAGCAAAGAGTGCC	???????????????????????????????????????????????????????????????????????
+toy2	0	chr1	949050	42	30M	*	0	0	GCAAAGACAGCAAAGAGTGCCAGACACAGG	??????????????????????????????
+A00217:76:HFLT3DSXX:1:1101:1606:27978	99	chr1	81889506	42	150M	=	81889757	400	AGGTTAACTCAACAGCCAAGTGCCAGCCGTGGGAGATACTGGGAAGAAACTTGGACTAAGTTGGGGGTCGGGGAGAATTTGGAACAAACAGCAAGAAGGTAATTAAGAGTTGGGAGAGGTGGGGATTAGGAAAAATGTATGCAATATATA	?????????????????????????????+??????????????????????????????????????????????????????????????????+????????+???????????????????+?????????????????+??????	AS:i:-5	XN:i:0	XM:i:1	XO:i:0	XG:i:0	NM:i:1	MD:Z:94A55	YS:i:-8	YT:Z:CP
+A00217:76:HFLT3DSXX:1:1101:1606:27978	147	chr1	81889757	42	19M1I130M	=	81889506	-400	GTAGGGTGCAGAGATACACGGCAGTCTTTGAAAAAACTGGCCCATCTCCATGTTCACCTAGGTCTACAGTCTGGTAGCTCTGAACCAGTAAGATTATAGAAATCTTGATATCTTCTCTTTGGTTCTCAGGGTGAGAAATTGTATATTTTT	??????????????5?????????????????????????????????????????????????+?????????????????????????????????????????????????????????????????????????????????????	AS:i:-8	XN:i:0	XM:i:0	XO:i:1	XG:i:1	NM:i:1	MD:Z:149	YS:i:-5	YT:Z:CP
+A00217:76:HFLT3DSXX:1:1101:1606:27999	99	chr1	81889506	42	150M	=	81889757	400	AGGTTAACTCAACAGCCAAGTGCCAGCCGTGGGAGATACTGGGAAGAAACTTGGACTAAGTTGGGGGTCGGGGAGAATTTGGAACAAACAGCAAGAAGGTAATTAAGAGTTGGGAGAGGTGGGGATTAGGAAAAATGTATGCAATATATA	?????????????????????????????+??????????????????????????????????????????????????????????????????+????????+???????????????????+?????????????????+??????	AS:i:-5	XN:i:0	XM:i:1	XO:i:0	XG:i:0	NM:i:1	MD:Z:94A55	YS:i:-8	YT:Z:CP
+A00217:76:HFLT3DSXX:1:1101:1606:27999	147	chr1	81889757	42	19M1I1M1D1M1I127M	=	81889506	-400	GTAGGGTGCAGAGATACACGGCAGTCTTTGAAAAAACTGGCCCATCTCCATGTTCACCTAGGTCTACAGTCTGGTAGCTCTGAACCAGTAAGATTATAGAAATCTTGATATCTTCTCTTTGGTTCTCAGGGTGAGAAATTGTATATTTTT	??????????????5?????????????????????????????????????????????????+?????????????????????????????????????????????????????????????????????????????????????	AS:i:-8	XN:i:0	XM:i:0	XO:i:1	XG:i:1	NM:i:1	MD:Z:149	YS:i:-5	YT:Z:CP
+A00217:76:HFLT3DSXX:1:1101:1127:2597	89	chr19	44832790	42	150M	=	44832790	0	ACTGTAGCCTCAACATCCCAGGGCTCAGGTGATCCTCCCATCTCAGCCTCCTGAGTAGCTGGGATTACAGGAGCATGCCACCACACCTGGCTAATTTTTGTACTTTTTGTAGAGATAGGGTTTCACCATGTTGCCCAGGCTGGTCTTGAA	???????????????????????????????????????????????????+????????????????????????????????+?5????????????????++???????5????????????????????????5??????5????5	AS:i:0	XN:i:0	XM:i:0	XO:i:0	XG:i:0	NM:i:0	MD:Z:150	YT:Z:UP
+A00217:76:HFLT3DSXX:1:1101:1127:2597	165	chr19	44832790	0	*	=	44832790	0	GGGTGGGGGGATGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGATGAACGATCTCCGCTCACTGCAACCTCTGCATCTAGGGGTCAAGTGATTCGCCTGCATCCACCTACCAATTATCGAGGGCTACAGGCGCCCGCCACCACGCCG	??????5??????????+??????????????????5?5+??+5??++?+?+????5?+?5????????+5???5+?+??5++??+?+?????????&5????+++++5?5+??++++?++&??++????+???5???&??5++?55??'	YT:Z:UP
+A00217:76:HFLT3DSXX:1:1101:1063:33771	99	chr9	26829129	3	150M	=	26829474	491	CCTATAACACTGCAATACCACCTTGTTGTCAGTGTAAACAAGGGCATAGCCCGAAAGCACTGAGGCCACTGACAACCCATAGCCTTCCTATCAAAAATCCTTAACCCAGCAGGTTTCCTAACAGGCGATCTAAATCTTAATTAATTACCA	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????	AS:i:-40	XS:i:0	XN:i:0	XM:i:8	XO:i:0	XG:i:0	NM:i:8	MD:Z:11C9T23G5T30T4T35T1G24	YS:i:-74	YT:Z:CP
+A00217:76:HFLT3DSXX:1:1101:1063:33771	147	chr9	26829474	3	27M1I51M1I30M2I38M	=	26829129	-491	GCCATCTATACCAATTCTAAGTTAATATGGACCGAACGAGGTTTTATTAATAGCAAAGAAAAATTAAAATCTCAAACTTAACAAGGTTTTGAACCAAAGTAAAGTTTGCTAAAAATTAACAGTGTAACATGCATTATCCTACTACCACAC	??????????5??????????????+?+5??????+??????????????5?????????????5????????????????????????????5????????????????????+???????5?+?????????+???????????????	AS:i:-74	XS:i:-5	XN:i:0	XM:i:10	XO:i:3	XG:i:4	NM:i:14	MD:Z:31T3T34C1C15C3T8T8G7G4T22	YS:i:-40	YT:Z:CP
diff --git a/testdata/toy.vcf b/testdata/toy.vcf
new file mode 100644
index 0000000..0daf56e
--- /dev/null
+++ b/testdata/toy.vcf
@@ -0,0 +1,22 @@
+##fileformat=VCFv4.3
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=26022019_15h52m43s
+##source=IGSRpipeline
+##reference=ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/GRCh38_full_analysis_set_plus_decoy_hla.fa
+##contig=<ID=chr1>
+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1)">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+chr1	949046	.	A	AACAGCAAAG	.	PASS	AC=4418;AN=5096;DP=23115;AF=0.87;EAS_AF=0.92;EUR_AF=0.94;AFR_AF=0.72;AMR_AF=0.89;SAS_AF=0.92;VT=INDEL;NS=2548
+chr1	949049	.	A	AG	.	PASS	AC=4418;AN=5096;DP=23115;AF=0.87;EAS_AF=0.92;EUR_AF=0.94;AFR_AF=0.72;AMR_AF=0.89;SAS_AF=0.92;VT=INDEL;NS=2548
+chr1	949054	.	GA	G	.	PASS	AC=4418;AN=5096;DP=23115;AF=0.87;EAS_AF=0.92;EUR_AF=0.94;AFR_AF=0.72;AMR_AF=0.89;SAS_AF=0.92;VT=INDEL;NS=2548
+chr1	949057	.	T	TGC	.	PASS	AC=4418;AN=5096;DP=23115;AF=0.87;EAS_AF=0.92;EUR_AF=0.94;AFR_AF=0.72;AMR_AF=0.89;SAS_AF=0.92;VT=INDEL;NS=2548