diff --git a/src/cljam/bed.clj b/src/cljam/bed.clj new file mode 100644 index 00000000..e125c03a --- /dev/null +++ b/src/cljam/bed.clj @@ -0,0 +1,165 @@ +(ns cljam.bed + (:require [clojure.string :as cstr] + [cljam.util :as util] + [cljam.util.chromosome :as chr-util]) + (:import [java.io BufferedReader BufferedWriter])) + +(def ^:const bed-columns + [:chr :start :end :name :score :strand :thick-start :thick-end :item-rgb :block-count :block-sizes :block-starts]) + +(defn- str->long-list + "Convert string of comma-separated long values into list of longs. + Comma at the end of input string will be ignored. + Returns nil if input is nil." + [^String s] + (when-not (nil? s) + (map util/str->long (cstr/split s #",")))) + +(defn- long-list->str + "Inverse function of str->long-list." + [xs] + (when-not (nil? xs) + (apply str (interpose "," xs)))) + +(defn- update-some + "Same as update if map 'm' contains key 'k'. Otherwise returns the original map 'm'." + [m k f & args] + (if (get m k) + (apply update m k f args) + m)) + +(defn- deserialize-bed + "Parse BED fields string and returns a map. + Based on information at https://genome.ucsc.edu/FAQ/FAQformat#format1." + [^String s] + {:post [(and (:chr %) (:start %) (:end %)) + ;; First 3 fields are required. + (< (:start %) (:end %)) + ;; The chromEnd base is not included in the display of the feature. + (every? true? (drop-while false? (map nil? ((apply juxt bed-columns) %)))) + ;; Lower-numbered fields must be populated if higher-numbered fields are used. + (if-let [s (:score %)] (<= 0 s 1000) true) + ;; A score between 0 and 1000. + (if-let [xs (:block-sizes %)] (= (count xs) (:block-count %)) true) + ;; The number of items in this list should correspond to blockCount. + (if-let [xs (:block-starts %)] (= (count xs) (:block-count %)) true) + ;; The number of items in this list should correspond to blockCount. + (if-let [[f] (:block-starts %)] (= 0 f) true) + ;; The first blockStart value must be 0. + (if-let [xs (:block-starts %)] (= (+ (last xs) (last (:block-sizes %))) (- (:end %) (:start %))) true) + ;; The final blockStart position plus the final blockSize value must equal chromEnd. + (if-let [xs (:block-starts %)] (apply <= (mapcat (fn [a b] [a (+ a b)]) xs (:block-sizes %))) true) + ;; Blocks may not overlap. + ]} + (reduce + (fn deserialize-bed-reduce-fn [m [k f]] (update-some m k f)) + (zipmap bed-columns (cstr/split s #"\s+")) + {:start util/str->long + :end util/str->long + :score util/str->long + :strand #(case % "." :no-strand "+" :plus "-" :minus) + :thick-start util/str->long + :thick-end util/str->long + :block-count util/str->long + :block-sizes str->long-list + :block-starts str->long-list})) + +(defn- serialize-bed + "Serialize bed fields into string." + [m] + (->> (-> m + (update-some :strand #(case % :plus "+" :minus "-" :no-strand ".")) + (update-some :block-sizes long-list->str) + (update-some :block-starts long-list->str)) + ((apply juxt bed-columns)) + (take-while identity) + (interpose " ") + (apply str))) + +(defn- header-or-comment? + "Checks if given string is neither a header nor a comment line." + [^String s] + (or (empty? s) + (.startsWith s "browser") + (.startsWith s "track") + (.startsWith s "#"))) + +(defn- normalize + "Normalize BED fields. + BED fields are stored in format: 0-origin and inclusive-start / exclusive-end. + This function converts the coordinate into cljam style: 1-origin and inclusice-start / inclusive-end." + [m] + (-> m + (update :chr chr-util/normalize-chromosome-key) + (update :start inc) + (update-some :thick-start inc))) + +(defn- denormalize + "De-normalize BED fields. + This is an inverse function of normalize." + [m] + (-> m + (update :start dec) + (update-some :thick-start dec))) + +(defn read-raw-fields + "Returns a lazy sequence of unnormalized BED fields." + [^BufferedReader rdr] + (sequence + (comp (remove header-or-comment?) + (map deserialize-bed)) + (line-seq rdr))) + +(defn read-fields + "Returns a lazy sequence of normalized BED fields." + [^BufferedReader rdr] + (sequence + (comp (remove header-or-comment?) + (map deserialize-bed) + (map normalize)) + (line-seq rdr))) + +(defn sort-fields + "Sort BED fields based on :chr, :start and :end. + :chr with common names come first, in order of (chr1, chr2, ..., chrX, chrY, chrM). + Other chromosomes follow after in lexicographic order." + [xs] + (sort-by + (fn [m] + [(or (util/str->int (last (re-find #"(chr)?(\d+)" (:chr m)))) Integer/MAX_VALUE) + (or ({"X" 23 "Y" 24 "M" 25} (last (re-find #"(chr)?([X|Y|M])" (:chr m)))) Integer/MAX_VALUE) + (:chr m) + (:start m) + (:end m)]) + xs)) + +(defn merge-fields + "Sort and merge overlapped regions. + Currently, this function affects only :end and :name fields." + [xs] + (reduce + (fn [r m] + (let [l (last r)] + (if (and l (= (:chr l) (:chr m)) (<= (:start m) (:end l)) (<= (:start l) (:end m))) + (update r (dec (count r)) (fn [n m] (-> n (assoc :end (:end m)) (update-some :name str "+" (:name m)))) m) + (conj r m)))) + [] + (sort-fields xs))) + +(defn write-raw-fields + "Write sequence of BED fields to writer without converting :start and :thick-start values." + [^BufferedWriter wtr xs] + (->> xs + (map serialize-bed) + (interpose "\n") + ^String (apply str) + (.write wtr))) + +(defn write-fields + "Write sequence of BED fields to writer." + [^BufferedWriter wtr xs] + (->> xs + (map (comp serialize-bed denormalize)) + (interpose "\n") + ^String (apply str) + (.write wtr))) diff --git a/src/cljam/fasta.clj b/src/cljam/fasta.clj index 05cbd113..4551900c 100644 --- a/src/cljam/fasta.clj +++ b/src/cljam/fasta.clj @@ -31,10 +31,11 @@ (fa-core/read-sequences rdr)) (defn read-sequence - ([rdr name] - (fa-core/read-sequence rdr name)) - ([rdr name start end] - (fa-core/read-sequence rdr name start end))) + [rdr {:keys [chr start end]}] + (cond + (and chr start end) (fa-core/read-sequence rdr chr start end) + chr (fa-core/read-sequence rdr chr) + :else (fa-core/read-sequences rdr))) (defn reset [rdr] diff --git a/src/cljam/fasta/reader.clj b/src/cljam/fasta/reader.clj index d19f04fe..a0c8c4bd 100644 --- a/src/cljam/fasta/reader.clj +++ b/src/cljam/fasta/reader.clj @@ -103,8 +103,12 @@ (defn read-sequence [^FASTAReader rdr name start end] (when-let [fai (.index rdr)] - (let [[offset-start offset-end] (fasta-index/get-span fai name start end)] - (read-sequence-with-offset rdr offset-start offset-end)))) + (let [header (fasta-index/get-header fai name)] + (when-let [[offset-start offset-end] (fasta-index/get-span fai name (dec start) end)] + (->> (concat (repeat (max 0 (- 1 start)) \N) + (read-sequence-with-offset rdr offset-start offset-end) + (repeat (max 0 (- end (:len header))) \N)) + (apply str)))))) (defn read "Reads FASTA sequence data, returning its information as a lazy sequence." diff --git a/src/cljam/pileup/mpileup.clj b/src/cljam/pileup/mpileup.clj index 508328e4..a405a18d 100644 --- a/src/cljam/pileup/mpileup.clj +++ b/src/cljam/pileup/mpileup.clj @@ -83,12 +83,10 @@ ([fa-reader bam-reader rname start end] (try (if-let [r (sam-util/ref-by-name (io/read-refs bam-reader) rname)] - (let [s (if (neg? start) 0 start) + (let [s (if (neg? start) 1 start) e (if (neg? end) (:len r) end) refseq (if fa-reader - (if (zero? s) - (concat [\N] (fa/read-sequence fa-reader rname s e)) - (fa/read-sequence fa-reader rname (dec s) e)) + (fa/read-sequence fa-reader {:chr rname :start s :end e}) (repeat \N)) alns (io/read-alignments bam-reader {:chr rname :start s :end e :depth :deep})] (pileup* refseq alns rname s e))) diff --git a/src/cljam/pileup/pileup.clj b/src/cljam/pileup/pileup.clj index 8a2cf0e6..1de9573e 100644 --- a/src/cljam/pileup/pileup.clj +++ b/src/cljam/pileup/pileup.clj @@ -110,7 +110,7 @@ (if-let [r (sam-util/ref-by-name (io/read-refs bam-reader) rname)] (pileup* bam-reader rname (:len r) - (if (neg? start) 0 start) + (if (neg? start) 1 start) (if (neg? end) (:len r) end))) (catch bgzf4j.BGZFException _ (throw (RuntimeException. "Invalid file format")))))) diff --git a/test-resources/test1.bed b/test-resources/test1.bed new file mode 100644 index 00000000..529b9d8b --- /dev/null +++ b/test-resources/test1.bed @@ -0,0 +1,3 @@ +track name=pairedReads description="Clone Paired Reads" useScore=1 +chr22 1000 5000 cloneA 960 + 1000 5000 0 2 567,488, 0,3512 +chr22 2000 6000 cloneB 900 - 2000 6000 0 2 433,399, 0,3601 \ No newline at end of file diff --git a/test-resources/test2.bed b/test-resources/test2.bed new file mode 100644 index 00000000..7aeddcd2 --- /dev/null +++ b/test-resources/test2.bed @@ -0,0 +1,13 @@ +browser position chr7:127471196-127495720 +browser hide all +track name="ItemRGBDemo" description="Item RGB demonstration" visibility=2 itemRgb="On" +chr7 127471196 127472363 Pos1 0 + 127471196 127472363 255,0,0 +chr7 127472363 127473530 Pos2 0 + 127472363 127473530 255,0,0 +chr7 127473530 127474697 Pos3 0 + 127473530 127474697 255,0,0 +chr7 127474697 127475864 Pos4 0 + 127474697 127475864 255,0,0 +chr7 127475864 127477031 Neg1 0 - 127475864 127477031 0,0,255 +chr7 127477031 127478198 Neg2 0 - 127477031 127478198 0,0,255 +chr7 127478198 127479365 Neg3 0 - 127478198 127479365 0,0,255 +chr7 127479365 127480532 Pos5 0 + 127479365 127480532 255,0,0 +chr7 127480532 127481699 Neg4 0 - 127480532 127481699 0,0,255 + diff --git a/test-resources/test3.bed b/test-resources/test3.bed new file mode 100644 index 00000000..8b1f48ff --- /dev/null +++ b/test-resources/test3.bed @@ -0,0 +1,12 @@ +browser position chr7:127471196-127495720 +browser hide all +track name="ColorByStrandDemo" description="Color by strand demonstration" visibility=2 colorByStrand="255,0,0 0,0,255" +chr7 127471196 127472363 Pos1 0 + +chr7 127472363 127473530 Pos2 0 + +chr7 127473530 127474697 Pos3 0 + +chr7 127474697 127475864 Pos4 0 + +chr7 127475864 127477031 Neg1 0 - +chr7 127477031 127478198 Neg2 0 - +chr7 127478198 127479365 Neg3 0 - +chr7 127479365 127480532 Pos5 0 + +chr7 127480532 127481699 Neg4 0 - \ No newline at end of file diff --git a/test/cljam/t_bed.clj b/test/cljam/t_bed.clj new file mode 100644 index 00000000..e92ad20e --- /dev/null +++ b/test/cljam/t_bed.clj @@ -0,0 +1,175 @@ +(ns cljam.t-bed + (:use [midje.sweet]) + (:require [clojure.java.io :as io] + [cljam.bed :as bed] + [cljam.fasta :as fa] + [cljam.io :as cio] + [cljam.bam :as bam] + [cljam.util.sam-util :as sam-util]) + (:import [java.io BufferedReader InputStreamReader ByteArrayInputStream + ByteArrayOutputStream OutputStreamWriter BufferedWriter])) + +(defn- str->bed [^String s] + (with-open [bais (ByteArrayInputStream. (.getBytes s)) + isr (InputStreamReader. bais) + br (BufferedReader. isr)] + (doall (bed/read-fields br)))) + +(defn- bed->str [xs] + (with-open [bao (ByteArrayOutputStream.) + osw (OutputStreamWriter. bao) + bw (BufferedWriter. osw)] + (bed/write-fields bw xs) + (.flush bw) + (.toString bao))) + +(facts "bed file reader" + (str->bed "1 0 100 N 0 + 0 0 255,0,0 2 10,90 0,10") + => [{:chr "chr1" :start 1 :end 100 :name "N" :score 0 :strand :plus :thick-start 1 :thick-end 0 + :item-rgb "255,0,0" :block-count 2 :block-sizes [10 90] :block-starts [0 10]}] + + (with-open [r (io/reader "test-resources/test1.bed")] + (bed/read-fields r) => + [{:chr "chr22" :start 1001 :end 5000 :name "cloneA" :score 960 :strand :plus :thick-start 1001 + :thick-end 5000 :item-rgb "0" :block-count 2 :block-sizes [567 488] :block-starts [0 3512]} + {:chr "chr22" :start 2001 :end 6000 :name "cloneB" :score 900 :strand :minus :thick-start 2001 + :thick-end 6000 :item-rgb "0" :block-count 2 :block-sizes [433 399] :block-starts [0 3601]}]) + + (with-open [r (io/reader "test-resources/test2.bed")] + (bed/read-fields r) => + [{:chr "chr7" :start 127471197 :end 127472363 :name "Pos1" :score 0 :strand :plus :thick-start 127471197 + :thick-end 127472363 :item-rgb "255,0,0"} + {:chr "chr7" :start 127472364 :end 127473530 :name "Pos2" :score 0 :strand :plus :thick-start 127472364 + :thick-end 127473530 :item-rgb "255,0,0"} + {:chr "chr7" :start 127473531 :end 127474697 :name "Pos3" :score 0 :strand :plus :thick-start 127473531 + :thick-end 127474697 :item-rgb "255,0,0"} + {:chr "chr7" :start 127474698 :end 127475864 :name "Pos4" :score 0 :strand :plus :thick-start 127474698 + :thick-end 127475864 :item-rgb "255,0,0"} + {:chr "chr7" :start 127475865 :end 127477031 :name "Neg1" :score 0 :strand :minus :thick-start 127475865 + :thick-end 127477031 :item-rgb "0,0,255"} + {:chr "chr7" :start 127477032 :end 127478198 :name "Neg2" :score 0 :strand :minus :thick-start 127477032 + :thick-end 127478198 :item-rgb "0,0,255"} + {:chr "chr7" :start 127478199 :end 127479365 :name "Neg3" :score 0 :strand :minus :thick-start 127478199 + :thick-end 127479365 :item-rgb "0,0,255"} + {:chr "chr7" :start 127479366 :end 127480532 :name "Pos5" :score 0 :strand :plus :thick-start 127479366 + :thick-end 127480532 :item-rgb "255,0,0"} + {:chr "chr7" :start 127480533 :end 127481699 :name "Neg4" :score 0 :strand :minus :thick-start 127480533 + :thick-end 127481699 :item-rgb "0,0,255"}]) + + (with-open [r (io/reader "test-resources/test3.bed")] + (bed/read-fields r) => + [{:chr "chr7" :start 127471197 :end 127472363 :name "Pos1" :score 0 :strand :plus} + {:chr "chr7" :start 127472364 :end 127473530 :name "Pos2" :score 0 :strand :plus} + {:chr "chr7" :start 127473531 :end 127474697 :name "Pos3" :score 0 :strand :plus} + {:chr "chr7" :start 127474698 :end 127475864 :name "Pos4" :score 0 :strand :plus} + {:chr "chr7" :start 127475865 :end 127477031 :name "Neg1" :score 0 :strand :minus} + {:chr "chr7" :start 127477032 :end 127478198 :name "Neg2" :score 0 :strand :minus} + {:chr "chr7" :start 127478199 :end 127479365 :name "Neg3" :score 0 :strand :minus} + {:chr "chr7" :start 127479366 :end 127480532 :name "Pos5" :score 0 :strand :plus} + {:chr "chr7" :start 127480533 :end 127481699 :name "Neg4" :score 0 :strand :minus}])) + +(tabular + (fact "bed file reader" + (str->bed ?bed-str) => ?expected) + ?bed-str ?expected + "" [] + "#" [] + "track name=foo" [] + "browser" [] + "1 0 1" [{:chr "chr1" :start 1 :end 1}] + "1 0 1\n" [{:chr "chr1" :start 1 :end 1}] + "1\t0\t1" [{:chr "chr1" :start 1 :end 1}] + "1\t0\t1\n" [{:chr "chr1" :start 1 :end 1}] + "chr1 0 1" [{:chr "chr1" :start 1 :end 1}] + "1 0 1\n1 1 2" [{:chr "chr1" :start 1 :end 1} {:chr "chr1" :start 2 :end 2}] + "1 0 1 Name" [{:chr "chr1" :start 1 :end 1 :name "Name"}] + "1 0 1 N 0" [{:chr "chr1" :start 1 :end 1 :name "N" :score 0}] + "1 0 1 N 0 +" [{:chr "chr1" :start 1 :end 1 :name "N" :score 0 :strand :plus}] + "1 0 1 N 0 + 0" [{:chr "chr1" :start 1 :end 1 :name "N" :score 0 :strand :plus :thick-start 1}] + "1 0 1 N 0 + 0 1" [{:chr "chr1" :start 1 :end 1 :name "N" :score 0 :strand :plus :thick-start 1 :thick-end 1}]) + +(tabular + (fact "bed reader assertions" + (str->bed ?bed-str) => (throws java.lang.AssertionError)) + ?bed-str + "a" + "\na" + "trac" + "1 0" + "1 0 0" + "1,0,1" + "1 O 1" + "chr 1 0 1") + +(facts + "bed sort" + (bed/sort-fields []) => [] + (bed/sort-fields + [{:chr "chr1" :start 1 :end 20} {:chr "chrY" :start 2 :end 4} {:chr "chr10" :start 1 :end 100} + {:chr "chr1" :start 10 :end 20} {:chr "chr2" :start 20 :end 40} {:chr "chr1" :start 1 :end 3}]) + =>[{:chr "chr1" :start 1 :end 3} {:chr "chr1" :start 1 :end 20} {:chr "chr1" :start 10 :end 20} + {:chr "chr2" :start 20 :end 40} {:chr "chr10" :start 1 :end 100} {:chr "chrY" :start 2 :end 4}] + (bed/sort-fields + [{:chr "chr10" :start 1 :end 2} {:chr "chr1_KI270892v1_alt" :start 1 :end 2} {:chr "chr1" :start 2 :end 3} + {:chr "chr1" :start 1 :end 2} {:chr "chr10_KI270825v1_alt" :start 1 :end 2} + {:chr "chr1_KI270714v1_random" :start 1 :end 2} {:chr "chr2" :start 1 :end 2} {:chr "chrM" :start 1 :end 2} + {:chr "chr1_GL383518v1_alt" :start 1 :end 2} {:chr "another" :start 1 :end 2} {:chr "chrX" :start 1 :end 2}]) + => [{:chr "chr1" :start 1 :end 2} {:chr "chr1" :start 2 :end 3} {:chr "chr1_GL383518v1_alt" :start 1 :end 2} + {:chr "chr1_KI270714v1_random" :start 1 :end 2} {:chr "chr1_KI270892v1_alt" :start 1 :end 2} + {:chr "chr2" :start 1 :end 2} {:chr "chr10" :start 1 :end 2} {:chr "chr10_KI270825v1_alt" :start 1 :end 2} + {:chr "chrX" :start 1 :end 2} {:chr "chrM" :start 1 :end 2} {:chr "another" :start 1 :end 2}]) + +(facts + "bed merge" + (bed/merge-fields []) => [] + (bed/merge-fields + [{:chr "chr2" :start 1 :end 10} {:chr "chr1" :start 1 :end 10} {:chr "chr1" :start 4 :end 13} + {:chr "chr1" :start 13 :end 15} {:chr "chr1" :start 16 :end 20}]) + => [{:chr "chr1" :start 1 :end 15} {:chr "chr1" :start 16 :end 20} {:chr "chr2" :start 1 :end 10}] + (bed/merge-fields + [{:chr "chr1" :start 1 :end 10 :name "chr1:1-10"} {:chr "chr1" :start 4 :end 13 :name "chr1:4-13"}]) + => [{:chr "chr1" :start 1 :end 13 :name "chr1:1-10+chr1:4-13"}]) + +(facts "bed reader and bam reader" + (with-open [bam (bam/reader "test-resources/test.sorted.bam")] + (letfn [(ref-pos-end [m] {:rname (:rname m) :pos (:pos m) :end (sam-util/get-end m)}) + (read-region [s] (->> (str->bed s) (mapcat #(cio/read-alignments bam %)) (map ref-pos-end)))] + (read-region "ref 0 6") + => [] + (read-region "ref 6 7") + => [{:rname "ref" :pos 7 :end 22}] + (read-region "ref 7 8") + => [{:rname "ref" :pos 7 :end 22}] + (read-region "ref 8 9") + => [{:rname "ref" :pos 7 :end 22} {:rname "ref" :pos 9 :end 18} {:rname "ref" :pos 9 :end 14}] + (read-region "ref 21 22") + => [{:rname "ref" :pos 7 :end 22} {:rname "ref" :pos 16 :end 40}] + (read-region "ref 22 23") + => [{:rname "ref" :pos 16 :end 40}] + (read-region "ref 0 45") + => [{:rname "ref" :pos 7 :end 22} {:rname "ref" :pos 9 :end 18} {:rname "ref" :pos 9 :end 14} + {:rname "ref" :pos 16 :end 40} {:rname "ref" :pos 29 :end 33} {:rname "ref" :pos 37 :end 45}]))) + +(facts + "bed reader and fasta reader" + (with-open [fa (fa/reader "test-resources/medium.fa")] + (comment "chr1" "TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC...") + (letfn [(read-region [s] (->> (str->bed s) (map #(fa/read-sequence fa %))))] + (read-region "1 0 1") + => ["T"] + (read-region "1 0 10") + => ["TAACCCTAAC"] + (read-region "1 0 10\n1 10 20") + => ["TAACCCTAAC" "CCTAACCCTA"]))) + +(facts + "bed writer" + (bed->str (str->bed "1 0 1")) => "chr1 0 1" + (bed->str (str->bed "1 0 10")) => "chr1 0 10" + (bed->str (str->bed "1 0 1\n1 1 2")) => "chr1 0 1\nchr1 1 2" + (with-open [r (io/reader "test-resources/test1.bed")] (str->bed (bed->str (bed/read-fields r)))) + => (with-open [r (io/reader "test-resources/test1.bed")] (doall (bed/read-fields r))) + (with-open [r (io/reader "test-resources/test2.bed")] (str->bed (bed->str (bed/read-fields r)))) + => (with-open [r (io/reader "test-resources/test2.bed")] (doall (bed/read-fields r))) + (with-open [r (io/reader "test-resources/test3.bed")] (str->bed (bed->str (bed/read-fields r)))) + => (with-open [r (io/reader "test-resources/test3.bed")] (doall (bed/read-fields r)))) diff --git a/test/cljam/t_fasta.clj b/test/cljam/t_fasta.clj index ca6c6696..b6c681ef 100644 --- a/test/cljam/t_fasta.clj +++ b/test/cljam/t_fasta.clj @@ -13,13 +13,15 @@ (let [rdr (fasta/reader test-fa-file)] (fasta/read-sequences rdr)) => test-fa-sequences (let [rdr (fasta/reader test-fa-file)] - (fasta/read-sequence rdr "ref" 4 10) => "TGTTAG" + (fasta/read-sequence rdr {:chr "ref" :start 5 :end 10})) => "TGTTAG" (let [rdr (fasta/reader test-fa-file)] - (fasta/read-sequence rdr "ref2" 0 16) => "AGGTTTTATAAAACAA") + (fasta/read-sequence rdr {:chr "ref2" :start 1 :end 16})) => "AGGTTTTATAAAACAA" (let [rdr (fasta/reader test-fa-file)] - (fasta/read-sequence rdr "ref") => "AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT") + (fasta/read-sequence rdr {:chr "ref2" :start 0 :end 45})) => "NAGGTTTTATAAAACAATTAAGTCTACAGAGCAACTACGCGNNNNN" (let [rdr (fasta/reader test-fa-file)] - (fasta/read-sequence rdr "ref2") => "AGGTTTTATAAAACAATTAAGTCTACAGAGCAACTACGCG"))) + (fasta/read-sequence rdr {:chr "ref"})) => "AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT" + (let [rdr (fasta/reader test-fa-file)] + (fasta/read-sequence rdr {:chr "ref2"})) => "AGGTTTTATAAAACAATTAAGTCTACAGAGCAACTACGCG") (fact "about sequential reading of FASTA file." (fasta/sequential-read test-fa-file) => (map #(update % :sequence str/upper-case) test-fa-sequences) diff --git a/test/cljam/t_pileup.clj b/test/cljam/t_pileup.clj index e502c48b..371e10d1 100644 --- a/test/cljam/t_pileup.clj +++ b/test/cljam/t_pileup.clj @@ -5,22 +5,22 @@ [cljam.fasta :as fa] [cljam.pileup :as plp])) -(def test-bam-pileup-ref '(0 0 0 0 0 0 0 1 1 3 3 3 3 3 3 2 3 3 3 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 2 1 1 1 1 1)) -(def test-bam-pileup-ref2 '(0 1 2 2 2 2 3 3 3 3 4 4 5 5 6 6 6 6 6 6 6 5 5 4 4 4 4 4 3 3 3 3 3 3 3 2 1 0 0 0 0)) +(def test-bam-pileup-ref '(0 0 0 0 0 0 1 1 3 3 3 3 3 3 2 3 3 3 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 2 1 1 1 1 1)) +(def test-bam-pileup-ref2 '(1 2 2 2 2 3 3 3 3 4 4 5 5 6 6 6 6 6 6 6 5 5 4 4 4 4 4 3 3 3 3 3 3 3 2 1 0 0 0 0)) (def test-bam-mpileup-seq-ref - '(() () () () () () () ("T") ("T") ("A" "A" "A") ("G" "G" "G") ("A" "A" "C") ("T" "T" "T") + '(() () () () () () ("T") ("T") ("A" "A" "A") ("G" "G" "G") ("A" "A" "C") ("T" "T" "T") ("A" "A" "A") ("+4AGAG" "+2GG" "A") ("G" "G") ("A" "A" "A") ("T" "T" "T") ("A" "+2AA" "A") ("*" "G") ("C" "C") ("T" "T") ("G" ">") (">") (">") (">") (">") (">") (">") (">" "T") (">" "A") (">" "G") (">" "G") (">" "C") (">") ("+1C") ("T") ("C" "C") ("A" "A") ("G" "G") ("C" "C") ("G") ("C") ("C") ("A") ("T"))) (def test-bam-mpileup-seq-ref2 - '(() () () () () () () (".") (".") ("." "." ".") ("." "." ".") ("." "." "C") ("." "." ".") + '(() () () () () () (".") (".") ("." "." ".") ("." "." ".") ("." "." "C") ("." "." ".") ("." "." ".") ("+4AGAG" "+2GG" ".") ("." ".") ("." "." ".") ("." "." ".") ("." "+2AA" ".") ("*" ".") ("." ".") ("." ".") ("." ">") (">") (">") (">") (">") (">") (">") (">" ".") (">" ".") (">" ".") (">" ".") (">" ".") (">") ("+1C") (".") ("." ".") ("." ".") ("." ".") ("." ".") (".") (".") (".") (".") ("."))) (def test-bam-mpileup-qual-ref - '([] [] [] [] [] [] [] [\~] [\~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] + '([] [] [] [] [] [] [\~] [\~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~ \~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~] [\~] [\~] [\~] [\~] [\~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~] [\~] [\~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~] [\~] [\~] [\~] [\~])) @@ -48,21 +48,20 @@ fr (fa/reader test-fa-file)] (let [mplp-ref (doall (plp/mpileup br "ref")) mplp-ref2 (doall (plp/mpileup br "ref2"))] - (map :rname mplp-ref) => (repeat 46 "ref") - (map :ref mplp-ref) => (repeat 46 \N) + (map :rname mplp-ref) => (repeat 45 "ref") + (map :ref mplp-ref) => (repeat 45 \N) (map :count mplp-ref) => test-bam-pileup-ref - (map :pos mplp-ref) => (range 46) + (map :pos mplp-ref) => (range 1 46) (map :seq mplp-ref) => test-bam-mpileup-seq-ref (map :qual mplp-ref) => test-bam-mpileup-qual-ref (map :count mplp-ref2) => test-bam-pileup-ref2) (let [mplp-ref (doall (plp/mpileup fr br "ref"))] - (map :rname mplp-ref) => (repeat 46 "ref") - ;; 0123456789012345678901234567890123456789012345 - ;; AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT - (apply str (map :ref mplp-ref)) => "NAGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT" + (map :rname mplp-ref) => (repeat 45 "ref") + ;; 123456789012345678901234567890123456789012345 + (apply str (map :ref mplp-ref)) => "AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT" (map :count mplp-ref) => test-bam-pileup-ref - (map :pos mplp-ref) => (range 46) + (map :pos mplp-ref) => (range 1 46) (map :seq mplp-ref) => test-bam-mpileup-seq-ref2 (map :qual mplp-ref) => test-bam-mpileup-qual-ref))) @@ -74,8 +73,8 @@ (let [mplp-ref1 (doall (plp/mpileup fr br "ref" 1 40)) mplp-ref2 (doall (plp/mpileup fr br "ref2" 1 40))] (apply str (map :ref mplp-ref1)) => "AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGC" - (map :count mplp-ref1) => (take 40 (rest test-bam-pileup-ref)) + (map :count mplp-ref1) => (take 40 test-bam-pileup-ref) (map :pos mplp-ref1) => (range 1 41) - (map :seq mplp-ref1) => (take 40 (rest test-bam-mpileup-seq-ref2)) + (map :seq mplp-ref1) => (take 40 test-bam-mpileup-seq-ref2) (apply str (map :ref mplp-ref2)) => "AGGTTTTATAAAACAATTAAGTCTACAGAGCAACTACGCG")))