diff --git a/src/cljam/bam/reader.clj b/src/cljam/bam/reader.clj index 21665609..dceb8825 100644 --- a/src/cljam/bam/reader.clj +++ b/src/cljam/bam/reader.clj @@ -95,13 +95,17 @@ (defn- read-to-finish [^BAMReader rdr + ^Long start ^Long finish ^clojure.lang.IFn read-fn] (let [r ^BGZFInputStream (.reader rdr)] - (when (and (not (zero? (.available r))) - (> finish (.getFilePointer r))) - (cons (read-fn rdr (.refs rdr)) - (lazy-seq (read-to-finish rdr finish read-fn)))))) + (when (< start finish) + (.seek r start) + (when-not (zero? (.available r)) + (let [result (read-fn rdr (.refs rdr)) + curr (.getFilePointer r)] + (cons result + (lazy-seq (read-to-finish rdr curr finish read-fn)))))))) (defn- read-alignments-first-only "It should be equivalent to [(first (filter window @candidates))]" @@ -109,9 +113,8 @@ (loop [left-spans spans] (when-let [span (first left-spans)] (let [[^Long begin ^Long finish] span] - (.seek ^BGZFInputStream (.reader rdr) begin) (or - (loop [left (read-to-finish rdr finish read-fn)] + (loop [left (read-to-finish rdr begin finish read-fn)] (when-let [one (first left)] (if (window one) [one] @@ -138,8 +141,7 @@ :deep read-alignment :pointer pointer-read-alignment) candidates (flatten (map (fn [[^Long begin ^Long finish]] - (.seek ^BGZFInputStream (.reader rdr) begin) - (read-to-finish rdr finish read-fn)) spans))] + (read-to-finish rdr begin finish read-fn)) spans))] (if (= deep-or-shallow :first-only) (read-alignments-first-only rdr spans window read-fn) (filter window candidates)))) @@ -182,8 +184,7 @@ (<= start left) (>= end left)))) candidates (flatten (map (fn [[^Long begin ^Long finish]] - (.seek ^BGZFInputStream (.reader rdr) begin) - (read-to-finish rdr finish read-coordinate-alignment-block)) spans))] + (read-to-finish rdr begin finish read-coordinate-alignment-block)) spans))] (filter window candidates))) (defn load-headers diff --git a/test-resources/small.bam b/test-resources/small.bam new file mode 100644 index 00000000..d88560d2 Binary files /dev/null and b/test-resources/small.bam differ diff --git a/test-resources/small.bam.bai b/test-resources/small.bam.bai new file mode 100644 index 00000000..1a098603 Binary files /dev/null and b/test-resources/small.bam.bai differ diff --git a/test/cljam/t_bam_indexer.clj b/test/cljam/t_bam_indexer.clj index c062a03e..f8b729d0 100644 --- a/test/cljam/t_bam_indexer.clj +++ b/test/cljam/t_bam_indexer.clj @@ -46,6 +46,20 @@ ;; (it will use https://gitlab.xcoo.jp/chrovis/cljam/issues/8 later) ))) +(with-state-changes [(before :facts (do (prepare-cache!) + (fs/copy small-bam-file temp-file-sorted))) + (after :facts (clean-cache!))] + (fact "about BAM indexer (small file)" + (bai/create-index + temp-file-sorted (str temp-file-sorted ".bai")) => anything + (fs/exists? (str temp-file-sorted ".bai")) => truthy + (with-open [r (bam/reader temp-file-sorted)] + ;; Random read with different number of spans. + (count (io/read-alignments r {:chr "chr1" :start 23000000 :end 25000000 :depth :deep})) => 14858 + (count (io/read-alignments r {:chr "chr1" :start 23000000 :end 24500000 :depth :deep})) => 11424 + (count (io/read-alignments r {:chr "chr1" :start 23000000 :end 24000000 :depth :deep})) => 10010 + (count (io/read-alignments r {:chr "chr1" :start 23000000 :end 23500000 :depth :deep})) => 3806))) + (with-state-changes [(before :facts (do (prepare-cache!) (fs/copy medium-bam-file temp-file-sorted))) diff --git a/test/cljam/t_common.clj b/test/cljam/t_common.clj index 05abee41..db33104b 100644 --- a/test/cljam/t_common.clj +++ b/test/cljam/t_common.clj @@ -63,6 +63,7 @@ (def test-bam-file "test-resources/test.bam") (def test-sorted-bam-file "test-resources/test.sorted.bam") +(def small-bam-file "test-resources/small.bam") (def medium-bam-file "test-resources/medium.bam") (def large-bam-file (cavia/resource mycavia "large.bam"))