Skip to content

Commit

Permalink
Fix bad expressions.
Browse files Browse the repository at this point in the history
  • Loading branch information
niyarin committed Mar 15, 2020
1 parent 185a99e commit cf1ce51
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 40 deletions.
3 changes: 1 addition & 2 deletions src/cljam/algo/vcf_indexer.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
[in-bgziped-vcf out-csi {:keys [shift depth] :or {shift 14 depth 5}}]
(with-open [r (vcf/reader in-bgziped-vcf)]
(let [offsets (vcf/read-file-offsets r)
[bidx loffset] (csi/offsets->index offsets shift depth)
csi (csi/->CSI (count bidx) shift depth bidx loffset)]
csi (csi/offsets->index offsets shift depth)]
(csi/write-index out-csi csi))))
56 changes: 43 additions & 13 deletions src/cljam/io/csi.clj
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,44 @@
(map #(chunk/->Chunk (:file-beg %) (:file-end %)))
reverse)]))
sort
(into (array-map))))
(into (sorted-map))))

(defn- calc-loffsets [begs file-offsets]
(->> begs
(map (fn [beg]
[beg (->> (drop-while #(< (:end %) beg) file-offsets)
(map :file-beg)
first)]))
(into (array-map))))
(defn- fill-in-file-offsets [file-offsets shift]
(let [indexed-offsets
(->> file-offsets
(map-indexed #(assoc %2
:index %1
:beg (-> (:beg %2)
(bit-shift-right shift)
(bit-shift-left shift))))
(into []))]
(letfn [(step [index last-end]
(when-let [offset (and (< index (count indexed-offsets))
(nth indexed-offsets index))]
(if-let [next-index (->> (drop (+ index 1) indexed-offsets)
(drop-while #(< (:end %)
(:end offset)))
first
:index)]
(let [next-offset (nth indexed-offsets next-index)
end (if (< (:beg next-offset) (:end offset))
(:end offset)
(:beg next-offset))]
(lazy-seq (cons (assoc offset
:end end
:beg last-end)
(step next-index end))))
(list offset))))]
(step 0 0))))

(defn- calc-loffsets [begs file-offsets shift]
(let [offsets (fill-in-file-offsets file-offsets shift)]
(->> begs
(map (fn [beg]
[beg
(:file-beg
(first (drop-while #(< (:end %) beg) offsets)))]))
(into (sorted-map)))))

(defn offsets->index
"Calculates loffsets and bidx
Expand All @@ -115,19 +144,20 @@
(let [chr-offsets (->> (partition-by :chr offsets)
(map #(vector (:chr (first %)) %))
sort
(into (array-map)))
(into (sorted-map)))
bidx (->> chr-offsets
(map (fn [[chr offsets]]
[chr (calc-bidx offsets shift depth)]))
(into (array-map)))
(into (sorted-map)))
loffsets (->> chr-offsets
(map (fn [[chr offsets]]
[chr (calc-loffsets
(map #(util-bin/bin-beg % shift depth)
(keys (get bidx chr)))
offsets)]))
(into (array-map)))]
[bidx loffsets]))
offsets
shift)]))
(into (sorted-map)))]
(->CSI (count bidx) shift depth bidx loffsets)))

(defn write-index
[f ^CSI csi]
Expand Down
42 changes: 17 additions & 25 deletions src/cljam/io/vcf/reader.clj
Original file line number Diff line number Diff line change
Expand Up @@ -234,28 +234,20 @@
(map-indexed (fn [index contig]
[(:id contig) index]))
(into {}))
parse-fn (vcf-util/variant-parser (.meta-info rdr) (.header rdr))]
(loop [file-ptr' 0
data-contigs {}
res (transient [])]
(if-let [line (.readLine input-stream)]
(if (or (meta-line? line) (header-line? line))
(recur (.getFilePointer input-stream) data-contigs res)
(let [file-ptr (.getFilePointer input-stream)
parsed-line (parse-fn (parse-data-line line nil))]
(recur file-ptr
(if (contains? data-contigs (:chr parsed-line))
data-contigs
(assoc data-contigs (:chr parsed-line) (count data-contigs)))
(conj! res
{:file-beg file-ptr'
:file-end file-ptr
:beg (:pos parsed-line)
:end (+ (:pos parsed-line)
(count (:ref parsed-line)))
:chr (:chr parsed-line)}))))
(let [contigs (if (< (count meta-info-contigs) (count data-contigs))
data-contigs
meta-info-contigs)]
(map #(assoc % :chr (get contigs (:chr %)))
(persistent! res)))))))
kws (mapv keyword (drop 8 (.header rdr)))
parse (comp (vcf-util/variant-parser (.meta-info rdr) (.header rdr))
#(parse-data-line % kws))]
(letfn [(step [contigs beg-pointer]
(when-let [line (.readLine input-stream)]
(let [end-pointer (.getFilePointer input-stream)]
(if (or (meta-line? line) (header-line? line))
(lazy-seq (step contigs end-pointer))
(let [{:keys [chr pos ref info]} (parse line)
contigs' (if (contains? contigs chr)
contigs
(assoc contigs chr (count contigs)))]
(cons {:file-beg beg-pointer, :file-end end-pointer
:chr (contigs' chr), :beg pos,
:end (or (:END info) (dec (+ pos (count ref))))}
(lazy-seq (step contigs' end-pointer))))))))]
(step meta-info-contigs 0))))
13 changes: 13 additions & 0 deletions test/cljam/algo/vcf_indexer_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,16 @@
"chr1" 1048577 414826497
"chr1" 32769 414859265
"chr1" 414859265 536608769)))

(deftest csi-comparison-test
(let [csi ^CSI (csi/read-index test-vcf-various-bins-csi-file)
computed (with-open [r (vcf/reader test-vcf-various-bins-gz-file)]
(csi/offsets->index (vcf/read-file-offsets r) 14 6))]
(is (= (.n-ref csi) (.n-ref ^CSI computed)))
(is (= (.min-shift csi) (.min-shift ^CSI computed)))
(is (= (.depth csi) (.depth ^CSI computed)))
;;Exclude tail data
;;due to differences in implementation of HTSlib and bgzf4j.
(is (= (dissoc (get (.bidx csi) 0) 75)
(dissoc (get (.bidx ^CSI computed) 0) 75)))
(is (= (.loffset csi) (.loffset ^CSI computed)))))
13 changes: 13 additions & 0 deletions test/cljam/io/csi_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,16 @@
(cio/file test-csi-file)
(cio/as-url (cio/file test-csi-file))
(cio/as-url (str (:uri server) "/csi/test.csi")))))

(deftest calc-loffsets-test
(is (= (#'csi/calc-loffsets '(3) '({:beg 1 :end 2 :file-beg 1}
{:beg 5 :end 6 :file-beg 2}) 0)
{3 1}))

(is (= (#'csi/calc-loffsets '(3) '({:beg 2 :end 3 :file-beg 1}
{:beg 5 :end 6 :file-beg 2}) 1)
{3 1}))

(is (= (#'csi/calc-loffsets '(4) '({:beg 2 :end 3 :file-beg 1}
{:beg 5 :end 6 :file-beg 2}) 1)
{4 1})))

0 comments on commit cf1ce51

Please sign in to comment.