Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parser of optional fields in SAM/BAM. #92

Merged
merged 1 commit into from
Jul 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/cljam/io/bam/decoder.clj
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
(definline parse-tag-single [tag-type ^ByteBuffer bb]
`(case (long ~tag-type)
~(long \Z) (lsb/read-null-terminated-string ~bb)
~(long \A) (.get ~bb)
~(long \A) (char (.get ~bb))
~(long \I) (bit-and (.getInt ~bb) 0xffffffff)
~(long \i) (.getInt ~bb)
~(long \s) (int (.getShort ~bb))
Expand All @@ -37,7 +37,7 @@
(defn- parse-tag-array [^ByteBuffer bb]
(let [typ (char (.get bb))
len (.getInt bb)]
(->> (for [i (range len)]
(->> (for [_ (range len)]
(case typ
\c (int (.get bb))
\C (bit-and (int (.get bb)) 0xff)
Expand Down Expand Up @@ -118,7 +118,7 @@
pos (inc (lsb/read-int buffer))
l-read-name (int (lsb/read-ubyte buffer))
mapq (lsb/read-ubyte buffer)
bin (lsb/read-ushort buffer)
_ (lsb/read-ushort buffer) ; bin
n-cigar-op (lsb/read-ushort buffer)
flag (lsb/read-ushort buffer)
l-seq (lsb/read-int buffer)
Expand Down
4 changes: 2 additions & 2 deletions src/cljam/io/sam/util.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(ns cljam.io.sam.util
"Utilities related to SAM/BAM format."
(:require [clojure.string :as cstr]
[proton.core :refer [as-long as-double]]
[proton.core :refer [as-long as-double hex->bytes]]
cljam.io.protocols
[cljam.io.util.cigar :refer [count-ref]]
[cljam.util :refer [ubyte]])
Expand Down Expand Up @@ -88,7 +88,7 @@
\c (as-long val)
\C (as-long val)
\f (as-double val)
\H nil ;;FIXME
\H (hex->bytes val)
(throw (Exception. "Unrecognized tag type"))))

(defn- parse-optional-fields [options]
Expand Down
Binary file added test-resources/bam/opts.bam
Binary file not shown.
10 changes: 10 additions & 0 deletions test-resources/sam/opts.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@SQ SN:ref LN:45
q001 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * Xa:A:p Xi:i:-100 Xf:f:-1.4e3 Xz:Z:AATTGGCC
q002 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * Xh:H:1AE301
q003 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * Xc:B:c,-128,0,127
q004 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * XC:B:C,0,127,255
q005 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * Xs:B:s,-32768,0,32767
q006 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * XS:B:S,0,32767,65535
q007 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * Xi:B:i,-2147483648,0,2147483647
q008 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * XI:B:I,0,2147483647,4294967295
q009 0 ref 10 60 16M * 0 0 AAAAGGGGTTTTCCCC * Xf:B:f,-0.3,0.0,0.3
31 changes: 31 additions & 0 deletions test/cljam/io/t_sam.clj
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,34 @@
(are [f] (thrown? Exception (sam/writer (.getAbsolutePath (cio/file util/temp-dir f))))
"temp.baam"
"temp.bai")))

;; Expected optional fields for the opts fixtures, one single-entry map per
;; field, listed in the order the fields appear in the fixture records.
;; Each row below is [tag type value] and is expanded to
;; {tag {:type type, :value value}}.
(def test-options
  (mapv (fn [[tag typ value]]
          {tag {:type typ, :value value}})
        [[:Xa "A" \p]
         [:Xi "i" -100]
         [:Xf "f" -1400.0]
         [:Xz "Z" "AATTGGCC"]
         ;; hex string 1AE301 as signed bytes
         [:Xh "H" (map unchecked-byte [0x1A 0xE3 0x01])]
         [:Xc "B" "c,-128,0,127"]
         [:XC "B" "C,0,127,255"]
         [:Xs "B" "s,-32768,0,32767"]
         [:XS "B" "S,0,32767,65535"]
         [:Xi "B" "i,-2147483648,0,2147483647"]
         [:XI "B" "I,0,2147483647,4294967295"]
         [:Xf "B" "f,-0.3,0.0,0.3"]]))

(defn bytes-to-seq
  "Takes an option map of the form {tag {:type t, :value v}} and, when the
  type is \"H\", replaces the value with `(seq v)` so it can be compared
  with `=`. Options of any other type are returned unchanged."
  [t]
  (let [tag (ffirst t)]
    (if (= "H" (get-in t [tag :type]))
      (update-in t [tag :value] seq)
      t)))

;; Reads every alignment from the SAM and BAM opts fixtures and checks that
;; the concatenated optional fields equal `test-options`.
(deftest options
  (let [read-options (fn [path]
                       ;; Realize the whole sequence before the reader closes.
                       (with-open [rdr (sam/reader path)]
                         (doall
                          (map bytes-to-seq
                               (mapcat :options (sam/read-alignments rdr))))))]
    (testing "sam"
      (is (= (read-options opts-sam-file) test-options)))
    (testing "bam"
      (is (= (read-options opts-bam-file) test-options)))))
3 changes: 3 additions & 0 deletions test/cljam/t_common.clj
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@
(def normalize-before-sam-file "test-resources/sam/normalize_before.sam")
(def normalize-after-sam-file "test-resources/sam/normalize_after.sam")

;; Path to the SAM fixture used by the optional-field parsing tests.
(def opts-sam-file "test-resources/sam/opts.sam")

;; ### BAM files

(def test-bam-file "test-resources/bam/test.bam")
Expand All @@ -120,6 +122,7 @@

(def normalize-before-bam-file "test-resources/bam/normalize_before.bam")
(def normalize-after-bam-file "test-resources/bam/normalize_after.bam")
;; Path to the BAM fixture used by the optional-field parsing tests.
;; NOTE(review): presumably the BAM encoding of sam/opts.sam — confirm.
(def opts-bam-file "test-resources/bam/opts.bam")

;; ### BAM index files

Expand Down