From 42724dfd847d4017a6ec85e89641ba42f25ba601 Mon Sep 17 00:00:00 2001 From: alumi Date: Thu, 27 Jul 2017 19:18:46 +0900 Subject: [PATCH] Extract depth algorithm and improve performance. --- src/cljam/algo/depth.clj | 128 ++++++++++++++++++++++++ src/cljam/algo/pileup.clj | 23 +---- src/cljam/algo/pileup/pileup.clj | 63 ------------ src/cljam/tools/cli.clj | 35 +++---- src/cljam/util.clj | 46 ++++++++- test-resources/pileup/r2s.pileup | 160 ------------------------------ test-resources/pileup/r2sf.pileup | 160 ------------------------------ test/cljam/algo/t_depth.clj | 81 +++++++++++++++ test/cljam/algo/t_pileup.clj | 31 ------ test/cljam/t_util.clj | 109 ++++++++++++++++++++ 10 files changed, 378 insertions(+), 458 deletions(-) create mode 100644 src/cljam/algo/depth.clj delete mode 100644 src/cljam/algo/pileup/pileup.clj create mode 100644 test/cljam/algo/t_depth.clj diff --git a/src/cljam/algo/depth.clj b/src/cljam/algo/depth.clj new file mode 100644 index 00000000..97db7fc5 --- /dev/null +++ b/src/cljam/algo/depth.clj @@ -0,0 +1,128 @@ +(ns cljam.algo.depth + "Provides algorithms for calculating simple depth of coverage." + (:require [com.climate.claypoole :as cp] + [cljam.common :as common] + [cljam.util :as util] + [cljam.io.sam :as sam] + [cljam.io.sam.util :as sam-util])) + +(def ^:const default-step 1000000) + +;; lazy +;; ---- + +(defn- count-for-positions + "Piles the alignments up and counts them in the positions, returning it as a seq." + [alns beg end] + (let [pile (long-array (inc (- end beg)))] + (doseq [aln alns] + (let [left (max (:pos aln) beg) + right (min (sam-util/get-end aln) end) + left-index (- left beg)] + (dotimes [i (inc (- right left))] + (aset-long pile (+ i left-index) (inc (aget pile (+ i left-index))))))) + (seq pile))) + +(defn- lazy-depth* + "Internal lazy-depth function returning lazy sequence of depth." + [rdr rname start end step] + (let [n-threads (common/get-exec-n-threads) + read-fn (fn [r start end] + (sam/read-alignments r {:chr rname :start start :end end :depth :shallow})) + count-fn (fn [xs] + (if (= n-threads 1) + (map (fn [[start end]] + (count-for-positions (read-fn rdr start end) start end)) xs) + (cp/pmap (dec n-threads) + (fn [[start end]] + (with-open [r (sam/clone-bam-reader rdr)] + (count-for-positions (read-fn r start end) start end))) xs)))] + (->> (util/divide-region start end step) + count-fn + (apply concat)))) + +(defn lazy-depth + "Calculate depth of coverage lazily. Returns a lazy seq of depth for range [start, end]. + Requires a `cljam.io.bam.reader.BAMReader` instance and region. + If start and end are not supplied, piles whole range up. + Note that CIGAR code in alignemnts are ignored and only start/end positions are used." + [bam-reader {:keys [chr start end] :or {start 1 end Long/MAX_VALUE}} + & [{:keys [step n-threads] :or {step default-step n-threads 1}}]] + {:pre [chr start end (pos? start) (pos? end) (<= start end)]} + (when-let [{:keys [len]} (sam-util/ref-by-name (sam/read-refs bam-reader) chr)] + (binding [common/*n-threads* n-threads] + (lazy-depth* bam-reader chr (min len start) (min len end) step)))) + +;; eager +;; ----- + +(defn- unchecked-aset-depth-in-region! + "Piles alignments up and sets depth values to a part of the given int-array." + [alns beg end offset ^ints pile] + (let [beg (int beg) + end (int end) + offset (int offset)] + (doseq [aln alns] + (let [left (Math/max ^int (:pos aln) (unchecked-int beg)) + right (unchecked-inc-int (or (:end aln) (sam-util/get-end aln))) + left-index (unchecked-add-int (unchecked-subtract-int left beg) offset) + right-index (unchecked-add-int (unchecked-subtract-int right beg) offset)] + (aset-int pile left-index (unchecked-inc-int (aget pile left-index))) + (when (<= right end) + (aset-int pile right-index (unchecked-dec-int (aget pile right-index)))))) + (dotimes [i (- end beg)] + (aset-int + pile + (unchecked-add-int (unchecked-inc-int i) offset) + (unchecked-add-int + (aget pile (unchecked-add-int i offset)) + (aget pile (unchecked-add-int (unchecked-inc-int i) offset))))))) + +(defn- aset-depth-in-region! + "Piles alignments up and sets depth values to a part of the given int-array. + It's roughly 15-25% slower than unchecked version." + [alns beg end offset ^ints pile] + (let [beg (int beg) + end (int end) + offset (int offset)] + (doseq [aln alns] + (let [left (Math/max ^int (:pos aln) beg) + right (int ^long (inc (or (:end aln) (sam-util/get-end aln)))) + left-index (+ (- left beg) offset) + right-index (+ (- right beg) offset)] + (aset-int pile left-index (inc (aget pile left-index))) + (when (<= right end) + (aset-int pile right-index (dec (aget pile right-index)))))) + (dotimes [i (- end beg)] + (aset-int pile (+ (inc i) offset) (+ (aget pile (+ i offset)) (aget pile (+ (inc i) offset))))))) + +(defn ^"[I" depth* + "Internal depth function which returns an int-array." + [rdr {:keys [chr start end] :as region} + & [{:keys [step unchecked? n-threads] :or {step default-step unchecked? false n-threads 1}}]] + (let [pile (int-array (inc (- end start))) + f (if unchecked? unchecked-aset-depth-in-region! aset-depth-in-region!)] + (if (= n-threads 1) + (f (sam/read-alignments rdr region {:depth :shallow}) start end 0 pile) + (cp/pdoseq + n-threads + [[s e] (util/divide-region start end step)] + (with-open [r (sam/clone-bam-reader rdr)] + (-> (sam/read-alignments r {:chr chr, :start s, :end e} {:depth :shallow}) + (f s e (- s start) pile))))) + pile)) + +(defn depth + "Calculate depth of coverage eagerly. Returns a seq of depth for range [start, end]. + Requires a `cljam.io.bam.reader.BAMReader` instance and region. + If start and end are not supplied, piles whole range up. + Note that CIGAR code in alignemnts are ignored and only start/end positions are used." + [bam-reader {:keys [chr start end] :or {start 1 end Long/MAX_VALUE}} + & [{:keys [step unchecked? n-threads] :or {step default-step unchecked? false n-threads 1}}]] + {:pre [chr start end (pos? start) (pos? end) (<= start end)]} + (when-let [{:keys [len]} (sam-util/ref-by-name (sam/read-refs bam-reader) chr)] + (seq + (depth* + bam-reader + {:chr chr, :start (min len start), :end (min len end)} + {:step step, :unchecked? unchecked?, :n-threads n-threads})))) diff --git a/src/cljam/algo/pileup.clj b/src/cljam/algo/pileup.clj index 889d28aa..d101eb26 100644 --- a/src/cljam/algo/pileup.clj +++ b/src/cljam/algo/pileup.clj @@ -1,29 +1,8 @@ (ns cljam.algo.pileup "Functions to calculate pileup from the BAM." - (:require [cljam.common :refer [*n-threads*]] - [cljam.io.sam :as sam] - [cljam.algo.pileup.pileup :as plp] + (:require [cljam.io.sam :as sam] [cljam.algo.pileup.mpileup :as mplp])) -(defn first-pos - "Returns a position of first alignment in left-right, or nil." - [bam-reader region] - (plp/first-pos bam-reader region)) - -(def ^:private default-pileup-option - {:n-threads 0}) - -(defn pileup - "Piles alignments up, returning the pileup as a lazy seq. Requires a - cljam.bam.reader.BAMReader instance and region. If start and end are not - supplied, piles whole range up." - ([reader region] - (pileup reader region {})) - ([bam-reader {:keys [chr start end] :or {start -1 end -1} :as region} option] - (let [option* (merge default-pileup-option option)] - (binding [*n-threads* (:n-threads option*)] - (apply plp/pileup bam-reader region (apply concat option*)))))) - (defn mpileup "Returns a lazy sequence of cljam.algo.pileup.mpileup.MPileupElement calculated from FASTA and BAM. If start and end are not supplied, piles whole diff --git a/src/cljam/algo/pileup/pileup.clj b/src/cljam/algo/pileup/pileup.clj deleted file mode 100644 index 23dd383f..00000000 --- a/src/cljam/algo/pileup/pileup.clj +++ /dev/null @@ -1,63 +0,0 @@ -(ns cljam.algo.pileup.pileup - "Provides simple pileup functions." - (:require [com.climate.claypoole :as cp] - [cljam.common :refer [get-exec-n-threads]] - [cljam.util :as util] - [cljam.io.sam :as sam] - [cljam.io.sam.util :as sam-util] - [cljam.algo.pileup.common :as common])) - -(defn- count-for-positions - "Piles the alignments up and counts them in the positions, returning it as a - seq." - [alns beg end] - (let [pile (long-array (inc (- end beg)))] - (doseq [aln alns] - (let [left (max (:pos aln) beg) - right (min (sam-util/get-end aln) end) - left-index (- left beg)] - (dotimes [i (inc (- right left))] - (aset-long pile (+ i left-index) (inc (aget pile (+ i left-index))))))) - (seq pile))) - -(defn- pileup* - "Internal pileup function." - [rdr rname rlength start end step] - (let [n-threads (get-exec-n-threads) - read-fn (fn [r start end] - (sam/read-alignments r {:chr rname :start start :end end :depth :shallow})) - count-fn (fn [xs] - (if (= n-threads 1) - (map (fn [[start end]] - (count-for-positions (read-fn rdr start end) start end)) xs) - (cp/pmap (dec n-threads) - (fn [[start end]] - (with-open [r (sam/clone-bam-reader rdr)] - (count-for-positions (read-fn r start end) start end))) xs)))] - (->> (util/divide-region start end step) - count-fn - (apply concat)))) - -(defn first-pos - "Return a position of first alignment in left-right, or nil." - [bam-reader region] - (-> bam-reader - (sam/read-alignments (assoc region :depth :first-only)) - first - :pos)) - -(defn pileup - "Piles alignments up, returning the pileup as a lazy seq. Requires a - `cljam.bam.reader.BAMReader` instance and region. If start and end are not - supplied, piles whole range up." - [bam-reader {:keys [chr start end] :or {start -1 end -1}} & {:keys [step] :or {step common/step}}] - (try - (if-let [r (sam-util/ref-by-name (sam/read-refs bam-reader) chr)] - (pileup* - bam-reader - chr (:len r) - (if (neg? start) 1 start) - (if (neg? end) (:len r) end) - step)) - (catch bgzf4j.BGZFException _ - (throw (RuntimeException. "Invalid file format"))))) diff --git a/src/cljam/tools/cli.clj b/src/cljam/tools/cli.clj index bef5f044..ace14557 100644 --- a/src/cljam/tools/cli.clj +++ b/src/cljam/tools/cli.clj @@ -12,9 +12,11 @@ [cljam.algo.sorter :as sorter] [cljam.algo.fasta-indexer :as fai] [cljam.algo.dict :as dict] + [cljam.algo.depth :as depth] [cljam.algo.pileup :as plp] [cljam.algo.convert :as convert] - [cljam.algo.level :as level]) + [cljam.algo.level :as level] + [cljam.util :as util]) (:import [java.io Closeable BufferedWriter OutputStreamWriter])) ;; CLI functions @@ -207,11 +209,11 @@ (defn- pileup-simple ([rdr n-threads] (doseq [rname (map :name (sam/read-refs rdr))] - (pileup-simple rdr n-threads rname -1 -1))) - ([rdr n-threads rname start end] + (pileup-simple rdr n-threads {:chr rname}))) + ([rdr n-threads region] (binding [*out* (BufferedWriter. (OutputStreamWriter. System/out)) *flush-on-newline* false] - (doseq [line (plp/pileup rdr {:chr rname :start start :end end} {:n-threads n-threads})] + (doseq [line (depth/lazy-depth rdr region {:n-threads n-threads})] (println line)) (flush)))) @@ -225,9 +227,9 @@ :qual (cstr/join (% line)) :seq (cstr/join (% line)) (% line)) [:rname :pos :ref :count :seq :qual]))))))) - ([rdr ref-fa rname start end] + ([rdr ref-fa region] (with-open [fa-rdr (cseq/reader ref-fa)] - (doseq [line (plp/mpileup fa-rdr rdr {:chr rname :start start :end end})] + (doseq [line (plp/mpileup fa-rdr rdr region)] (if-not (zero? (:count line)) (println (cstr/join \tab (map #(case % :qual (cstr/join (% line)) @@ -243,23 +245,14 @@ :qual (cstr/join (% line)) :seq (cstr/join (% line)) (% line)) [:rname :pos :ref :count :seq :qual])))))) - ([rdr rname start end] - (doseq [line (plp/mpileup nil rdr {:chr rname :start start :end end})] + ([rdr region] + (doseq [line (plp/mpileup nil rdr region)] (if-not (zero? (:count line)) (println (cstr/join \tab (map #(case % :qual (cstr/join (% line)) :seq (cstr/join (% line)) (% line)) [:rname :pos :ref :count :seq :qual]))))))) -(defn- parse-region - [region-str] - (if-let [m (re-find #"^([^:]+)$" region-str)] - (conj (vec (rest m)) -1 -1) - (if-let [m (re-find #"^([^:]+):([0-9]+)-([0-9]+)$" region-str)] - (-> (vec (rest m)) - (update-in [1] #(Integer/parseInt %)) - (update-in [2] #(Integer/parseInt %)))))) - (defn pileup [args] (let [{:keys [options arguments errors summary]} (parse-opts args pileup-cli-options)] (cond @@ -273,11 +266,11 @@ (when-not (sorter/sorted-by? r) (exit 1 "Not sorted")) (if (:region options) - (if-let [region (parse-region (:region options))] + (if-let [region (util/parse-region (:region options))] (cond - (:simple options) (apply pileup-simple r (:thread options) region) - (:ref options) (apply pileup-with-ref r (:ref options) region) - :else (apply pileup-without-ref r region)) + (:simple options) (pileup-simple r (:thread options) region) + (:ref options) (pileup-with-ref r (:ref options) region) + :else (pileup-without-ref r region)) (exit 1 "Invalid region format")) (cond (:simple options) (pileup-simple r (:thread options)) diff --git a/src/cljam/util.clj b/src/cljam/util.clj index 601f39ad..04700012 100644 --- a/src/cljam/util.clj +++ b/src/cljam/util.clj @@ -1,7 +1,8 @@ (ns cljam.util "General utilities." (:require [clojure.java.io :refer [file] :as cio] - [clojure.string :as cstr]) + [clojure.string :as cstr] + [proton.core :as proton]) (:import [org.apache.commons.compress.compressors CompressorStreamFactory CompressorException])) @@ -108,3 +109,46 @@ (map (fn [[s e]] {:chr name :start s :end e}) (divide-region 1 len step))) refs)) + +(defn valid-rname? + "Checks if the given rname conforms to the spec of sam." + [rname] + (and rname (string? rname) (re-matches #"[!-)+-<>-~][!-~]*" rname))) + +(defn valid-region? + "Checks if the given region map is a valid 1-based closed range." + [{:keys [chr start end]}] + (and start end + (valid-rname? chr) + (number? start) (pos? start) + (number? end) (pos? end) + (<= start end))) + +(defn parse-region + "Parse a region string into a map." + [region-str] + (when region-str + (let [[_ chr _ start _ end] (re-matches #"([!-)+-<>-~][!-~]*?)(:(\d+)?(-(\d+))?)?" region-str) + start' (proton/as-long start) + end' (proton/as-long end)] + (when chr + (cond-> {:chr chr} + start' (assoc :start start') + end' (assoc :end end')))))) + +(defn parse-region-strict + "Parse a region string into a map strictly." + [region-str] + (let [region-map (parse-region region-str)] + (when (valid-region? region-map) region-map))) + +(defn format-region + "Format a region map into a string." + [{:keys [chr start end]}] + (let [result (apply str (interleave [nil \: \-] (take-while some? [chr start end])))] + (when-not (cstr/blank? result) result))) + +(defn format-region-strict + "Format a region map into a string strictly." + [region-map] + (when (valid-region? region-map) (format-region region-map))) diff --git a/test-resources/pileup/r2s.pileup b/test-resources/pileup/r2s.pileup index 1473bb3a..88de79ae 100644 --- a/test-resources/pileup/r2s.pileup +++ b/test-resources/pileup/r2s.pileup @@ -29,163 +29,3 @@ 0 0 0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/test-resources/pileup/r2sf.pileup b/test-resources/pileup/r2sf.pileup index 1473bb3a..88de79ae 100644 --- a/test-resources/pileup/r2sf.pileup +++ b/test-resources/pileup/r2sf.pileup @@ -29,163 +29,3 @@ 0 0 0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/test/cljam/algo/t_depth.clj b/test/cljam/algo/t_depth.clj new file mode 100644 index 00000000..9b1b6010 --- /dev/null +++ b/test/cljam/algo/t_depth.clj @@ -0,0 +1,81 @@ +(ns cljam.algo.t-depth + (:require [clojure.test :refer :all] + [cljam.t-common :as common] + [cljam.io.sam :as sam] + [cljam.algo.depth :as depth]) + (:import [clojure.lang LazySeq ArraySeq])) + +(def test-bam-depth-ref + [0 0 0 0 0 0 1 1 3 3 3 3 3 3 2 3 3 3 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 2 1 1 1 1 1]) +(def test-bam-depth-ref2 + [1 2 2 2 2 3 3 3 3 4 4 5 5 6 6 6 6 6 6 6 5 5 4 4 4 4 4 3 3 3 3 3 3 3 2 1 0 0 0 0]) + +(deftest depth-laziness + (testing "lazy-depth returns a LazySeq" + (with-open [r (sam/reader common/test-sorted-bam-file)] + (is (instance? LazySeq (depth/lazy-depth r {:chr "ref"})))) + (with-open [r (sam/reader common/test-sorted-bam-file)] + (is (instance? LazySeq (depth/lazy-depth r {:chr "ref"} {:step 3}))))) + (testing "depth returns a seq" + (with-open [r (sam/reader common/test-sorted-bam-file)] + (is (not (instance? LazySeq (depth/depth r {:chr "ref"}))))) + (with-open [r (sam/reader common/test-sorted-bam-file)] + (is (not (instance? LazySeq (depth/depth r {:chr "ref"} {:step 3}))))))) + +(deftest depth-range-and-length + (testing "in" + (is (= 10 + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/lazy-depth r {:chr "ref", :start 1, :end 10}))))) + (is (= 10 + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/depth r {:chr "ref", :start 1, :end 10})))))) + (testing "over" + (is (= 45 + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/lazy-depth r {:chr "ref", :start 1, :end 100}))))) + (is (= 45 + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/depth r {:chr "ref", :start 1, :end 100})))))) + (testing "under" + (is (thrown? AssertionError + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/lazy-depth r {:chr "ref", :start 0, :end 45}))))) + (is (thrown? AssertionError + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/depth r {:chr "ref", :start 0, :end 45})))))) + (testing "invalid" + (is (thrown? AssertionError + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/lazy-depth r {:chr "ref", :start 10, :end 5}))))) + (is (thrown? AssertionError + (with-open [r (sam/reader common/test-sorted-bam-file)] + (count (depth/depth r {:chr "ref", :start 10, :end 5}))))))) + +(deftest lazy-depth-returns-same-results + (is (= (with-open [r (sam/reader common/test-sorted-bam-file)] + (doall (depth/lazy-depth r {:chr "ref"}))) + test-bam-depth-ref)) + (is (= (with-open [r (sam/reader common/test-sorted-bam-file)] + (doall (depth/lazy-depth r {:chr "ref2"}))) + test-bam-depth-ref2)) + (is (= (with-open [r (sam/reader common/test-sorted-bam-file)] + (doall (depth/depth r {:chr "ref"}))) + test-bam-depth-ref)) + (is (= (with-open [r (sam/reader common/test-sorted-bam-file)] + (doall (depth/depth r {:chr "ref2"}))) + test-bam-depth-ref2))) + +(deftest depth-values-option + (are [?unchecked] + (are [?n-threads] + (are [?step] + (are [?fn] + (= (with-open [r (sam/bam-reader common/test-sorted-bam-file)] + (doall (?fn r {:chr "ref"} {:step ?step, :n-threads ?n-threads, :unchecked? ?unchecked}))) + test-bam-depth-ref) + depth/depth + depth/lazy-depth) + 2 3 5 7 11 13) + 1 2 3 4) + true false)) diff --git a/test/cljam/algo/t_pileup.clj b/test/cljam/algo/t_pileup.clj index 25967cbc..144affb4 100644 --- a/test/cljam/algo/t_pileup.clj +++ b/test/cljam/algo/t_pileup.clj @@ -33,37 +33,6 @@ [\~] [\~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~] [\~] [\~] [\~ \~] [\~ \~] [\~ \~] [\~ \~] [\~] [\~] [\~] [\~] [\~])) -(deftest pileup-returns-LazySeq - (with-open [r (sam/bam-reader test-sorted-bam-file)] - (instance? clojure.lang.LazySeq (plp/pileup r {:chr "ref"}))) - (with-open [r (sam/bam-reader test-sorted-bam-file)] - (instance? clojure.lang.LazySeq (plp/pileup r {:chr "ref2"})))) - -(deftest about-pileup - (is (= (with-open [r (sam/bam-reader test-sorted-bam-file)] - (doall (plp/pileup r {:chr "ref"}))) - test-bam-pileup-ref)) - (doseq [n-threads [1 4]] - (are [?param] (= (with-open [r (sam/bam-reader test-sorted-bam-file)] - (doall (plp/pileup r {:chr "ref"} ?param))) - test-bam-pileup-ref) - {:n-threads n-threads} - {:step 2 :n-threads n-threads} - {:step 3 :n-threads n-threads} - {:step 5 :n-threads n-threads} - {:step 7 :n-threads n-threads} - {:step 11 :n-threads n-threads} - {:step 13 :n-threads n-threads})) - (is (= (with-open [r (sam/bam-reader test-sorted-bam-file)] - (doall (plp/pileup r {:chr "ref2"}))) - test-bam-pileup-ref2))) - -(deftest about-first-pos - (with-open [r (sam/bam-reader test-sorted-bam-file)] - (is (= (plp/first-pos r {:chr "ref" :start 0 :end 64}) 7))) - (with-open [r (sam/bam-reader test-sorted-bam-file)] - (is (= (plp/first-pos r {:chr "ref2" :start 0 :end 64}) 1)))) - (deftest about-pileup-seq ;; ---------- diff --git a/test/cljam/t_util.clj b/test/cljam/t_util.clj index 9568878c..f2519aa1 100644 --- a/test/cljam/t_util.clj +++ b/test/cljam/t_util.clj @@ -41,3 +41,112 @@ {:name "chr2" :len 5}] 6 [{:chr "chr1" :start 1 :end 6} {:chr "chr1" :start 7 :end 10} {:chr "chr2" :start 1 :end 5}])) + +(deftest valid-rname? + (are [?rname ?expected] + (= ?expected (boolean (util/valid-rname? ?rname))) + nil false + "" false + "c" true + "chr1" true + "*" false + "=" false + ":" true + "chr:1" true + "chr1:1-10" true)) + +(deftest valid-region? + (are [?region-map ?expected] + (= ?expected (boolean (util/valid-region? ?region-map))) + nil false + {} false + {:chr "chr1"} false + {:chr "chr1", :start 1} false + {:chr "chr1", :start 1, :end 10} true + {:chr "chr1", :start 100, :end 10} false + {:chr "chr1", :start "1", :end 10} false + {:chr "chr1", :start 1, :end "10"} false + {:chr "chr1", :start 0, :end 10} false + {:chr "chr1", :start 1, :end 0} false + {:chr "", :start 100, :end 10} false + {:chr " ", :start 100, :end 10} false)) + +(deftest parse-region + (are [?region-str ?expected] + (= ?expected (util/parse-region ?region-str)) + nil nil + "*" nil + "=" nil + "c" {:chr "c"} + "chr1" {:chr "chr1"} + "chrUn" {:chr "chrUn"} + "*chr1" nil + "=chr1" nil + "chr1*" {:chr "chr1*"} + "chr1=" {:chr "chr1="} + "chr1-" {:chr "chr1-"} + "chr1:" {:chr "chr1"} + "chr1:-" {:chr "chr1:-"} + "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~" {:chr "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~"} + "chr1:1" {:chr "chr1", :start 1} + "chr1:1-" {:chr "chr1:1-"} + "chr1:1-2" {:chr "chr1", :start 1, :end 2} + "chr1:-2" {:chr "chr1", :end 2} + "chr1:001-200" {:chr "chr1", :start 1, :end 200} + "chr1:100-2" {:chr "chr1", :start 100, :end 2} + "chr1:2:3-4" {:chr "chr1:2", :start 3, :end 4} + "chr1:2-3:4-5" {:chr "chr1:2-3", :start 4, :end 5} + "chr1:2-3:4-5:6-7" {:chr "chr1:2-3:4-5", :start 6, :end 7})) + +(deftest parse-region-strict + (are [?region-str ?expected] + (= ?expected (util/parse-region-strict ?region-str)) + nil nil + "*" nil + "=" nil + "c" nil + "chr1" nil + "chrUn" nil + "*chr1" nil + "=chr1" nil + "chr1*" nil + "chr1=" nil + "chr1-" nil + "chr1:" nil + "chr1:-" nil + "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~" nil + "chr1:1" nil + "chr1:1-" nil + "chr1:1-2" {:chr "chr1", :start 1, :end 2} + "chr1:-2" nil + "chr1:001-200" {:chr "chr1", :start 1, :end 200} + "chr1:100-2" nil + "chr1:2:3-4" {:chr "chr1:2", :start 3, :end 4} + "chr1:2-3:4-5" {:chr "chr1:2-3", :start 4, :end 5} + "chr1:2-3:4-5:6-7" {:chr "chr1:2-3:4-5", :start 6, :end 7})) + +(deftest format-region + (are [?region-map ?expected] + (= ?expected (util/format-region ?region-map)) + nil nil + {} nil + {:chr "chr1"} "chr1" + {:chr "chr1", :start 1} "chr1:1" + {:chr "chr1", :start 1, :end 10} "chr1:1-10" + {:chr "chr1", :start 10, :end 1} "chr1:10-1" + {:chr "chr1", :end 10} "chr1" + {:start 1} nil + {:end 10} nil)) + +(deftest format-region-strict + (are [?region-map ?expected] + (= ?expected (util/format-region-strict ?region-map)) + nil nil + {} nil + {:chr "chr1"} nil + {:chr "chr1", :start 1} nil + {:chr "chr1", :start 1, :end 10} "chr1:1-10" + {:chr "chr1", :start 10, :end 1} nil + {:chr "chr1", :end 10} nil + {:start 1} nil + {:end 10} nil))