Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use bgzip as default for writing .gz files #253

Merged
merged 6 commits into from
Dec 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
:url "https://github.com/chrovis/cljam"
:license {:name "Apache License, Version 2.0"
:url "https://www.apache.org/licenses/LICENSE-2.0"}
:dependencies [[org.clojure/core.memoize "1.0.250"]
[org.clojure/tools.logging "1.1.0"]
:dependencies [[org.clojure/core.memoize "1.0.253"]
[org.clojure/tools.logging "1.2.3"]
[org.clojure/tools.cli "1.0.206"]
[org.apache.commons/commons-compress "1.21"]
[clj-sub-command "0.6.0"]
Expand Down Expand Up @@ -35,8 +35,8 @@
:1.9 {:dependencies [[org.clojure/clojure "1.9.0"]]}
:1.10 {:dependencies [[org.clojure/clojure "1.10.3"]]}
:uberjar {:dependencies [[org.clojure/clojure "1.10.3"]
[org.apache.logging.log4j/log4j-api "2.14.1"]
[org.apache.logging.log4j/log4j-core "2.14.1"]]
[org.apache.logging.log4j/log4j-api "2.17.0"]
[org.apache.logging.log4j/log4j-core "2.17.0"]]
:resource-paths ["bin-resources"]
:main cljam.tools.main
:jvm-opts ["-Dclojure.compiler.direct-linking=true"]
Expand Down
49 changes: 29 additions & 20 deletions src/cljam/util.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
(ns cljam.util
"General utilities."
(:require [clojure.java.io :as cio])
(:require [clojure.java.io :as cio]
[clojure.tools.logging :as logging]
[cljam.io.util.bgzf :as bgzf])
(:import [java.net MalformedURLException URL]
[java.nio.file Files FileVisitor FileVisitResult]
[java.nio.file.attribute FileAttribute]
Expand All @@ -25,15 +27,21 @@
(Files/walkFileTree
(.toPath dir)
(reify FileVisitor
(visitFile [this# file# attrs#]
(Files/deleteIfExists file#)
(visitFile [_ file _attrs]
(when-not (Files/deleteIfExists file)
(logging/warnf
"The file could not be deleted because it did not exist: %s"
(str file)))
FileVisitResult/CONTINUE)
(visitFileFailed [this# file# exc#]
(visitFileFailed [_ _file _exc]
FileVisitResult/CONTINUE)
(preVisitDirectory [this# dir# attrs#]
(preVisitDirectory [_ _dir _attrs]
FileVisitResult/CONTINUE)
(postVisitDirectory [this# dir# exc#]
(Files/deleteIfExists dir#)
(postVisitDirectory [_ dir _exc]
(when-not (Files/deleteIfExists dir)
(logging/warnf
"The directory could not be deleted because it did not exist: %s"
(str dir)))
FileVisitResult/CONTINUE))))

(defmacro with-temp-dir
Expand Down Expand Up @@ -111,25 +119,26 @@
[f]
(let [is (cio/input-stream f)]
(try
(-> (CompressorStreamFactory. true)
(.createCompressorInputStream is))
(.createCompressorInputStream (CompressorStreamFactory. true) is)
(catch CompressorException _
is))))

(defn ^java.io.OutputStream compressor-output-stream
"Returns a compressor output stream from f and a compressor type k. k must be
selected from :gzip or :bzip2. Autodetects the compressor type from the
extension of f if k is not passed. Returns java.io.BufferedOutputStream if the
compressor type is not known. Should be used inside with-open to ensure the
OutputStream is properly closed."
"Returns a compressor output stream from `f` and a compressor type `k`. `k`
must be selected from `:bgzip`, `:gzip` or `:bzip2`. If `k` is not passed, the
compressor type is autodetected from the extension of `f` (case-insensitive):
`.gz`, `.bgz` and `.bgzip` select `:bgzip`, `.gzip` selects `:gzip`, and `.bz2`
and `.bzip2` select `:bzip2`. Returns `java.io.BufferedOutputStream` if the
compressor type is not known. Should be used inside `with-open` to ensure the
OutputStream is properly closed."
([f]
(compressor-output-stream f (condp re-find (.getPath (as-url f))
#"(?i)\.(gz|gzip)$" :gzip
#"(?i)\.(bgz|bgzip|gz)$" :bgzip
#"(?i)\.gzip$" :gzip
#"(?i)\.(bz2|bzip2)$" :bzip2
nil)))
([f k]
(let [os (cio/output-stream f)]
(if-let [s (get compressor-map k)]
(-> (CompressorStreamFactory.)
(.createCompressorOutputStream s os))
os))))
(if (= :bgzip k)
(bgzf/make-bgzf-output-stream f)
(let [os (cio/output-stream f)]
(if-let [s (get compressor-map k)]
(.createCompressorOutputStream (CompressorStreamFactory.) s os)
os)))))
37 changes: 31 additions & 6 deletions test/cljam/util_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,10 @@
(is (deleted? d))
(is (deleted? e))))
(testing "users can delete temp directories before entering a finally clause"
(try
(util/with-temp-dir [d "foo", e "bar"]
(cio/delete-file d true)
(cio/delete-file e true))
(catch Exception e (is false e))
(finally (is true))))
(is (util/with-temp-dir [d "foo", e "bar"]
(cio/delete-file d true)
(cio/delete-file e true)
true)))
(testing "automatically deletes subdirectories created by users"
(let [sub-dirs (util/with-temp-dir [d "foo"]
(let [sub-dirs [(cio/file d "bar") (cio/file d "qux")]]
Expand Down Expand Up @@ -96,3 +94,30 @@

"" nil
nil nil))

;; Checks that the single-argument arity of `util/compressor-output-stream`
;; picks the compressor from the file extension. For each case a fixed string
;; is written through the stream into a file inside a temp directory, then the
;; first `(count ?data)` raw bytes of that file are read back and compared with
;; the expected header bytes in `?data`.
(deftest compressor-output-stream-test
(are [?filename ?data]
(util/with-temp-dir [d "compressor-output-stream-test"]
(let [f (cio/file d ?filename)
;; Buffer sized to hold exactly the expected header prefix.
buf (byte-array (count ?data))]
(with-open [os (util/compressor-output-stream f)]
(.write os (.getBytes "compressor-output-stream-test")))
;; Read the file back WITHOUT decompression so the on-disk header is seen.
;; NOTE(review): a single `.read` is not guaranteed to fill `buf`; this
;; presumably suffices for these short prefixes -- confirm if it ever flakes.
(with-open [is (cio/input-stream f)]
(.read is buf))
;; `unchecked-byte` converts values such as 0x8b / 0xff to signed bytes so
;; they compare equal to the contents of the byte array.
(= (map unchecked-byte ?data) (seq buf))))
;; BGZF: gzip magic (0x1f 0x8b), deflate (0x08) and flag byte 0x04, followed
;; further on by a 6-byte extra field whose subfield id is "BC" with length 2.
;; Per RFC 1952 / the BGZF spec, 0x04 is presumably FLG.FEXTRA and "BC" the
;; BGZF marker. `.gz` is expected to produce this header as well.
"test.gz" [0x1f 0x8b 0x08 0x04 0x00 0x00 0x00 0x00
0x00 0xff 0x06 0x00 (int \B) (int \C) 0x02 0x00]
"test.bgz" [0x1f 0x8b 0x08 0x04 0x00 0x00 0x00 0x00
0x00 0xff 0x06 0x00 (int \B) (int \C) 0x02 0x00]
"test.bgzip" [0x1f 0x8b 0x08 0x04 0x00 0x00 0x00 0x00
0x00 0xff 0x06 0x00 (int \B) (int \C) 0x02 0x00]

;; raw GZIP: same magic and method bytes, but the flag byte is 0x00 (no extra
;; field), which is what tells it apart from the BGZF cases above.
"test.gzip" [0x1f 0x8b 0x08 0x00]

;; BZIP2: the file starts with the ASCII characters "BZh9"; the six bytes that
;; follow are presumably the bzip2 block-header magic -- confirm against the
;; format description.
"test.bz2" [(int \B) (int \Z) (int \h) (int \9)
0x31 0x41 0x59 0x26 0x53 0x59]
"test.bzip2" [(int \B) (int \Z) (int \h) (int \9)
0x31 0x41 0x59 0x26 0x53 0x59]))