diff --git a/.gitignore b/.gitignore index d9d07e6..b473205 100755 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,4 @@ benchmark/.Rhistory taxonkit/binaries* *names.dmp *nodes.dmp +*.json diff --git a/README.md b/README.md index d3e8f34..620dfe1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ -# TaxonKit - Crossplatform and Efficient NCBI Taxonomy Toolkit +# TaxonKit - Cross-platform and Efficient NCBI Taxonomy Toolkit **Documents:** [http://bioinf.shenwei.me/taxonkit](http://bioinf.shenwei.me/taxonkit) -([**Usage**](http://bioinf.shenwei.me/taxonkit/usage/)) +([**Usage**](http://bioinf.shenwei.me/taxonkit/usage/), +[**Tutorial**](http://bioinf.shenwei.me/taxonkit/tutorial/)) **Source code:** [https://github.com/shenwei356/taxonkit](https://github.com/shenwei356/taxonkit) [![GitHub stars](https://img.shields.io/github/stars/shenwei356/taxonkit.svg?style=social&label=Star&?maxAge=2592000)](https://github.com/shenwei356/taxonkit) @@ -20,7 +21,7 @@ Go to [Download Page](http://bioinf.shenwei.me/taxonkit/download) for more download options and changelogs. -`taxonkit` is implemented in [Golang](https://golang.org/) programming language, +`TaxonKit` is implemented in [Go](https://golang.org/) programming language, executable binary files **for most popular operating systems** are freely available in [release](https://github.com/shenwei356/taxonkit/releases) page. diff --git a/doc/docs/download.md b/doc/docs/download.md index b867b6e..166ae89 100644 --- a/doc/docs/download.md +++ b/doc/docs/download.md @@ -1,15 +1,17 @@ # Download -`taxonkit` is implemented in [Golang](https://golang.org/) programming language, +`TaxonKit` is implemented in [Go](https://golang.org/) programming language, executable binary files **for most popular operating system** are freely available in [release](https://github.com/shenwei356/taxonkit/releases) page. 
## Current Version -[taxonkit v0.1](https://github.com/shenwei356/taxonkit/releases/tag/v0.1) -[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/taxonkit/v0.1/total.svg)](https://github.com/shenwei356/taxonkit/releases/tag/v0.1) +[TaxonKit v0.1.1](https://github.com/shenwei356/taxonkit/releases/tag/v0.1.1) +[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/taxonkit/v0.1.1/total.svg)](https://github.com/shenwei356/taxonkit/releases/tag/v0.1.1) -- first release +- new feature for `taxonkit list`: users can choose output in readable JSON + format with the flag `--json`, so the taxonomy tree can be collapsed and + expanded in a modern text editor. Links: @@ -33,7 +35,7 @@ Links: [Download Page](https://github.com/shenwei356/taxonkit/releases) -`taxonkit` is implemented in [Golang](https://golang.org/) programming language, +`TaxonKit` is implemented in [Go](https://golang.org/) programming language, executable binary files **for most popular operating systems** are freely available in [release](https://github.com/shenwei356/taxonkit/releases) page. @@ -61,7 +63,9 @@ For Go developer, just one command: ## Previous Versions - +- [TaxonKit v0.1](https://github.com/shenwei356/taxonkit/releases/tag/v0.1) +[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/taxonkit/v0.1/total.svg)](https://github.com/shenwei356/taxonkit/releases/tag/v0.1) + - first release
diff --git a/doc/docs/files/grep_result.png b/doc/docs/files/grep_result.png deleted file mode 100644 index da2acad..0000000 Binary files a/doc/docs/files/grep_result.png and /dev/null differ diff --git a/doc/docs/files/otu_table.csv b/doc/docs/files/otu_table.csv deleted file mode 100755 index 981cd84..0000000 --- a/doc/docs/files/otu_table.csv +++ /dev/null @@ -1,6 +0,0 @@ -Taxonomy,A.1,A.2,A.3,B.1,B.2,B.3,C.1,C.2,C.3 -Proteobacteria,.13,.29,.13,.16,.13,.22,.30,.23,.21 -Firmicutes,.42,.06,.49,.41,.55,.41,.32,.38,.66 -Bacteroidetes,.19,.62,.12,.33,.16,.29,.34,.35,.09 -Deferribacteres,.17,.00,.24,.01,.01,.01,.01,.01,.02 -Tenericutes,.00,.00,.00,.01,.03,.02,.00,.00,.00 diff --git a/doc/docs/files/otu_table.gAB.csv b/doc/docs/files/otu_table.gAB.csv deleted file mode 100644 index 608df9a..0000000 --- a/doc/docs/files/otu_table.gAB.csv +++ /dev/null @@ -1,6 +0,0 @@ -Taxonomy,A.1,A.2,A.3,B.1,B.2,B.3 -Proteobacteria,.13,.29,.13,.16,.13,.22 -Firmicutes,.42,.06,.49,.41,.55,.41 -Bacteroidetes,.19,.62,.12,.33,.16,.29 -Deferribacteres,.17,.00,.24,.01,.01,.01 -Tenericutes,.00,.00,.00,.01,.03,.02 diff --git a/doc/docs/files/otu_table.gAB.t.csv b/doc/docs/files/otu_table.gAB.t.csv deleted file mode 100644 index bf8fe0d..0000000 --- a/doc/docs/files/otu_table.gAB.t.csv +++ /dev/null @@ -1,7 +0,0 @@ -Taxonomy,Proteobacteria,Firmicutes,Bacteroidetes,Deferribacteres,Tenericutes -A.1,.13,.42,.19,.17,.00 -A.2,.29,.06,.62,.00,.00 -A.3,.13,.49,.12,.24,.00 -B.1,.16,.41,.33,.01,.01 -B.2,.13,.55,.16,.01,.03 -B.3,.22,.41,.29,.01,.02 diff --git a/doc/docs/files/otu_table.gAB.t.r.csv b/doc/docs/files/otu_table.gAB.t.r.csv deleted file mode 100644 index a9106d7..0000000 --- a/doc/docs/files/otu_table.gAB.t.r.csv +++ /dev/null @@ -1,7 +0,0 @@ -sample,Proteobacteria,Firmicutes,Bacteroidetes,Deferribacteres,Tenericutes -A.1,.13,.42,.19,.17,.00 -A.2,.29,.06,.62,.00,.00 -A.3,.13,.49,.12,.24,.00 -B.1,.16,.41,.33,.01,.01 -B.2,.13,.55,.16,.01,.03 -B.3,.22,.41,.29,.01,.02 diff --git 
a/doc/docs/files/otu_table2.csv b/doc/docs/files/otu_table2.csv deleted file mode 100644 index 4daaefe..0000000 --- a/doc/docs/files/otu_table2.csv +++ /dev/null @@ -1,7 +0,0 @@ -sample,Proteobacteria,Firmicutes,Bacteroidetes,Deferribacteres,Tenericutes,group -A.1,.13,.42,.19,.17,.00,A -A.2,.29,.06,.62,.00,.00,A -A.3,.13,.49,.12,.24,.00,A -B.1,.16,.41,.33,.01,.01,B -B.2,.13,.55,.16,.01,.03,B -B.3,.22,.41,.29,.01,.02,B diff --git a/doc/docs/files/otu_table3.csv b/doc/docs/files/otu_table3.csv deleted file mode 100644 index 50d1aad..0000000 --- a/doc/docs/files/otu_table3.csv +++ /dev/null @@ -1,7 +0,0 @@ -sample,Proteobacteria,Firmicutes,Bacteroidetes,Deferribacteres,Tenericutes,group -A.1,.13,.42,.19,.17,.00,Ctrl -A.2,.29,.06,.62,.00,.00,Ctrl -A.3,.13,.49,.12,.24,.00,Ctrl -B.1,.16,.41,.33,.01,.01,Treatment -B.2,.13,.55,.16,.01,.03,Treatment -B.3,.22,.41,.29,.01,.02,Treatment diff --git a/doc/docs/files/taxon.json.png b/doc/docs/files/taxon.json.png new file mode 100644 index 0000000..b21be72 Binary files /dev/null and b/doc/docs/files/taxon.json.png differ diff --git a/doc/docs/tutorial.md b/doc/docs/tutorial.md new file mode 100644 index 0000000..940abc1 --- /dev/null +++ b/doc/docs/tutorial.md @@ -0,0 +1,52 @@ +# Tutorial + +## Extract all sequences of certain taxa from the nr database + +### Dataset + +- [prot.accession2taxid.gz](ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz) + +### Steps + +Taking bacteria for example. + +1. Getting all taxids of bacteria (taxid 2): + + $ taxonkit list --nodes nodes.dmp --ids 2 --indent "" > bacteria.taxid.txt + + It takes only 2.5s! Number of taxids: + + $ wc -l bacteria.taxid.txt + 454591 bacteria.taxid.txt + +2. Extracting accessions with [csvtk](http://bioinf.shenwei.me/csvtk/download/): + + $ csvtk -t grep -f taxid -P bacteria.taxid.txt prot.accession2taxid.gz | csvtk -t cut -f accession.version > bacteria.taxid.acc.txt + +3. 
Extracting nr sequences: + + $ blastdbcmd -db nr -entry all -outfmt "%a\t%T" | \ + csvtk -t grep -f 2 -P bacteria.taxid.acc.txt | \ + csvtk -t cut -f 1 | \ + blastdbcmd -db nr -entry_batch - -out bacteria.fa + + + + diff --git a/doc/docs/usage.md b/doc/docs/usage.md index 5e52635..9ebe448 100644 --- a/doc/docs/usage.md +++ b/doc/docs/usage.md @@ -14,7 +14,7 @@ Usage ``` TaxonKit - NCBI Taxonomy Toolkit -Version: 0.1 +Version: 0.1.1 Author: Wei Shen