diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c1db0681..466a9098 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -6,10 +6,17 @@ name: CI
# Triggers the workflow on push or pull request events
on: [push, pull_request, workflow_dispatch]
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
+jobs:
# This workflow contains a single job called "build"
build:
+ # Skip pull_request runs for same-repo branches: the push event already covers them
+ if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
# The type of runner that the job will run on
runs-on: ubuntu-latest
diff --git a/.github/workflows/mkdocs.yml b/.github/workflows/mkdocs.yml
new file mode 100644
index 00000000..7a5cfd33
--- /dev/null
+++ b/.github/workflows/mkdocs.yml
@@ -0,0 +1,40 @@
+name: documentation
+on:
+  push:
+    paths:
+      - 'docs/**'
+      - mkdocs.yml
+  pull_request:
+    branches: [main, master]
+    paths:
+      - 'docs/**'
+      - mkdocs.yml
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: write
+jobs:
+  deploy:
+    # Skip pull_request runs for same-repo branches: the push event already covers them
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+      - uses: actions/cache@v4
+        with:
+          key: ${{ github.ref }}
+          path: .cache
+      - run: pip install mkdocs-material
+      - run: pip install pymdown-extensions
+      - run: pip install mkdocs-minify-plugin
+      - run: pip install mkdocs-macros-plugin
+      - run: pip install mkdocs-embed-external-markdown
+      - run: pip install mkdocs-table-reader-plugin
+      - run: mkdocs gh-deploy --force
diff --git a/.readthedocs.yml b/.readthedocs.yml
deleted file mode 100644
index 2f24b7d6..00000000
--- a/.readthedocs.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-# .readthedocs.yaml
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
-# Required
-version: 2
-
-# Set the OS, Python version and other tools you might need
-build:
- os: ubuntu-22.04
- tools:
- python: "3.7"
-
-# Build documentation in the docs/ directory with Sphinx
-sphinx:
- configuration: docs/conf.py
-
-# Optionally build your docs in additional formats such as PDF
-#formats:
-# - pdf
-
-# Optional but recommended, declare the Python requirements required
-# to build your documentation
-# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
-python:
- install:
- - requirements: docs/requirements.txt
\ No newline at end of file
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..411d20e9
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,52 @@
+### Mkdocs
+
+#### Welcome to MkDocs
+
+For full documentation visit [mkdocs.org](https://www.mkdocs.org).
+For full documentation about the theme, visit [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/).
+
+#### Installation
+
+##### Manual
+
+As a prerequisite you need Python >= 3.8 and pip.
+
+Install Mkdocs:
+
+`pip install mkdocs`
+
+For the theme:
+`pip install mkdocs-material`
+
+For the extensions:
+`pip install pymdown-extensions`
+
+For the plugins:
+`pip install mkdocs-minify-plugin`
+`pip install mkdocs-macros-plugin`
+`pip install mkdocs-embed-external-markdown mkdocs-table-reader-plugin`
+
+##### Conda
+
+Clone the repository and move into it.
+Then install all dependencies using conda and the `conda_env.yml` shipped with this repo:
+
+```
+conda env create -f conda_env.yml
+```
+
+Activate the environment and you are good:
+
+```
+conda activate education
+```
+
+#### Testing and building the website
+
+
+* `mkdocs serve` - Start the live-reloading docs server, to test the site locally (http://127.0.0.1:8000/).
+* `mkdocs gh-deploy` - Deploy the site to GitHub Pages.
+
+* `mkdocs build` - Build the documentation site.
+* `mkdocs new [dir-name]` - Create a new project.
+* `mkdocs -h` - Print help message and exit.
diff --git a/docs/agat_for_you.md b/docs/agat_for_you.md
index b27636dd..96e03cd6 100644
--- a/docs/agat_for_you.md
+++ b/docs/agat_for_you.md
@@ -12,15 +12,15 @@ task | tool
-- | --
**check, fix, pad** missing information into sorted and standardised gff3 | `agat_convert_sp_gxf2gxf.pl`
- * add missing parent features (e.g. gene and mRNA if only CDS/exon exists).
- * add missing features (e.g. exon and UTR).
- * add missing mandatory attributes (i.e. ID, Parent).
- * fix identifiers to be uniq.
- * fix feature locations.
- * remove duplicated features.
- * group related features (if spread in different places in the file).
- * sort features (tabix optional).
- * merge overlapping loci into one single locus (only if option activated).
+ * add missing parent features (e.g. gene and mRNA if only CDS/exon exists).
+ * add missing features (e.g. exon and UTR).
+ * add missing mandatory attributes (i.e. ID, Parent).
+ * fix identifiers to be uniq.
+ * fix feature locations.
+ * remove duplicated features.
+ * group related features (if spread in different places in the file).
+ * sort features (tabix optional).
+ * merge overlapping loci into one single locus (only if option activated).
* Convert many formats
@@ -61,5 +61,5 @@ make **function statistics** | `agat_sp_functional_statistics.pl`
**specificity sensitivity** | `agat_sp_sensitivity_specificity.pl`
**fusion / split** analysis between two annotations | `agat_sp_compare_two_annotations.pl`
analyze differences between **BUSCO** results | `agat_sp_compare_two_BUSCOs.pl`
-... and much more ...| ... see [here](https://agat.readthedocs.io/en/latest/) ...
+... and much more ...| ... see [here](../tools/agat_convert_sp_gxf2gxf/) ...
diff --git a/docs/agat_how_does_it_work.md b/docs/agat_how_does_it_work.md
index 8da2e7f9..59e2e285 100644
--- a/docs/agat_how_does_it_work.md
+++ b/docs/agat_how_does_it_work.md
@@ -1,14 +1,14 @@
# How does AGAT work?
-All tools taking GFF/GTF as input can be divided in two groups: \_sp\_ and \_sq\_.
+All tools taking GFF/GTF as input can be divided in two groups: `_sp_` and `_sq_`.
-* Tools with \_sp\_ prefix
+* Tools with `_sp_` prefix
\_sp\_ stands for SLURP. Those tools will charge the file in memory in a specific data structure. It has a memory cost but makes life smoother. Indeed, it allows to perform complicated tasks in a more time efficient way ( Any features can be accessed at any time by AGAT).
Moreover, it allows to fix all potential errors in the limit of the possibilities given by the format itself.
See the AGAT parser section for more information about it.
-* with \_sq\_ prefix
+* with `_sq_` prefix
\_sq\_ stands for SEQUENTIAL. Those tools will read and process GFF/GTF files from the top to the bottom, line by line, performing tasks on the fly. This is memory efficient but the sanity check of the file is minimum. Those tools are not intended to perform complex tasks.
@@ -41,10 +41,19 @@ $omniscient{level3}{tag_l3}{idZ} = @featureListL3 <= tag could be exon,cds,utr3
### How does the AGAT parser work
+The AGAT parser philosophy is to use several approaches to understand the links/relationships between the features:
+
+ * 1) Parse by Parent/child relationship or gene_id/transcript_id relationship.
+ * 2) ELSE Parse by a common tag (an attribute value shared by features that must be grouped together. By default locus_tag is used, but this can be set by parameter).
+ * 3) ELSE Parse sequentially (meaning features are grouped in a bucket, the bucket changes at each level2 feature, and buckets are joined under a common tag at each new L1 feature).
+
To resume by priority of way to parse: **Parent/child or gene_id/transcript_id relationship > common attribute/tag > sequential.**
+
+![](img/agat_parsing_overview.jpg){ width=800px }
+
The parser may used only one or a mix of these approaches according of the peculiarity of the gtf/gff file you provide.
- 1. Parsing approach 1: by Parent/child relationship
+ **1. Parsing approach 1: by Parent/child relationship**
Example of Parent/ID relationship used by the GFF format:
@@ -60,7 +69,7 @@ Example of gene_id/transcript_id relationship used by the GTF format:
chr12 HAVANA exon 100 500 . + . gene_id "gene1"; transcript_id "transcript1"; exon_id=exon1;
chr12 HAVANA CDS 100 500 . + 0 gene_id "gene1"; transcript_id "transcript1"; cds_id=cds-1;
- 2. ELSE Parsing approach 2: by a common attribute/tag
+ **2. ELSE Parsing approach 2: by a common attribute/tag**
a common attribute (or common tag) is an attribute value shared by feature that must be grouped together. AGAT uses default attributes (`gene_id` and `locus_tag`) displayed in the log but can be set by the user modifying the AGAT configuration file `agat_config.yaml`.
You can modify the `agat_config.yaml` either running `agat config --expose` to access it (it will be copied in the current directory) and then modifying it manually; or running `agat config --expose --locus_tag attribute_name` that will copy the `agat_config.yaml` locally with the modification of the `locus_tag` parameter accordingly.
@@ -72,7 +81,7 @@ Example of relationship made using a common tag (here locus_tag):
chr12 HAVANA exon 100 500 . + . locus_tag="gene1";ID=exon1;
chr12 HAVANA CDS 100 500 . + 0 locus_tag="gene1";ID=cds-1;
- 3. ELSE Parsing approach 3: sequentially.
+ **3. ELSE Parsing approach 3: sequentially**
Reading from top to the botom of the file, level3 features (e.g. exon, CDS, UTR) are attached to the last level2 feature (e.g. mRNA) met, and level2 feature are attached to the last L1 feature (e.g. gene) met. To see the list of features of each level see the feature_levels.yaml file (In the share folder in the github repo or using `agat levels --expose`).
@@ -87,6 +96,7 @@ Example of relationship made sequentially:
chr12 HAVANA exon 1000 5000 . + . ID="zzz"
chr12 HAVANA CDS 1000 5000 . + 0 ID="www"
+/!\\ In cases with only level3 features (e.g. RAST or some Prokka files), sequential parsing may not work as expected if Parent/ID or gene_id/transcript_id attributes are missing. Indeed, all features will become children of a single newly created parent. To create a parent per feature or group of features, a common tag must be used to group them correctly (by default gene_id and locus_tag, but you can set up the ones of your choice). See [Particular case](#particular-case).
### Particular case
@@ -186,11 +196,11 @@ This will work well even if transcript isoforms exist. This will use the parsing
In such case the sequential approach cannot be used (Indeed no level1 (e.g. gene) and no lelve2 (e.g. mrna) feature is present in the file). So the presence of parent/ID transcript_id/gene_id relationships and/or a proper common attribute is crucial.
-1. Case with Parent/ID transcript_id/gene_id relationships.
+##### 1. Case with Parent/ID transcript_id/gene_id relationships.
If you have isoforms (for Eukaryote organism) in your files and the `common attribute` used is not set properly you can end up with isoforms having independent parent gene features. See below for more details.
-1.1
+**1.1**
Input (testB.gff):
@@ -237,7 +247,7 @@ If you are lucky those attributes already exist. Here they are absent, you can u
chr12 HAVANA CDS 700 900 . + 0 ID=cds-b;Parent=transcriptb;locus_id="gene2"
-1.2.
+**1.2**
Here we have only level3 features, Parent/ID transcript_id/gene_id relationships present, default `common attributes` ( `locus_tag` or `gene_id`) is set for some features.
@@ -272,9 +282,9 @@ Input testF.gff:
The `common attributes` is used to attach isoforms to a common gene feature. As transcript4 has no common attribute, it will have its own parent features.
-2. Case without Parent/ID transcript_id/gene_id relationships. Only `common attribute` approach to parse the file can be used.
+##### 2. Case without Parent/ID transcript_id/gene_id relationships. Only `common attribute` approach to parse the file can be used.
-2.1.
+**2.1**
Here we have only level3 features, no Parent/ID transcript_id/gene_id relationships, but a default `common attributes` ( `locus_tag` or `gene_id`) is present.
@@ -347,7 +357,7 @@ As the default `common attribute` are absent (gene_id or locus_tag), you have to
/!\\ In Eukaryote annotation containing isoforms it will not work properly. Indeed, it will result of isoforms merged in chimeric transcripts (It will be really unlucky to end up in such situation, because even a human cannot resolve such type of situation. There is no information about isoforms structure...).
In Eukaryote cases (even for multi-exon CDS) with absence of isoforms, it will work correctly.
-3. In the extreme case where you have only one type of feature, you may decide to use the ID as common attribute.
+##### 3. In the extreme case where you have only one type of feature, you may decide to use the ID as common attribute.
This is the same problem as seen previously. Here the worse case that can append: only level3 features, no Parent/ID transcript_id/gene_id relationships, and the default `common attributes` ( `locus_tag` and `gene_id`) are absent. Sequential approach will be used by AGAT but as there are only level3 features,
all will be linked to only one parent. See below for more details.
@@ -392,7 +402,7 @@ This case is fine for Prokaryote annotation.
A) The annotation should not contain isoforms (Indeed, there is no existing information to decipher to which isoform a CDS will be part of. If isoforms are present, each one will be linked to its own gene feature).
B) If there are multi-exon CDS, CDS parts must share the same ID (Indeed multi-exon CDS can share or not the same ID. Both way are allowed by the GFF format. If the CDS parts share the same ID, the CDS parts will be collected properly. If the CDS parts do not share the same ID, AGAT will slice it and create a gene/mRNA feature by CDS part!).
-4. Case where you have only one type of feature, and some feature have Parent attributes and some other have common attributes.
+##### 4. Case where you have only one type of feature, and some feature have Parent attributes and some other have common attributes.
Input (testG.gff):
diff --git a/docs/how_to_cite.md b/docs/how_to_cite.md
index 691355f7..e14da16d 100644
--- a/docs/how_to_cite.md
+++ b/docs/how_to_cite.md
@@ -1,6 +1,6 @@
-## How to cite?
+# How to cite?
-This work has not been published yet(I will think about it). But if you wish to cite AGAT you can do it as follow (Adapt the version for the one you have used):
+This work has not been published yet (I will think about it). But if you wish to cite AGAT you can do it as follows (adapt the version to the one you have used):
```
Dainat J. AGAT: Another Gff Analysis Toolkit to handle annotations in any GTF/GFF format.
diff --git a/docs/howto/how_to_aggregate_annotation.md b/docs/howto/how_to_aggregate_annotation.md
new file mode 100644
index 00000000..49ba543f
--- /dev/null
+++ b/docs/howto/how_to_aggregate_annotation.md
@@ -0,0 +1,7 @@
+# How to aggregate several annotations?
+
+There are two scripts in AGAT in order to aggregate annotations. They do not behave the same way. Find below some explanation on their behaviors.
+
+![](../img/aggregate_annotations.png "example")
+
+
\ No newline at end of file
diff --git a/docs/howto/how_to_extract_sequences.md b/docs/howto/how_to_extract_sequences.md
new file mode 100644
index 00000000..cb9f22eb
--- /dev/null
+++ b/docs/howto/how_to_extract_sequences.md
@@ -0,0 +1,7 @@
+# How to extract sequences?
+
+You can use the script `agat_sp_extract_sequences.pl`. Find below examples of how to use this script.
+
+![](../img/agat_sp_extract_sequences_2.png "gff_example")
+
+![](../img/agat_sp_extract_sequences_1.png "extraction_example")
\ No newline at end of file
diff --git a/docs/img/aggregate_annotations.png b/docs/img/aggregate_annotations.png
new file mode 100644
index 00000000..e1f711ec
Binary files /dev/null and b/docs/img/aggregate_annotations.png differ
diff --git a/docs/img/aggregate_annotations.pptx b/docs/img/aggregate_annotations.pptx
new file mode 100644
index 00000000..76e214e0
Binary files /dev/null and b/docs/img/aggregate_annotations.pptx differ
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..a7ce8db2
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,16 @@
+Welcome to AGAT's documentation!
+================================
+
+AGAT: Another GTF/GFF Analysis Toolkit
+----------------------------------------
+
+**A GFF/GTF toolkit allowing you to perform almost everything you might want to achieve ^^**
+
+The GTF/GFF formats are 9-column text formats used to describe and represent genomic features.
+The formats have evolved considerably since 1997, and despite the well-defined specifications that exist nowadays they retain great flexibility, allowing them to hold a wide variety of information.
+This flexibility has a drawback: there is an incredible number of flavors of the formats: GFF / GFF1 / GFF2 / GFF2.5 / GFF3 / GTF / GTF2 / GTF2.1 / GTF2.2 / GTF2.5 / GTF3
+
+It's often hard to understand and differentiate all GFF/GTF formats/flavors. Many tools using GTF/GFF formats fail due to specific expectations.
+AGAT is a suite of tools able to deal with any GTF/GFF formats and perform most of the possible tasks you would need.
+
+![](img/wordcloud.png){ width=600px }
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 26677eb2..00000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,133 +0,0 @@
-Welcome to AGAT's documentation!
-================================
-
-AGAT: Another GTF/GFF Analysis Toolkit
-----------------------------------------
-
-**A GFF/GTF toolkit allowing you to perform almost everything you might want to achieve ^^**
-
-The GTF/GFF formats are 9-column text formats used to describe and represent genomic features.
-The formats have quite evolved since 1997, and despite well-defined specifications existing nowadays they have a great flexibility allowing holding wide variety of information.
-This flexibility has a drawback aspect, there is an incredible amount of flavor of the formats: GFF / GFF1 / GFF2 / GFF2.5 / GFF3 / GTF / GTF2 / GTF2.1 / GTF2.2 / GTF2.5 / GTF3
-
-It's often hard to understand and differentiate all GFF/GTF formats/flavors. Many tools using GTF/GFF formats fails due to specific expectations.
-AGAT is a suite of tools able to deal with any GTF/GFF formats and perform most of the possible tasks you would need.
-
-.. figure:: img/wordcloud.png
-
-
-Contents
-========
-
-.. toctree::
- :maxdepth: 2
- :caption: General
-
- agat_for_you.md
- agat_how_does_it_work.md
- troubleshooting.md
- how_to_cite.md
- why_agat.md
-
-
-.. toctree::
- :maxdepth: 3
- :caption: Knowledge
-
- gxf.md
-
-.. toctree::
- :maxdepth: 2
- :caption: AGAT vs other tools
-
- topological-sorting-of-gff-features.md
- gff_to_bed.md
- gff_to_gtf.md
-
-
-.. toctree::
- :maxdepth: 1
- :caption: List of tools
-
- tools/agat_convert_bed2gff.md
- tools/agat_convert_embl2gff.md
- tools/agat_convert_genscan2gff.md
- tools/agat_convert_mfannot2gff.md
- tools/agat_convert_minimap2_bam2gff.md
- tools/agat_convert_sp_gff2bed.md
- tools/agat_convert_sp_gff2gtf.md
- tools/agat_convert_sp_gff2tsv.md
- tools/agat_convert_sp_gff2zff.md
- tools/agat_convert_sp_gxf2gxf.md
- tools/agat_sp_Prokka_inferNameFromAttributes.md
- tools/agat_sp_add_intergenic_regions.md
- tools/agat_sp_add_introns.md
- tools/agat_sp_add_splice_sites.md
- tools/agat_sp_add_start_and_stop.md
- tools/agat_sp_alignment_output_style.md
- tools/agat_sp_clipN_seqExtremities_and_fixCoordinates.md
- tools/agat_sp_compare_two_BUSCOs.md
- tools/agat_sp_compare_two_annotations.md
- tools/agat_sp_complement_annotations.md
- tools/agat_sp_ensembl_output_style.md
- tools/agat_sp_extract_attributes.md
- tools/agat_sp_extract_sequences.md
- tools/agat_sp_filter_by_ORF_size.md
- tools/agat_sp_filter_by_locus_distance.md
- tools/agat_sp_filter_by_mrnaBlastValue.md
- tools/agat_sp_filter_feature_by_attribute_presence.md
- tools/agat_sp_filter_feature_by_attribute_value.md
- tools/agat_sp_filter_feature_from_keep_list.md
- tools/agat_sp_filter_feature_from_kill_list.md
- tools/agat_sp_filter_gene_by_intron_numbers.md
- tools/agat_sp_filter_gene_by_length.md
- tools/agat_sp_filter_incomplete_gene_coding_models.md
- tools/agat_sp_filter_record_by_coordinates.md
- tools/agat_sp_fix_cds_phases.md
- tools/agat_sp_fix_features_locations_duplicated.md
- tools/agat_sp_fix_fusion.md
- tools/agat_sp_fix_longest_ORF.md
- tools/agat_sp_fix_overlaping_genes.md
- tools/agat_sp_fix_small_exon_from_extremities.md
- tools/agat_sp_flag_premature_stop_codons.md
- tools/agat_sp_flag_short_introns.md
- tools/agat_sp_functional_statistics.md
- tools/agat_sp_gxf_to_gff3.md
- tools/agat_sp_keep_longest_isoform.md
- tools/agat_sp_kraken_assess_liftover.md
- tools/agat_sp_list_short_introns.md
- tools/agat_sp_load_function_from_protein_align.md
- tools/agat_sp_manage_IDs.md
- tools/agat_sp_manage_UTRs.md
- tools/agat_sp_manage_attributes.md
- tools/agat_sp_manage_functional_annotation.md
- tools/agat_sp_manage_introns.md
- tools/agat_sp_merge_annotations.md
- tools/agat_sp_move_attributes_within_records
- tools/agat_sp_prokka_fix_fragmented_gene_annotations.md
- tools/agat_sp_sensitivity_specificity.md
- tools/agat_sp_separate_by_record_type.md
- tools/agat_sp_split_by_level2_feature.md
- tools/agat_sp_statistics.md
- tools/agat_sp_to_tabulated.md
- tools/agat_sp_webApollo_compliant.md
- tools/agat_sq_add_attributes_from_tsv.md
- tools/agat_sq_add_hash_tag.md
- tools/agat_sq_add_locus_tag.md
- tools/agat_sq_filter_feature_from_fasta.md
- tools/agat_sq_list_attributes.md
- tools/agat_sq_manage_IDs.md
- tools/agat_sq_manage_attributes.md
- tools/agat_sq_mask.md
- tools/agat_sq_remove_redundant_entries.md
- tools/agat_sq_repeats_analyzer.md
- tools/agat_sq_reverse_complement.md
- tools/agat_sq_rfam_analyzer.md
- tools/agat_sq_split.md
- tools/agat_sq_stat_basic.md
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`search`
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 00000000..f22ecb38
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,159 @@
+# Installation
+
+## Using Docker
+
+
+First you must have [Docker](https://docs.docker.com/get-docker/) installed and running.
+Secondly, have a look at the available AGAT biocontainers at [quay.io](https://quay.io/repository/biocontainers/agat?tab=tags).
+
+Then:
+```
+# get the chosen AGAT container version
+docker pull quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0
+# use an AGAT's tool e.g. agat_convert_sp_gxf2gxf.pl
+docker run quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0 agat_convert_sp_gxf2gxf.pl --help
+```
+
+## Using Singularity
+
+First you must have [Singularity](https://sylabs.io/guides/3.5/user-guide/quick_start.html) installed and running.
+Secondly, have a look at the available AGAT biocontainers at [quay.io](https://quay.io/repository/biocontainers/agat?tab=tags).
+
+Then:
+```
+# get the chosen AGAT container version
+singularity pull docker://quay.io/biocontainers/agat:1.0.0--pl5321hdfd78af_0
+# run the container
+singularity run agat_1.0.0--pl5321hdfd78af_0.sif
+```
+
+You are now in the container. You can use an AGAT tool, e.g. `agat_convert_sp_gxf2gxf.pl`, by running:
+```
+agat_convert_sp_gxf2gxf.pl --help
+```
+
+
+## Using Bioconda
+
+### Install AGAT
+
+ ```
+ conda install -c bioconda agat
+ ```
+
+or in a fresh environment:
+
+ ```
+ conda create -c bioconda -n agat agat
+ ```
+
+### Update AGAT
+
+ ```
+ conda update agat
+ ```
+
+### Uninstall AGAT
+ ```
+ conda uninstall agat
+ ```
+
+
+## Old school - Manually
+
+You will have to install all prerequisites and AGAT manually.
+
+### Install prerequisites
+ * R (optional)
+ You can install it by conda (`conda install r-base`), through [CRAN](https://cran.r-project.org) ([See here for a nice tutorial](https://www.datacamp.com/community/tutorials/installing-R-windows-mac-ubuntu)) or using your package management tool (e.g apt for Debian, Ubuntu, and related Linux distributions). R is optional and can be used to perform some plots. You will need to install the Perl dependency Statistics::R
+
+ * Perl >= 5.8
+ It should already be available on your computer. If you are unlucky [perl.org](https://www.perl.org/get.html) is the place to go.
+
+ * Perl modules
+ They can be installed in different ways:
+
+ * using cpan or cpanm
+
+ ```
+ cpanm install bioperl Clone Graph::Directed LWP::UserAgent Carp Sort::Naturally File::Share File::ShareDir::Install Moose YAML LWP::Protocol::https Term::ProgressBar
+ ```
+
+ * using conda
+
+ * using the provided yaml file
+
+ ```
+ conda env create -f conda_environment_AGAT.yml
+ conda activate agat
+ ```
+
+ * manually
+
+ ```
+ conda install perl-bioperl perl-clone perl-graph perl-lwp-simple perl-carp perl-sort-naturally perl-file-share perl-file-sharedir-install perl-moose perl-yaml perl-lwp-protocol-https perl-term-progressbar
+ ```
+
+ * using your package management tool (e.g apt for Debian, Ubuntu, and related Linux distributions)
+
+ ```
+ apt install libbio-perl-perl libclone-perl libgraph-perl liblwp-useragent-determined-perl libstatistics-r-perl libcarp-clan-perl libsort-naturally-perl libfile-share-perl libfile-sharedir libfile-sharedir-install-perl libyaml-perl liblwp-protocol-https-perl libterm-progressbar-perl
+ ```
+
+ * Optional
+ Some scripts offer the possibility to perform plots. You will need R and Statistics::R which are not included by default.
+
+ * R
+ You can install it by conda (`conda install r-base`), through [CRAN](https://cran.r-project.org) ([See here for a nice tutorial](https://www.datacamp.com/community/tutorials/installing-R-windows-mac-ubuntu)) or using your package management tool (e.g apt for Debian, Ubuntu, and related Linux distributions).
+
+ * Statistics::R
+ You can install it through conda (`conda install perl-statistics-r`), using cpan/cpanm (`cpanm install Statistics::R`), or your package management tool (`apt install libstatistics-r-perl`)
+
+
+
+### Install AGAT
+
+ ```
+ git clone https://github.com/NBISweden/AGAT.git # Clone AGAT
+ cd AGAT # move into AGAT folder
+ perl Makefile.PL # Check all the dependencies*
+ make # Compile
+ make test # Test
+ make install # Install
+ ```
+
+*If dependencies are missing you will be warned. Please refer to the [Install prerequisites](#install-prerequisites) section.
+
+**Remark**: On MS Windows, instead of make you'd probably have to use dmake or nmake depending on the toolchain you have.
+
+### Update AGAT
+From the folder where the repository is located.
+
+ ```
+ git pull # Update to last AGAT
+ perl Makefile.PL # Check all the dependencies*
+ make # Compile
+ make test # Test
+ make install # Install
+ ```
+*If dependencies are missing you will be warned. Please refer to the [Install prerequisites](#install-prerequisites) section.
+
+### Change to a specific version
+From the folder where the repository is located.
+
+ ```
+ git pull # Update the code
+ git checkout v0.1 # use version v0.1 (See releases tab for a list of available versions)
+ perl Makefile.PL # Check all the dependencies*
+ make # Compile
+ make test # Test
+ make install # Install
+ ```
+*If dependencies are missing you will be warned. Please refer to the [Install prerequisites](#install-prerequisites) section.
+
+### Uninstall AGAT
+
+ ```
+ perl uninstall_AGAT
+ ```
+
diff --git a/docs/tools/agat_convert_bed2gff.md b/docs/tools/agat_convert_bed2gff.md
index bf93fa20..f2731ade 100644
--- a/docs/tools/agat_convert_bed2gff.md
+++ b/docs/tools/agat_convert_bed2gff.md
@@ -1,11 +1,11 @@
-# agat\_convert\_bed2gff.pl
+# agat_convert_bed2gff.pl
## DESCRIPTION
The script takes a bed file as input, and will translate it in gff format.
The BED format is described [here](https://genome.ucsc.edu/FAQ/FAQformat.html##format1)
-The script converts 0-based, half-open \[start-1, end) bed file to
-1-based, closed \[start, end\] General Feature Format v3 (GFF3).
+The script converts 0-based, half-open [start-1, end) bed file to
+1-based, closed [start, end] General Feature Format v3 (GFF3).
## SYNOPSIS
@@ -23,23 +23,23 @@ agat_convert_bed2gff.pl -h
- **--source**
The source informs about the tool used to produce the data and is stored in 2nd field of a gff file.
- Example: Stringtie,Maker,Augustus,etc. \[default: data\]
+ Example: Stringtie,Maker,Augustus,etc. [default: data]
-- **--primary\_tag**
+- **--primary_tag**
- The primary\_tag corresponds to the data type and is stored in 3rd field of a gff file.
- Example: gene,mRNA,CDS,etc. \[default: gene\]
+ The primary_tag corresponds to the data type and is stored in 3rd field of a gff file.
+ Example: gene,mRNA,CDS,etc. [default: gene]
-- **--inflate\_off**
+- **--inflate_off**
By default we inflate the block fields (blockCount, blockSizes, blockStarts) to create subfeatures
- of the main feature (primary\_tag). The type of subfeature created is based on the
- inflate\_type parameter. If you do not want this inflating behaviour you can deactivate it
- by using the --inflate\_off option.
+ of the main feature (primary_tag). The type of subfeature created is based on the
+ inflate_type parameter. If you do not want this inflating behaviour you can deactivate it
+ by using the --inflate_off option.
-- **--inflate\_type**
+- **--inflate_type**
- Feature type (3rd column in gff) created when inflate parameter activated \[default: exon\].
+ Feature type (3rd column in gff) created when inflate parameter activated [default: exon].
- **--verbose**
diff --git a/docs/tools/agat_convert_embl2gff.md b/docs/tools/agat_convert_embl2gff.md
index 584f8b32..63991ea5 100644
--- a/docs/tools/agat_convert_embl2gff.md
+++ b/docs/tools/agat_convert_embl2gff.md
@@ -1,4 +1,4 @@
-# agat\_convert\_embl2gff.pl
+# agat_convert_embl2gff.pl
## DESCRIPTION
@@ -22,7 +22,7 @@ agat_converter_embl2gff.pl --embl infile.embl [ -o outfile ]
This is an EMBL format dedicated for submission and contains particularity to deal with.
This parameter is needed to get a proper sequence id in the GFF3 from an embl made with EMBLmyGFF3.
-- **--primary\_tag**, **--pt**, **-t**
+- **--primary_tag**, **--pt**, **-t**
List of "primary tag". Useful to discard or keep specific features.
Multiple tags must be coma-separated.
diff --git a/docs/tools/agat_convert_genscan2gff.md b/docs/tools/agat_convert_genscan2gff.md
index 9a1d9e0a..b0e95b90 100644
--- a/docs/tools/agat_convert_genscan2gff.md
+++ b/docs/tools/agat_convert_genscan2gff.md
@@ -1,10 +1,10 @@
-# agat\_convert\_genscan2gff.pl
+# agat_convert_genscan2gff.pl
## DESCRIPTION
The script takes a genscan file as input, and will translate it in gff format.
-The genscan format is described here: http://genome.crg.es/courses/Bioinformatics2003\_genefinding/results/genscan.html
-/!\\ vvv Known problem vvv /!\\
+The genscan format is described here: http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html
+/!\\ vvv Known problem vvv /!\\
You must have submited only DNA sequence, wihtout any header!!
Indeed the tool expects only DNA sequences and does not crash/warn if an header
is submited along the sequence.
@@ -12,7 +12,7 @@ e.g If you have an header ">seq" s-e-q are seen as the 3 first nucleotides of th
Then all prediction location are shifted accordingly.
(checked only on the online version http://argonaute.mit.edu/GENSCAN.html. I don't
know if there is the same pronlem elsewhere.)
-/!\\ ^^^ Known problem ^^^^ /!\\
+/!\\ ^^^ Known problem ^^^^ /!\\
## SYNOPSIS
@@ -30,12 +30,12 @@ agat_convert_genscan2gff.pl -h
- **--source**
The source informs about the tool used to produce the data and is stored in 2nd field of a gff file.
- Example: Stringtie,Maker,Augustus,etc. \[default: data\]
+ Example: Stringtie,Maker,Augustus,etc. [default: data]
-- **--primary\_tag**
+- **--primary_tag**
- The primary\_tag corresponf to the data type and is stored in 3rd field of a gff file.
- Example: gene,mRNA,CDS,etc. \[default: gene\]
+ The primary_tag corresponds to the data type and is stored in 3rd field of a gff file.
+ Example: gene,mRNA,CDS,etc. [default: gene]
- **--verbose**
diff --git a/docs/tools/agat_convert_mfannot2gff.md b/docs/tools/agat_convert_mfannot2gff.md
index a3c211f4..b9a1c336 100644
--- a/docs/tools/agat_convert_mfannot2gff.md
+++ b/docs/tools/agat_convert_mfannot2gff.md
@@ -1,4 +1,4 @@
-# agat\_convert\_mfannot2gff.pl
+# agat_convert_mfannot2gff.pl
## DESCRIPTION
diff --git a/docs/tools/agat_convert_minimap2_bam2gff.md b/docs/tools/agat_convert_minimap2_bam2gff.md
index 46e2d388..f68f9efb 100644
--- a/docs/tools/agat_convert_minimap2_bam2gff.md
+++ b/docs/tools/agat_convert_minimap2_bam2gff.md
@@ -1,11 +1,11 @@
-# agat\_convert\_minimap2\_bam2gff.pl
+# agat_convert_minimap2_bam2gff.pl
## DESCRIPTION
The script converts output from minimap2 (bam or sam) into GFF file.
To get bam from minimap2 use the following command:
-minimap2 -ax splice:hq genome.fa Asecodes\_parviclava.nucest.fa | samtools sort -O BAM -o output.bam
+minimap2 -ax splice:hq genome.fa Asecodes_parviclava.nucest.fa | samtools sort -O BAM -o output.bam
To use bam with this script you will need samtools in your path.
diff --git a/docs/tools/agat_convert_sp_gff2bed.md b/docs/tools/agat_convert_sp_gff2bed.md
index a71cd62c..10552c7e 100644
--- a/docs/tools/agat_convert_sp_gff2bed.md
+++ b/docs/tools/agat_convert_sp_gff2bed.md
@@ -1,4 +1,4 @@
-# agat\_convert\_sp\_gff2bed.pl
+# agat_convert_sp_gff2bed.pl
## DESCRIPTION
diff --git a/docs/tools/agat_convert_sp_gff2gtf.md b/docs/tools/agat_convert_sp_gff2gtf.md
index 7947acf1..1937115c 100644
--- a/docs/tools/agat_convert_sp_gff2gtf.md
+++ b/docs/tools/agat_convert_sp_gff2gtf.md
@@ -1,4 +1,4 @@
-# agat\_convert\_sp\_gff2gtf.pl
+# agat_convert_sp_gff2gtf.pl
## DESCRIPTION
@@ -6,16 +6,16 @@ The script aims to convert any GTF/GFF file into a proper GTF file.
Full information about the format can be found here: [https://agat.readthedocs.io/en/latest/gxf.html](https://agat.readthedocs.io/en/latest/gxf.html)
You can choose among 7 different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax).
Depending the version selected the script will filter out the features that are not accepted.
-For GTF2.5 and 3, every level1 feature (e.g nc\_gene pseudogene) will be converted into
+For GTF2.5 and 3, every level1 feature (e.g nc_gene pseudogene) will be converted into
gene feature and every level2 feature (e.g mRNA ncRNA) will be converted into
transcript feature.
You can even produce a GFF-like GTF using the relax option. It allows to keep all
original feature types (3rd column). No modification will occur e.g. mRNA to transcript.
-To be fully GTF compliant all feature have a gene\_id and a transcript\_id attribute.
-The gene\_id is unique identifier for the genomic source of the transcript, which is
+To be fully GTF compliant all feature have a gene_id and a transcript_id attribute.
+The gene_id is unique identifier for the genomic source of the transcript, which is
used to group transcripts into genes.
-The transcript\_id is a unique identifier for the predicted transcript,
+The transcript_id is a unique identifier for the predicted transcript,
which is used to group features into transcripts.
## SYNOPSIS
@@ -31,22 +31,22 @@ agat_convert_sp_gff2gtf -h
Input GFF file that will be read
-- **--gtf\_version**
+- **--gtf_version**
version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default 3.
relax: all feature types are accepted.
- 3: GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine, start\_codon, stop\_codon, three\_prime\_utr and five\_prime\_utr
+ 3: GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine, start_codon, stop_codon, three_prime_utr and five_prime_utr
- 2.5: GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR, start\_codon, stop\_codon, Selenocysteine
+ 2.5: GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR, start_codon, stop_codon, Selenocysteine
- 2.2: GTF2.2 (9 feature types accepted): CDS, start\_codon, stop\_codon, 5UTR, 3UTR, inter, inter\_CNS, intron\_CNS and exon
+ 2.2: GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon
- 2.1: GTF2.1 (6 feature types accepted): CDS, start\_codon, stop\_codon, exon, 5UTR, 3UTR
+ 2.1: GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, exon, 5UTR, 3UTR
- 2: GTF2 (4 feature types accepted): CDS, start\_codon, stop\_codon, exon
+ 2: GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, exon
- 1: GTF1 (5 feature types accepted): CDS, start\_codon, stop\_codon, exon, intron
+ 1: GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, exon, intron
- **-o** , **--output** , **--out** , **--outfile** or **--gtf**
diff --git a/docs/tools/agat_convert_sp_gff2tsv.md b/docs/tools/agat_convert_sp_gff2tsv.md
index a44c76ce..d895d152 100644
--- a/docs/tools/agat_convert_sp_gff2tsv.md
+++ b/docs/tools/agat_convert_sp_gff2tsv.md
@@ -1,4 +1,4 @@
-# agat\_convert\_sp\_gff2tsv.pl
+# agat_convert_sp_gff2tsv.pl
## DESCRIPTION
diff --git a/docs/tools/agat_convert_sp_gff2zff.md b/docs/tools/agat_convert_sp_gff2zff.md
index 0f8b0931..475435b8 100644
--- a/docs/tools/agat_convert_sp_gff2zff.md
+++ b/docs/tools/agat_convert_sp_gff2zff.md
@@ -1,4 +1,4 @@
-# agat\_convert\_sp\_gff2zff.pl
+# agat_convert_sp_gff2zff.pl
## DESCRIPTION
diff --git a/docs/tools/agat_convert_sp_gxf2gxf.md b/docs/tools/agat_convert_sp_gxf2gxf.md
index 885baa23..32c702dc 100644
--- a/docs/tools/agat_convert_sp_gxf2gxf.md
+++ b/docs/tools/agat_convert_sp_gxf2gxf.md
@@ -1,4 +1,4 @@
-# agat\_convert\_sp\_gxf2gxf.pl
+# agat_convert_sp_gxf2gxf.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_Prokka_inferNameFromAttributes.md b/docs/tools/agat_sp_Prokka_inferNameFromAttributes.md
index 804cb5f1..8db9b674 100644
--- a/docs/tools/agat_sp_Prokka_inferNameFromAttributes.md
+++ b/docs/tools/agat_sp_Prokka_inferNameFromAttributes.md
@@ -1,4 +1,4 @@
-# agat\_sp\_Prokka\_inferNameFromAttributes.pl
+# agat_sp_Prokka_inferNameFromAttributes.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_add_attribute_shortest_exon_size.md b/docs/tools/agat_sp_add_attribute_shortest_exon_size.md
index 06bf344d..07c555e5 100644
--- a/docs/tools/agat_sp_add_attribute_shortest_exon_size.md
+++ b/docs/tools/agat_sp_add_attribute_shortest_exon_size.md
@@ -1,8 +1,8 @@
-# agat\_sp\_add\_attribute\_shortest\_exon\_size.pl
+# agat_sp_add_attribute_shortest_exon_size.pl
## DESCRIPTION
-The script add the attribute \ to each gene and rna, which will hold the size of the shortest exon in bp.
+The script adds the attribute `shortest_exon` to each gene and rna, which will hold the size of the shortest exon in bp.
## SYNOPSIS
diff --git a/docs/tools/agat_sp_add_attribute_shortest_intron_size.md b/docs/tools/agat_sp_add_attribute_shortest_intron_size.md
index 878c977b..58061dab 100644
--- a/docs/tools/agat_sp_add_attribute_shortest_intron_size.md
+++ b/docs/tools/agat_sp_add_attribute_shortest_intron_size.md
@@ -1,8 +1,8 @@
-# agat\_sp\_add\_attribute\_shortest\_intron\_size.pl
+# agat_sp_add_attribute_shortest_intron_size.pl
## DESCRIPTION
-The script add the attribute \ to each gene and rna, which will hold the size of the shortest intron in bp.
+The script adds the attribute `shortest_intron` to each gene and rna, which will hold the size of the shortest intron in bp.
## SYNOPSIS
diff --git a/docs/tools/agat_sp_add_intergenic_regions.md b/docs/tools/agat_sp_add_intergenic_regions.md
index 0f606d2b..23bd2efa 100644
--- a/docs/tools/agat_sp_add_intergenic_regions.md
+++ b/docs/tools/agat_sp_add_intergenic_regions.md
@@ -1,8 +1,8 @@
-# agat\_sp\_add\_intergenic\_regions.pl
+# agat_sp_add_intergenic_regions.pl
## DESCRIPTION
-The script aims to add intergenic features (intergenic\_region) to gtf/gff file.
+The script aims to add intergenic features (intergenic_region) to gtf/gff file.
The intergenic regions are deduced from gene features (feature type gene from the 3rd column).
## SYNOPSIS
@@ -24,8 +24,8 @@ agat_sp_add_intergenic_regions.pl --help
- **-c** or **--config**
- String - Input agat config file. By default AGAT takes as input agat\_config.yaml file from the working directory if any,
- otherwise it takes the orignal agat\_config.yaml shipped with AGAT. To get the agat\_config.yaml locally type: "agat config --expose".
+ String - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any,
+ otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose".
The --config option gives yo the possibility to use your own AGAT config file (located elsewhere or named differently).
- **-v** or **--verbose**
diff --git a/docs/tools/agat_sp_add_introns.md b/docs/tools/agat_sp_add_introns.md
index 41dbbb9d..a8297cb6 100644
--- a/docs/tools/agat_sp_add_introns.md
+++ b/docs/tools/agat_sp_add_introns.md
@@ -1,4 +1,4 @@
-# agat\_sp\_add\_introns.pl
+# agat_sp_add_introns.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_add_splice_sites.md b/docs/tools/agat_sp_add_splice_sites.md
index f2d9f798..05187f15 100644
--- a/docs/tools/agat_sp_add_splice_sites.md
+++ b/docs/tools/agat_sp_add_splice_sites.md
@@ -1,8 +1,8 @@
-# agat\_sp\_add\_splice\_sites.pl
+# agat_sp_add_splice_sites.pl
## DESCRIPTION
-The script aims to add splice sites features (five\_prime\_cis\_splice\_site and three\_prime\_cis\_splice\_site) to gtf/gff file.
+The script aims to add splice sites features (five_prime_cis_splice_site and three_prime_cis_splice_site) to gtf/gff file.
The splice sites are deduced from CDS features.
## SYNOPSIS
@@ -24,8 +24,8 @@ agat_sp_add_splice_sites.pl --help
- **-c** or **--config**
- String - Input agat config file. By default AGAT takes as input agat\_config.yaml file from the working directory if any,
- otherwise it takes the orignal agat\_config.yaml shipped with AGAT. To get the agat\_config.yaml locally type: "agat config --expose".
+ String - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any,
+ otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose".
The --config option gives yo the possibility to use your own AGAT config file (located elsewhere or named differently).
- **--help** or **-h**
diff --git a/docs/tools/agat_sp_add_start_and_stop.md b/docs/tools/agat_sp_add_start_and_stop.md
index f0bf5b39..40de0675 100644
--- a/docs/tools/agat_sp_add_start_and_stop.md
+++ b/docs/tools/agat_sp_add_start_and_stop.md
@@ -1,4 +1,4 @@
-# agat\_sp\_add\_start\_and\_stop.pl.pl
+# agat_sp_add_start_and_stop.pl
## DESCRIPTION
@@ -25,7 +25,7 @@ agat_sp_add_start_and_stop.pl.pl --help
- **--ct**, **--codon** or **--table**
- Codon table to use. \[default 1\]
+ Codon table to use. [default 1]
- **--out**, **--output** or **-o**
diff --git a/docs/tools/agat_sp_alignment_output_style.md b/docs/tools/agat_sp_alignment_output_style.md
index f3f6d554..ec80f0b4 100644
--- a/docs/tools/agat_sp_alignment_output_style.md
+++ b/docs/tools/agat_sp_alignment_output_style.md
@@ -1,9 +1,9 @@
-# agat\_sp\_alignment\_output\_style.pl
+# agat_sp_alignment_output_style.pl
## DESCRIPTION
The script takes a normal gtf/gff annotation format file and convert it
-to gff3 alignment format. It means it add a structure of match / match\_part
+to gff3 alignment format. It means it adds a structure of match / match_part
as relationship between the different features.
## SYNOPSIS
@@ -22,7 +22,7 @@ agat_sp_alignment_output_style.pl --help
- **-c** or **--ct**
When the gff file provided is not correcly formated and features are linked
- to each other by a comon tag (by default locus\_tag), this tag can be provided
+ to each other by a common tag (by default locus_tag), this tag can be provided
to parse the file correctly.
- **-v**
diff --git a/docs/tools/agat_sp_clipN_seqExtremities_and_fixCoordinates.md b/docs/tools/agat_sp_clipN_seqExtremities_and_fixCoordinates.md
index c808a6a2..6abeea3e 100644
--- a/docs/tools/agat_sp_clipN_seqExtremities_and_fixCoordinates.md
+++ b/docs/tools/agat_sp_clipN_seqExtremities_and_fixCoordinates.md
@@ -1,4 +1,4 @@
-# agat\_sp\_clipN\_seqExtremities\_and\_fixCoordinates.pl
+# agat_sp_clipN_seqExtremities_and_fixCoordinates.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_compare_two_BUSCOs.md b/docs/tools/agat_sp_compare_two_BUSCOs.md
index f54bc04f..df37ab86 100644
--- a/docs/tools/agat_sp_compare_two_BUSCOs.md
+++ b/docs/tools/agat_sp_compare_two_BUSCOs.md
@@ -1,4 +1,4 @@
-# agat\_sp\_compare\_two\_BUSCOs.pl
+# agat_sp_compare_two_BUSCOs.pl
## DESCRIPTION
@@ -12,8 +12,8 @@ Where EOG090W00UK is the BUSCO name/label/group investigated, and complete2dupli
By loading these gff tracks in a web browser and helped by other tracks (e.g the genome annotation/prediction)
can help to understand why the BUSCO have been classified differently from run1 to run2.
In other term it allows to catch potential problems in an annotation.
-agat\_sp\_compare\_two\_BUSCOs.pl has been tested with results from BUSCO version 3 and 4.
-/!\\ The tool expects a BUSCO run in genome mode as input folder 1 and a BUSCO run in proteins mode
+agat_sp_compare_two_BUSCOs.pl has been tested with results from BUSCO version 3 and 4.
+/!\ The tool expects a BUSCO run in genome mode as input folder 1 and a BUSCO run in proteins mode
as input folder 2. You can also decide to provide twice (--f1 --f2) the same BUSCO run in genome mode,
the tool will only extract the annotation of the complete,fragmented and duplicated annotated BUSCOs from the 1st run in gff.
diff --git a/docs/tools/agat_sp_compare_two_annotations.md b/docs/tools/agat_sp_compare_two_annotations.md
index d5a2f270..c990af48 100644
--- a/docs/tools/agat_sp_compare_two_annotations.md
+++ b/docs/tools/agat_sp_compare_two_annotations.md
@@ -1,4 +1,4 @@
-# agat\_sp\_compare\_two\_annotations.pl
+# agat_sp_compare_two_annotations.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_complement_annotations.md b/docs/tools/agat_sp_complement_annotations.md
index 5d2ccd6f..0b309d9c 100644
--- a/docs/tools/agat_sp_complement_annotations.md
+++ b/docs/tools/agat_sp_complement_annotations.md
@@ -1,4 +1,4 @@
-# agat\_sp\_complement\_annotations.pl
+# agat_sp_complement_annotations.pl
## DESCRIPTION
@@ -8,7 +8,7 @@ A l1 feature from the addfile.gff without a CDS that overlaps a l1 feature with
A l1 feature from the addfile.gff with a CDS that overlaps a l1 feature without a CDS from the reference annotation will be added.
A l1 feature from the addfile.gff with a CDS that overlaps a l1 feature with a CDS from the reference annotation will be added only if the CDSs don't overlap.
A l1 feature from the addfile.gff without a CDS that overlaps a l1 feature without a CDS from the reference annotation will be added only if none of the l3 features overlap.
-/!\\ It is sufficiant that only one isoform is overlapping to prevent the whole gene (l1 feature) from the addfile.gff to be added in the output.
+/!\ It is sufficient that only one isoform is overlapping to prevent the whole gene (l1 feature) from the addfile.gff to be added in the output.
## SYNOPSIS
@@ -26,10 +26,10 @@ agat_sp_complement_annotations.pl --help
- **--add** or **-a**
Annotation(s) file you would like to use to complement the reference annotation. You can specify as much file you want like so: -a addfile1 -a addfile2 -a addfile3
- /!\\ The order you provide these files matter. Once the reference file has been complemented by file1, this new annotation becomes the new reference that will be complemented by file2 etc.
- /!\\ The result with -a addfile1 -a addfile2 will differ to the result from -a addfile2 -a addfile1. So, be aware of what you want if you use several addfiles.
+ /!\ The order in which you provide these files matters. Once the reference file has been complemented by file1, this new annotation becomes the new reference that will be complemented by file2 etc.
+ /!\ The result with -a addfile1 -a addfile2 will differ from the result of -a addfile2 -a addfile1. So, be aware of what you want if you use several addfiles.
-- **--size\_min** or **-s**
+- **--size_min** or **-s**
Option to keep the non-overlping gene only if the CDS size (in nucleotide) is over the minimum size defined. Default = 0 that means all of them are kept.
diff --git a/docs/tools/agat_sp_ensembl_output_style.md b/docs/tools/agat_sp_ensembl_output_style.md
index 1868a4c6..3bc94fe2 100644
--- a/docs/tools/agat_sp_ensembl_output_style.md
+++ b/docs/tools/agat_sp_ensembl_output_style.md
@@ -1,4 +1,4 @@
-# agat\_sp\_ensembl\_output\_style.pl
+# agat_sp_ensembl_output_style.pl
## DESCRIPTION
@@ -21,7 +21,7 @@ agat_sp_ensembl_output_style.pl --help
- **-c** or **--ct**
When the gff file provided is not correcly formated and features are linked
- to each other by a comon tag (by default locus\_tag), this tag can be provided
+ to each other by a common tag (by default locus_tag), this tag can be provided
to parse the input file correctly.
- **-v**
diff --git a/docs/tools/agat_sp_extract_attributes.md b/docs/tools/agat_sp_extract_attributes.md
index 33b0f6a2..43e2fd89 100644
--- a/docs/tools/agat_sp_extract_attributes.md
+++ b/docs/tools/agat_sp_extract_attributes.md
@@ -1,4 +1,4 @@
-# agat\_sp\_extract\_attributes.pl
+# agat_sp_extract_attributes.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_extract_sequences.md b/docs/tools/agat_sp_extract_sequences.md
index 45956e84..8c63eb73 100644
--- a/docs/tools/agat_sp_extract_sequences.md
+++ b/docs/tools/agat_sp_extract_sequences.md
@@ -1,4 +1,4 @@
-# agat\_sp\_extract\_sequences.pl
+# agat_sp_extract_sequences.pl
## Briefly in pictures
@@ -16,17 +16,17 @@ The result is written to the specified output file, or to STDOUT.
Features spanning several locations (e.g. UTR, CDS), are extracted chunk by chunk
and merged to create the biological feature. If you wish to extract each chunck independently,
please refer to the --split parameter. To see the list of features that may span over several locations
-within AGAT run: agat\_convert\_sp\_gxf2gxf.pl --expose
-and then look at the file called features\_spread.json.
+within AGAT run: agat_convert_sp_gxf2gxf.pl --expose
+and then look at the file called features_spread.json.
The headers are formated like that:
-\>ID gene=gene\_ID name=NAME seq\_id=Chromosome\_ID type=cds 5'extra=VALUE
+`>ID gene=gene_ID name=NAME seq_id=Chromosome_ID type=cds 5'extra=VALUE`
The ID is the identifier of the feature (ID attribute in the 9th column.
If missing it is created by AGAT)
The gene value will be the ID of the level1 feature (the top feature of the record)
The name value is optional and will be written only if the Name attribute exists in the gff.
-The seq\_id value is the value from 1st column within the gff.
+The seq_id value is the value from 1st column within the gff.
The type value holds the information of the feature type extracted.
5'extra or 3'extra is optional, it holds the information of extra nucleotides
removed or added when using the downstream and/or upstream parameter.
@@ -71,7 +71,7 @@ agat_sp_extract_sequences.pl --help
## OPTIONS
-- **--alternative\_start\_codon** or **--asc**
+- **--alternative_start_codon** or **--asc**
Bolean - When activated it can affect the translation of the start codon.
Indeed alternative start codons exist, and are translated by the cells'machinery
@@ -88,34 +88,34 @@ agat_sp_extract_sequences.pl --help
then reverse complemented). It corresponds to extract the exons sequences,
merge them, and reverse complement the sequence (--type exon --merge --revcomp).
-- **--clean\_final\_stop** or **--cfs**
+- **--clean_final_stop** or **--cfs**
Boolean - The Clean Final Stop option allows removing the translation of the
- final stop codons that is represented by the <\*> character.
+ final stop codons that is represented by the <*> character.
This character can be disturbing for many programs (e.g interproscan)
-- **--clean\_internal\_stop** or **--cis**
+- **--clean_internal_stop** or **--cis**
Boolean - The Clean Internal Stop option allows replacing the translation of the
- stop codons present among the sequence that is represented by the <\*> character
- by . Indeed the <\*> character can be disturbing for many programs
+ stop codons present among the sequence that is represented by the <*> character
+ by `X`. Indeed the <*> character can be disturbing for many programs
(e.g interproscan)
- **--codon**, **--table** or **--ct**
- Integer - Allow to choose the codon table for the translation. \[default 1\]
+ Integer - Allow to choose the codon table for the translation. [default 1]
- **--do**, **-3**, **--three**, **-down** or **-downstream**
Integer - It will take that number of nucleotide in more at the 3' extremity.
- /!\\ You must activate the option "--full" if you with to extract only the most downstream part of certain feature (exon,cds,utr)
+ /!\ You must activate the option "--full" if you wish to extract only the most downstream part of certain features (exon,cds,utr)
otherwise you will extract each downstream parts of the subfeatures (e.g many cds parts may be needed to shape a cds in its whole).
- **--eo**
Boolean - Called 'extremity only', this option will extract only the adjacent parts of a feature.
This option has to be activated with -u and/or -p option.
- /!\\ using -u and -p together builds a chimeric sequence which will be the concatenation of the left and right extremities of a feature.
+ /!\ Using -u and -p together builds a chimeric sequence which will be the concatenation of the left and right extremities of a feature.
- **-f** or **--fasta**
@@ -129,7 +129,7 @@ agat_sp_extract_sequences.pl --help
The use of that option with '--type exon' will extract the pre-mRNA sequence (i.e with introns).
Use of that option on cds will give the pre-mRNA without the untraslated regions (UTRs).
(To extract an mRNA as it is defined biologicaly you need to use the
- \`-t exon\` option with the --merge option)
+ `-t exon` option with the --merge option)
- **-g**, **--gff** or **-ref**
@@ -178,12 +178,12 @@ agat_sp_extract_sequences.pl --help
String - Output fasta file. If no output file is specified, the output will be
written to STDOUT.
-- **--plus\_strand\_only**
+- **--plus_strand_only**
Boolean - By default the extrated feature sequences from a minus strand is
reverse complemented. Activating this option you will always get sequence from plus
strand ( not reverse complemented).
- You can get the opposite (minus strand only) by using --plus\_strand\_only --revcomp
+ You can get the opposite (minus strand only) by using --plus_strand_only --revcomp
- **-p**, **--protein** or **--aa**
@@ -191,7 +191,7 @@ agat_sp_extract_sequences.pl --help
By default the codon table used is the 1 (Standard).
See --table parameter for more options.
-- **--remove\_orf\_offset** or **--roo**
+- **--remove_orf_offset** or **--roo**
Boolean - CDS can start with a phase different from 0 when a gene model is fragmented.
When asking for protein translation this (start) offset is trimmed out automatically.
@@ -201,7 +201,7 @@ agat_sp_extract_sequences.pl --help
- **--revcomp**
- Boolean - To reverse complement the extracted sequence \[default - False\].
+ Boolean - To reverse complement the extracted sequence [default - False].
By default the extrated feature sequences from a minus strand is
reverse complemented. Consequently, for minus strand features that option will
extract the sequences from plus strand from left to right.
@@ -218,19 +218,19 @@ agat_sp_extract_sequences.pl --help
String - Define the feature you want to extract the sequence from.
Default 'cds'.
- Most common choice are: gene,mrna,exon,cds,trna,three\_prime\_utr,five\_prime\_utr.
+ Most common choice are: gene,mrna,exon,cds,trna,three_prime_utr,five_prime_utr.
When you choose exon (or cds,utr,etc.), all the exons of a same parent feature
are attached together before to extract the sequence. If you wish to extract each
exon of an mRNA independently, see option --split.
- /!\\ \`-t mRNA\` will extract the features labeled as "mRNA" and corresponds to the cdna\*
+ /!\ `-t mRNA` will extract the features labeled as "mRNA" and corresponds to the cdna*
because it contains the introns if any. It does not actually extract the mRNAs as
+ it is defined biologically. To extract the mRNA as defined biologically you must use `-t exon`.
- \*Not a real cdna because it is not reversed
+ it is defined biologicaly. To extract the mRNA as defined biologicaly you must use `-t exon`.
+ *Not a real cdna because it is not reversed
- **--up**, **-5**, **--five** or **-upstream**
Integer - It will take that number of nucleotide in more at the 5' extremity.
- /!\\ You must activate the option "--full" if you wish to extract only the most
+ /!\ You must activate the option "--full" if you wish to extract only the most
upstream part of certain features (exon,cds,utr)
otherwise you will extract each upstream parts of the subfeatures
(e.g many cds parts may be needed to shape a cds in its whole).
diff --git a/docs/tools/agat_sp_filter_by_ORF_size.md b/docs/tools/agat_sp_filter_by_ORF_size.md
index 3ca23ff9..2b67f5eb 100644
--- a/docs/tools/agat_sp_filter_by_ORF_size.md
+++ b/docs/tools/agat_sp_filter_by_ORF_size.md
@@ -1,4 +1,4 @@
-# agat\_sp\_filter\_by\_ORF\_size.pl
+# agat_sp_filter_by_ORF_size.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_filter_by_locus_distance.md b/docs/tools/agat_sp_filter_by_locus_distance.md
index d64cc9d2..f9ca2bcd 100644
--- a/docs/tools/agat_sp_filter_by_locus_distance.md
+++ b/docs/tools/agat_sp_filter_by_locus_distance.md
@@ -1,4 +1,4 @@
-# agat\_sp\_filter\_by\_locus\_distance.pl
+# agat_sp_filter_by_locus_distance.pl
## DESCRIPTION
@@ -26,9 +26,9 @@ agat_sp_filter_by_locus_distance.pl --help
The minimum inter-loci distance to allow. No default (will not apply
filter by default).
-- **--add** or **--add\_flag**
+- **--add** or **--add_flag**
- Instead of filter the result into two output files, write only one and add the flag <low\_dist> in the gff.(tag = Lvalue or tag = Rvalue where L is left and R right and the value is the distance with accordingle the left or right locus)
+ Instead of filtering the result into two output files, write only one and add the flag `<low_dist>` in the gff.(tag = Lvalue or tag = Rvalue where L is left and R right and the value is the distance with accordingly the left or right locus)
- **-o** , **--output** , **--out** or **--outfile**
diff --git a/docs/tools/agat_sp_filter_by_mrnaBlastValue.md b/docs/tools/agat_sp_filter_by_mrnaBlastValue.md
index a4a8df92..917c1eb1 100644
--- a/docs/tools/agat_sp_filter_by_mrnaBlastValue.md
+++ b/docs/tools/agat_sp_filter_by_mrnaBlastValue.md
@@ -1,4 +1,4 @@
-# agat\_sp\_filter\_by\_mrnaBlastValue.pl
+# agat_sp_filter_by_mrnaBlastValue.pl
## DESCRIPTION
diff --git a/docs/tools/agat_sp_filter_feature_by_attribute_presence.md b/docs/tools/agat_sp_filter_feature_by_attribute_presence.md
index 83b0658c..9fd46d7f 100644
--- a/docs/tools/agat_sp_filter_feature_by_attribute_presence.md
+++ b/docs/tools/agat_sp_filter_feature_by_attribute_presence.md
@@ -1,11 +1,11 @@
-# agat\_sp\filter\_feature\_by\_attribute\_presence.pl
+# agat_sp_filter_feature_by_attribute_presence.pl
## DESCRIPTION
The script aims to filter features according to attribute presence (9th column).
If the attribute exists, the feature is discarded.
Attribute are stored in the 9th column and have this shape: tag=value
-/!\\ Removing a level1 or level2 feature will automatically remove all linked subfeatures, and
+/!\ Removing a level1 or level2 feature will automatically remove all linked subfeatures, and
removing all children of a feature will automatically remove this feature too.
## SYNOPSIS
diff --git a/docs/tools/agat_sp_filter_feature_by_attribute_value.md b/docs/tools/agat_sp_filter_feature_by_attribute_value.md
index b9fa6e50..ff5a182f 100644
--- a/docs/tools/agat_sp_filter_feature_by_attribute_value.md
+++ b/docs/tools/agat_sp_filter_feature_by_attribute_value.md
@@ -1,4 +1,4 @@
-# agat\_sp\_filter\_feature\_by\_attribute\_value.pl
+# agat_sp_filter_feature_by_attribute_value.pl
## DESCRIPTION
@@ -8,9 +8,9 @@ The script aims to filter features according to attribute value (9th column).
- If the attribute tag is missing (test cannot be applyed), the feature will be written into