Add placeholder sphinx documentation

LUMC · Oct 30, 2023 · 051a36e · 051a36e
1 parent bb997f9
commit 051a36e
Show file tree

Hide file tree

Showing 9 changed files with 233 additions and 0 deletions.
diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for building the Sphinx documentation.
+# SPHINXOPTS and SPHINXBUILD may come from the environment or the command
+# line; SOURCEDIR and BUILDDIR can be overridden on the command line.
+SPHINXBUILD ?= sphinx-build
+SPHINXOPTS  ?= -W
+BUILDDIR    := build
+SOURCEDIR   := source
+
+# Arguments shared by every sphinx-build call; ${O} carries extra user options.
+sphinx_args = "${SOURCEDIR}" "${BUILDDIR}" ${SPHINXOPTS} ${O}
+
+.PHONY: help Makefile
+
+# First real target, so a bare "make" behaves like "make help".
+help:
+	@${SPHINXBUILD} -M help ${sphinx_args}
+
+# Catch-all: hand any other target name to sphinx-build's "make mode" (-M).
+%: Makefile
+	@${SPHINXBUILD} -M $@ ${sphinx_args}
diff --git a/docs/fusion.md b/docs/fusion.md
@@ -0,0 +1,30 @@
+# fusion
+
+The `fusion` module uses [Arriba](https://github.com/suhrig/arriba) to call fusion events.
+
+## Tools
+This module uses the bam file from [STAR](https://github.com/alexdobin/STAR) to
+call fusion events.
+
+The fusion events are filtered based on the `blacklist` from Arriba itself. Also, only fusions where at least one of the involved genes is in `report_genes` will be included in the final output.
+
+For each fusion event that remains after filtering, we also generate a figure using the `draw_fusions.R` script provided by Arriba.
+
+## Input
+The input for this module is a single bam file, generated by STAR per sample, specified in a PEP configuration file, as is shown [here](../test/pep/chrM-bam.csv).
+
+## Output
+The output of this module is a JSON file with an overview of the most important results, as well as a number of other output files:
+- The final Arriba output file, after filtering.
+- One figure per fusion event
+
+## configuration
+| Option                      | description                             | required |
+| --------------------------- | --------------------------------------- | -------- |
+| `genome_fasta`              | Reference genome, in FASTA format       | yes      |
+| `gtf`                       | GTF file with transcript information    | yes      |
+| `blacklist`                 | File of blacklisted fusion events       | yes      |
+| `known_fusions`             | A file of known fusion events           | yes      |
+| `report_genes`              | A file of genes to report fusions for   | yes      |
+| `cytobands`                 | A file with cytoband information        | yes      |
+| `protein_domains`           | A file with protein domains             | yes      |
diff --git a/docs/itd.md b/docs/itd.md
@@ -0,0 +1,28 @@
+# itd
+
+The `itd` module is responsible for finding Internal Tandem Duplications in select genes, specifically *FLT3* and *KMT2A*.
+
+## Tools
+First, this module uses [bwa](https://github.com/lh3/bwa) to align the trimmed reads to a custom reference, which contains the transcript sequence of *FLT3* and *KMT2A*. Next, a custom tool, [rose-dt](https://git.lumc.nl/hem/rose-dt),
+is used to detect and visualise Internal Tandem Duplications, using evidence from soft-clipped reads.
+
+## Input
+The input for this module is a single pair of FastQ files per sample, specified in a PEP configuration file, as is shown [here](../test/pep/itd.csv).
+
+## Output
+The output of this module is a JSON file with an overview of the most important results, as well as a number of other output files:
+- For both *FLT3* and *KMT2A*, a .csv file with the detected tandem duplications.
+- For both *FLT3* and *KMT2A*, a figure to visualise the detected tandem duplications.
+
+## configuration
+The configuration for this module is tailored to the provided reference files; be very careful if you want to modify any of these settings.
+
+| Option                      | description                                   | required |
+| --------------------------- | --------------------------------------------- | -------- |
+| `fasta`                     | The fasta file, which contains FLT3 and KMT2A | yes      |
+| `flt3_name`                 | The name of the FLT3 sequence                 | yes      |
+| `flt3_start`                | The start of the FLT3 region to investigate   | yes      |
+| `flt3_end`                  | The end of the FLT3 region to investigate     | yes      |
+| `kmt2a_name`                | The name of the KMT2A sequence                | yes      |
+| `kmt2a_start`               | The start of the KMT2A region to investigate  | yes      |
+| `kmt2a_end`                 | The end of the KMT2A region to investigate    | yes      |
diff --git a/docs/make.bat b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+REM Probe for sphinx-build before dispatching, so "help" gets the hint too; undo pushd on failure.
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	popd
+	exit /b 1
+)
+if "%1" == "" goto help
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/qc-seq.md b/docs/qc-seq.md
@@ -0,0 +1,25 @@
+# qc-seq
+
+The `qc-seq` module is responsible for removing adapter sequences and low
+quality reads, and generating read-level statistics. It also merges the FastQ
+files per sample, so they can be used by the other modules. Every set of FastQ
+files can be analysed in parallel.
+
+## Tools
+This module uses [cutadapt](https://cutadapt.readthedocs.io/en/stable/) to remove adapter sequences and low quality bases.
+[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) is used to generate detailed quality statistics.
+
+## Input
+The input for this module is one or more pairs of FastQ files per sample, specified in a PEP configuration file, as is shown [here](../test/pep/chrM-trio-subsamples.csv).
+
+## Output
+The output of this module is one set of merged FastQ files per sample, as well as a JSON file with statistics.
+
+## configuration
+The only configurable option for this module is adapter sequences for
+[cutadapt](https://cutadapt.readthedocs.io/en/stable/) to remove.
+
+| Option            | description                  | required |
+| ---------------   | ---------------------------- | -------- |
+| `forward_adapter` | The forward adapter sequence | yes      |
+| `reverse_adapter` | The reverse adapter sequence | yes      |
diff --git a/docs/snv-indels.md b/docs/snv-indels.md
@@ -0,0 +1,47 @@
+# snv-indels
+
+The `snv-indels` module is responsible for aligning the reads to the reference, and calling SNVs and insertions/deletions.
+
+## Tools
+This module uses [STAR](https://github.com/alexdobin/STAR) to align the reads to the reference using twopass mode. [VarDict](https://github.com/AstraZeneca-NGS/VarDictJava) is used to call variants, which are annotated using [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html).
+For each variant, this module determines if it is located inside one of the defined `bed_variant_hotspots`.
+
+The variants annotated by VEP are then filtered based on a number of different criteria:
+1. Variants that are present on the `blacklist` are excluded.
+2. Only variants that are present on one of the specified transcripts in
+   `ref_id_mapping` are included.
+3. Only variants that match one of the consequences defined in
+   `vep_include_consequence` are included.
+4. Variants that have a population frequency of more than 1% in the `gnomADe`
+   population are excluded.
+
+Picard is used to generate various alignment statistics.
+
+## Input
+The input for this module is a single pair of FastQ files per sample, specified in a PEP configuration file, as is shown [here](../test/pep/targetted.csv).
+
+## Output
+The output of this module is a JSON file with an overview of the most important results, as well as a number of other output files:
+- A .bam and .bai per sample, which contain the aligned reads.
+- A VEP output file (`vep_high`), which contains the final set of filtered variants.
+- A VEP output file (`vep_target`), which contains the variants on the transcripts of interest. These variants have not been filtered on `vep_include_consequence` terms.
+- A VCF file that only contains those variants that fall in one of the `bed_variant_hotspots` regions.
+
+## configuration
+
+| Option                      | description                             | required |
+| --------------------------- | --------------------------------------- | -------- |
+| `forward_adapter`           | The forward adapter sequence            | yes      |
+| `reverse_adapter`           | The reverse adapter sequence            | yes      |
+| `genome_fasta`              | Reference genome, in FASTA format       | yes      |
+| `genome_fai`                | .fai index for the reference            | yes      |
+| `genome_dict`               | .dict index for the reference           | yes      |
+| `star_index`                | STAR index database                     | yes      |
+| `ref_id_mapping`            | File of transcripts of interest         | yes      |
+| `rrna_refflat`              | File of rRNA transcripts                | yes      |
+| `bed_variant_hotspots`      | BED file of hotspot regions             | yes      |
+| `bed_variant_call_regions`  | BED file of regions to call variants    | yes      |
+| `gtf`                       | GTF file with transcripts, used by STAR | yes      |
+| `annotation_refflat`        | File used to determine exon coverage    | yes      |
+| `blacklist`                 | File of blacklisted variants            | yes      |      
+| `vep_include_consequence`   | List of [VEP consequences](http://www.ensembl.org/info/genome/variation/prediction/predicted_data.html) to include   | yes      |
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -0,0 +1,35 @@
+# Configuration file for the Sphinx documentation builder.
+
+# -- Project information
+
+project = 'HAMLET'
+copyright = '2018, LUMC'
+author = 'Wibowo Arindrarto, Redmar van den Berg'
+
+release = '2.0'
+version = '2.0.0'
+
+# -- General configuration
+
+extensions = [
+    'sphinx.ext.duration',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.intersphinx',
+]
+
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3/', None),
+    'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+}
+intersphinx_disabled_domains = ['std']
+
+templates_path = ['_templates']
+master_doc = 'index'
+
+# -- Options for HTML output
+
+html_theme = 'sphinx_rtd_theme'
+
+# -- Options for EPUB output
+epub_show_urls = 'footnote'
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -0,0 +1,5 @@
+Welcome to the documentation for HAMLET
+================================================
+
+This is currently a placeholder. You can see the full documentation
+on the `HAMLET GitHub page <https://github.com/LUMC/HAMLET/tree/d75f27ef249b1018fa3a2ad8c513bd8fecf3592b>`_.
diff --git a/test/test_docs.yml b/test/test_docs.yml
@@ -0,0 +1,8 @@
+# Test building the Sphinx documentation in html format
+- name: test-docs
+  tags:
+    - hamlet
+    - docs
+  command: make -C docs/ html
+  files:
+    - path: docs/build/html/genindex.html