From 4029d4a3aa2e1357aa4e3df615fdb93c5e15c53a Mon Sep 17 00:00:00 2001 From: Kurt Showmaker <36570582+kurtshowmaker@users.noreply.github.com> Date: Mon, 11 Mar 2024 09:44:15 -0500 Subject: [PATCH] minor updates --- config.yaml | 25 +- episodes/00-introduction.md | 36 + episodes/01-intro-to-computers.md | 55 + episodes/02-singularity-commands.md | 39 + episodes/03-singularity-pull-hello-world.md | 408 ++ episodes/04-repos-and-registries.md | 350 ++ episodes/05-singularity-build.md | 96 + episodes/06-bioinformatics-qc.md | 264 + episodes/07-bioinformatics-bwa.md | 112 + episodes/08-circos.md | 91 + .../09-singularity-build-blast-example.md | 301 + episodes/10-singularity_build_rstudio.md | 195 + episodes/fig/01_computer_parts.svg | 1833 ++++++ episodes/fig/circos_1_1.svg | 65 + episodes/fig/circos_8_11.svg | 1810 ++++++ episodes/fig/circos_8_6.svg | 4942 +++++++++++++++++ episodes/introduction.md | 114 - 17 files changed, 10615 insertions(+), 121 deletions(-) create mode 100644 episodes/00-introduction.md create mode 100644 episodes/01-intro-to-computers.md create mode 100644 episodes/02-singularity-commands.md create mode 100644 episodes/03-singularity-pull-hello-world.md create mode 100644 episodes/04-repos-and-registries.md create mode 100644 episodes/05-singularity-build.md create mode 100644 episodes/06-bioinformatics-qc.md create mode 100644 episodes/07-bioinformatics-bwa.md create mode 100644 episodes/08-circos.md create mode 100644 episodes/09-singularity-build-blast-example.md create mode 100644 episodes/10-singularity_build_rstudio.md create mode 100644 episodes/fig/01_computer_parts.svg create mode 100644 episodes/fig/circos_1_1.svg create mode 100644 episodes/fig/circos_8_11.svg create mode 100644 episodes/fig/circos_8_6.svg delete mode 100644 episodes/introduction.md diff --git a/config.yaml b/config.yaml index b3bfd79..9df26d5 100644 --- a/config.yaml +++ b/config.yaml @@ -8,32 +8,32 @@ # lc: Library Carpentry # cp: Carpentries (to use for instructor 
training for instance) # incubator: The Carpentries Incubator -carpentry: 'incubator' +carpentry: 'none' # Overall title for pages. -title: 'Lesson Title' # FIXME +title: 'Singularity Workshop 2023' # FIXME # Date the lesson was created (YYYY-MM-DD, this is empty by default) created: ~ # FIXME # Comma-separated list of keywords for the lesson -keywords: 'software, data, lesson, The Carpentries' # FIXME +keywords: 'software, data, lesson' # FIXME # Life cycle stage of the lesson # possible values: pre-alpha, alpha, beta, stable -life_cycle: 'pre-alpha' # FIXME +life_cycle: 'none' # FIXME # License of the lesson license: 'CC-BY 4.0' # Link to the source repository for this lesson -source: 'https://github.com/carpentries/workbench-template-md' # FIXME +source: 'https://github.com/TheJacksonLaboratory/singularity-workshop-2023' # FIXME # Default branch of your lesson branch: 'main' # Who to contact if there are any issues -contact: 'team@carpentries.org' # FIXME +contact: 'kurt.showmaker@jax.org' # FIXME # Navigation ------------------------------------------------ # @@ -59,7 +59,18 @@ contact: 'team@carpentries.org' # FIXME # Order of episodes in your lesson episodes: -- introduction.md +- 00-introduction.md +- 01-intro-to-computers.md +- 02-singularity-commands.md +- 03-singularity-pull-hello-world.md +- 04-repos-and-registries.md +- 05-singularity-build.md +- 06-bioinformatics-qc.md +- 07-bioinformatics-bwa.md +- 08-circos.md +- 09-singularity-build-blast-example.md +- 10-singularity_build_rstudio.md + # Information for Learners learners: diff --git a/episodes/00-introduction.md b/episodes/00-introduction.md new file mode 100644 index 0000000..43a7652 --- /dev/null +++ b/episodes/00-introduction.md @@ -0,0 +1,36 @@ +--- +title: "00-introduction" +teaching: 10 +exercises: 2 +--- + +## Introduction + +This workshop is designed for a beginner with little to no prior experience with containers. + +By the end of this workshop you will have: + +- Created 16 containers.
+ +- Executed 12+ commands with containers. + +- Built 2 custom containers. + +- Built 2 websites and viewed them with containers. + +- Run the most common singularity commands (pull, build, exec). + +- Run the exec command with bind mounts to access data. + +- Encountered 3 gotchas and discussed solutions to deal with them. + + - Gotcha 1: Repositories are managed by other users. + - Solution 1: Keep good documentation on how your software was built. + + - Gotcha 2: Can't mount the current working directory. + - Solution 2: Try the **-B $PWD** flag. + + - Gotcha 3: Can't mount the directory with the data. + - Solution 3: Try the **`-B <src>:<dest>`** flag. + + \ No newline at end of file diff --git a/episodes/01-intro-to-computers.md b/episodes/01-intro-to-computers.md new file mode 100644 index 0000000..149ca89 --- /dev/null +++ b/episodes/01-intro-to-computers.md @@ -0,0 +1,55 @@ +--- +title: "01-intro-to-computers" +teaching: 30 +exercises: 0 +--- + +![parts example](episodes/fig/01_computer_parts.svg){alt='A collection of computer parts including disks, ram, cpu'} + +## Computer Component Review + + - **CPU:** CPUs are the data processing units; they are composed of multiple cores. For legacy reasons software often refers to the number of cores as the number of CPUs, which can be confusing. + + - **RAM (a.k.a MEMORY):** RAM is fast digital storage. Most programs utilize RAM for access to data needed more than once. RAM is generally non-persistent: when the computer is powered off, the contents of RAM are lost. + + - **DISK:** Disk is persistent digital storage that is not as fast as RAM. Disk storage can be made up of one or more disks such as hard disk drives (HDD) and/or solid-state drives (SSD). Multiple disks can be configured together for increased performance and drive failure protection. + + - **NETWORKING:** Switches and network access cards within computers allow for computers to be networked together.
+ + - **GPU:** A Graphics Processing Unit (GPU) is a computer component that is capable of rendering graphics, these are also useful for conducting certain mathematical calculations. + +## Consumer Computer vs Servers vs HPC vs Sumhpc + +| Component | Home/Busines Computer | Server | Typical Individual Node in HPC | Typical Total HPC System | Individual Node on Sumhpc | Total Sumhpc System | +|-----------|-----------------------|------------|-------------------------------|--------------------------|---------------------------|---------------------| +| CPU (cores)| 4 - 8 | 12 - 128 | 32 - 128 | 1000s | 70\* | 7,000 | +| RAM(GB) | 8 -16 | 64 - 960 | 240 - 3000 | 64,000 | 754 - 3TB | 76.8 TB| +| DISK (TB)| .5 - 1 TB | 8 - 100 | None - 1 TB | 100s (Networked) | NA | 2.7 PB | +| Networking (Gbe)| .1 - 1 | 1 - 10 | 40 - 100 | 40 - 100 | 40 | 40 + | + + +## Computer Ports + +A port is a communication endpoint. + +## Introduction to OS, Virtual Machines and Containers + + +Why can't I use Docker? +Docker images are not secure because they allow users to gain root access to the compute nodes. Singularity effectively runs as the user running the command and does not result in elevated access. Also, docker interacts with the slurm job scheduler in a way that causes resource requests and usages to not match up, making it difficult to keep job queueing fair for all users. In that the clusters are multi-user systems, we want to make sure people can work without worry that others are accessing their data or unfairly using up resources. + +## Important notes on how they relate to singularity + +-CPU There are 2 common CPU architectures in modern systems x86_64 and ARM. Singularity containers are architecture specific. 
+```--arch string architecture to pull from library (default "amd64")``` + +### citations + +Lesson adapted from: + +https://github.com/TheJacksonLaboratory/intro-to-hpc + + +https://crc.pitt.edu/singularity + + diff --git a/episodes/02-singularity-commands.md b/episodes/02-singularity-commands.md new file mode 100644 index 0000000..0a0631a --- /dev/null +++ b/episodes/02-singularity-commands.md @@ -0,0 +1,39 @@ +--- +title: "02-singularity-commands" +teaching: 30 +exercises: 0 +--- + + +### Frequently used singularity commands + +- **singularity pull** + +- **singularity shell** + +- **singularity cache** + +- **singularity build** + +- **singularity exec** + +**This hands-on tutorial is being conducted on a base CentOS 7 VM with the following programs installed.** + +- apptainer (including the alias singularity) + +- nano + +- wget + +- unzip + +**The following ports have been made available to the system:** + +- 8080,8787,8789 + +**The following folders have been set up on the remote system:** + +- /projects/my-lab +- /flashscratch +- /workshop_data + diff --git a/episodes/03-singularity-pull-hello-world.md b/episodes/03-singularity-pull-hello-world.md new file mode 100644 index 0000000..5b94a1c --- /dev/null +++ b/episodes/03-singularity-pull-hello-world.md @@ -0,0 +1,408 @@ +--- +title: "03-singularity-pull-hello-world" +teaching: 30 +exercises: 0 +--- + +### The Singularity Pull command + +The most basic command: +```singularity pull <URI>``` + +Can specify a new container (*.sif) name: +```singularity pull <name.sif> <URI>``` + +Example (don't run yet): +```singularity pull docker://rocker/tidyverse:4.2.1 ``` + +Example making a new name (don't run yet): +```singularity pull awesome_container.sif docker://rocker/tidyverse:4.2.1 ``` + +### Tags + +Singularity and Docker use tags; these can be used to access specific versions or distributions of the software.
+ +Of note, the images and tags can change inside the repo, so for complete reproducibility you may want to retain the software or build from source. + +### Singularity can pull from 5 types of URIs + +- **library** : Pull an image from the currently configured library. See here for configuring a library. +``` library://user/collection/container:tag ``` + +- **docker** : Pull a Docker/OCI image from Docker Hub, or another OCI registry. OCI stands for Open Container Initiative. +``` docker://user/image:tag ``` + +- **shub** : Pull an image from Singularity Hub +``` shub://user/image:tag ``` + +- **oras** : Pull a SIF image from an OCI registry that supports ORAS. GCP Artifact Registry supports ORAS. +``` oras://registry/namespace/image:tag ``` + +- **http, https** : Pull an image using the http(s) protocol +``` https://library.sylabs.io/v1/imagefile/library/default/alpine:latest ``` + + +### A note on security and safety + +Only pull from **trusted sources**. Generally speaking, containers provided by trusted organizations or the developer are ideal sources. + +Docker Hub shows how many pulls each container has had, as well as Docker Hub-provided tags: *Docker Official Image*, *Verified Publisher*, and *Sponsored OSS*. + +Let's take a look at some Docker Hub image sources that we will be using today. + +- **staphb** +``` https://hub.docker.com/search?q=staphb&image_filter=open_source ``` + +- **biocontainers** +``` https://hub.docker.com/r/biocontainers/biocontainers ``` + + +Now let's look at a non-standard example of a program called Busco that can be used for phylogenetic analysis as well as assessing genomic data quality (assemblies). + +The home page: +``` https://busco.ezlab.org/ ``` + +The User Guide: +``` https://busco.ezlab.org/busco_userguide.html ``` + +Here we see where to access the Docker image and the directions to run it with Docker (but not Singularity; can we get it to work? We will see later).
+``` https://busco.ezlab.org/busco_userguide.html#docker-image ``` + + +### Hello World Example +### Verify R in not on system + +Try to start *R* on they command line by typing *R*. + +```bash +R --help +``` + +We get an error because *R* is not available. + +```output +-bash: R: command not found +``` + + + +### Lets pull a R containter from Docker Hub + + +```bash +singularity pull docker://rocker/tidyverse:4.2.1 +``` + + +Singularity builds the image. Singularity copies the OCI blobs/layers to the local cache then builds the image (SIF file). + +```output +INFO: Converting OCI blobs to SIF format +INFO: Starting build... +Getting image source signatures +Copying blob 7917df3ef3d8 done +Copying blob 270b4100b33a done +Copying blob 56e0351b9876 done +Copying blob 2faad9a83b09 done +Copying blob 81c9ee1c97bb done +Copying blob d518d22d5d29 done +Copying blob 27c3a6114c0b done +Copying blob 58e0d5c15b4e done +Copying blob 3ccbc1cfa6d1 done +Copying config 1bec811255 done +Writing manifest to image destination +Storing signatures +2023/07/31 02:43:50 info unpack layer: sha256:56e0351b98767487b3c411034be95479ed1710bb6be860db6df0be3a98653027 +2023/07/31 02:43:50 info unpack layer: sha256:270b4100b33a95ddd4b4e0d4cce9c4a262eaf5043a4d6a33a82fc71224e7f857 +2023/07/31 02:43:50 info unpack layer: sha256:2faad9a83b09e8155e7084ed53957d556333d8c78dbd66288dda084362d9a8a0 +2023/07/31 02:43:56 info unpack layer: sha256:d518d22d5d29e561be6588568fd73aff10b6e658a3a3a9e8e98c0470e1b21a8a +2023/07/31 02:43:56 info unpack layer: sha256:81c9ee1c97bb79e966a4ea76644eb05ebc6b72f67dfdccb9e8f4bce3190cdd0a +2023/07/31 02:43:57 info unpack layer: sha256:7917df3ef3d8605361342bc11f7d527ebb4fea3f95704bb6b72e6a4f043faa6d +2023/07/31 02:44:11 info unpack layer: sha256:27c3a6114c0bacba4ceb4e0523ee67bfcc5bec7f7824247b6578cdcb629f4978 +2023/07/31 02:44:11 info unpack layer: sha256:58e0d5c15b4e6c88ede882864475388b1479a3d81c1b4060aeb919a3a3b5f322 +2023/07/31 02:44:11 info unpack layer: 
sha256:3ccbc1cfa6d1cbc33689c9e7c2ebcafcb0af4f895b38c84363f57417e6fbb7cb +INFO: Creating SIF file... +``` + +Note when pulling the image it downloads each layer, storing them in the **cache** and stiches them together in the singularity image file. +The singularity image is immutable. + +### Use ls to view the new file + +```bash +ls -lF +``` + +```output +total 333280 +-rw-rw-r-- 1 student student 0 Jul 30 22:43 test_scp.txt +-rwxrwxr-x 1 student student 324775936 Apr 1 01:09 tidyverse_4.2.1.sif* +``` + + +### Use ls -lFa to see the hidden files. + +```bash +ls -lFa +``` + +We can see the hidden directory **.apptainer** that contains the cache. + +```output +total 692628 +drwxr-xr-x 4 student student 155 Jul 31 02:45 ./ +drwx------ 3 student student 19 Jul 31 02:43 .apptainer/ +-rw-r--r-- 1 student student 18 Nov 24 2021 .bash_logout +-rw-r--r-- 1 student student 193 Nov 24 2021 .bash_profile +-rw-r--r-- 1 student student 231 Nov 24 2021 .bashrc +drwx------ 3 student student 19 Jul 31 02:43 .local/ +-rw-r--r-- 1 student root 38 Jul 31 02:34 test_scp.txt +-rwxrwxr-x 1 student student 709230592 Jul 31 02:45 tidyverse_4.2.1.sif* +-rw-r--r-- 1 student student 658 Apr 7 2020 .zshrc +``` + + +```bash +ls -lF .apptainer/ +``` + +```output +cache +``` + +```bash +ls -lF .apptainer/cache/blob/blobs/sha256/ +``` + +```output +-rw-r--r-- 1 student student 7763 Jul 31 02:43 1bec8112559e0494f45c74ee43af6d28b117a6faa7ff8e3aaefea9e741aedc47 +-rw-r--r-- 1 student student 1807 Jul 31 02:43 270b4100b33a95ddd4b4e0d4cce9c4a262eaf5043a4d6a33a82fc71224e7f857 +-rw-r--r-- 1 student student 27244 Jul 31 02:43 27c3a6114c0bacba4ceb4e0523ee67bfcc5bec7f7824247b6578cdcb629f4978 +-rw-r--r-- 1 student student 250108813 Jul 31 02:43 2faad9a83b09e8155e7084ed53957d556333d8c78dbd66288dda084362d9a8a0 +-rw-r--r-- 1 student student 164652314 Jul 31 02:43 3ccbc1cfa6d1cbc33689c9e7c2ebcafcb0af4f895b38c84363f57417e6fbb7cb +-rw-r--r-- 1 student student 27506421 Jul 31 02:43 
56e0351b98767487b3c411034be95479ed1710bb6be860db6df0be3a98653027 +-rw-r--r-- 1 student student 54081 Jul 31 02:43 58e0d5c15b4e6c88ede882864475388b1479a3d81c1b4060aeb919a3a3b5f322 +-rw-r--r-- 1 student student 243220058 Jul 31 02:43 7917df3ef3d8605361342bc11f7d527ebb4fea3f95704bb6b72e6a4f043faa6d +-rw-r--r-- 1 student student 37945014 Jul 31 02:43 81c9ee1c97bb79e966a4ea76644eb05ebc6b72f67dfdccb9e8f4bce3190cdd0a +-rw-r--r-- 1 student student 2016 Jul 31 02:43 ae0677065e80ef796cdadccfbfb18370b194bc057bec8200d2dbe6b173048935 +-rw-r--r-- 1 student student 22169 Jul 31 02:43 d518d22d5d29e561be6588568fd73aff10b6e658a3a3a9e8e98c0470e1b21a8a +``` + +```bash +singularity cache clean +``` + +```bash +ls -lF .apptainer/cache/blob/blobs/sha256/ +``` + +### Run the R help command + +```bash +singularity exec tidyverse_4.2.1.sif R --help +``` + + +It works now you can see the R program output. + +```output +Usage: R [options] [< infile] [> outfile] + or: R CMD command [arguments] + +Start R, a system for statistical computation and graphics, with the +specified options, or invoke an R tool via the 'R CMD' interface. 
+ +Options: + -h, --help Print short help message and exit + --version Print version info and exit + --encoding=ENC Specify encoding to be used for stdin + --encoding ENC + RHOME Print path to R home directory and exit + --save Do save workspace at the end of the session + --no-save Don't save it + --no-environ Don't read the site and user environment files + --no-site-file Don't read the site-wide Rprofile + --no-init-file Don't read the user R profile + --restore Do restore previously saved objects at startup + --no-restore-data Don't restore previously saved objects + --no-restore-history Don't restore the R history file + --no-restore Don't restore anything + --vanilla Combine --no-save, --no-restore, --no-site-file, + --no-init-file and --no-environ + --no-readline Don't use readline for command-line editing + --max-ppsize=N Set max size of protect stack to N + --min-nsize=N Set min number of fixed size obj's ("cons cells") to N + --min-vsize=N Set vector heap minimum to N bytes; '4M' = 4 MegaB + -q, --quiet Don't print startup message + --silent Same as --quiet + -s, --no-echo Make R run as quietly as possible + --interactive Force an interactive session + --verbose Print more information about progress + -d, --debugger=NAME Run R through debugger NAME + --debugger-args=ARGS Pass ARGS as arguments to the debugger + -g TYPE, --gui=TYPE Use TYPE as GUI; possible values are 'X11' (default) + and 'Tk'. + --arch=NAME Specify a sub-architecture + --args Skip the rest of the command line + -f FILE, --file=FILE Take input from 'FILE' + -e EXPR Execute 'EXPR' and exit + +FILE may contain spaces but not shell metacharacters. 
+ +Commands: + BATCH Run R in batch mode + COMPILE Compile files for use with R + SHLIB Build shared library for dynamic loading + INSTALL Install add-on packages + REMOVE Remove add-on packages + build Build add-on packages + check Check add-on packages + LINK Front-end for creating executable programs + Rprof Post-process R profiling files + Rdconv Convert Rd format to various other formats + Rd2pdf Convert Rd format to PDF + Rd2txt Convert Rd format to pretty text + Stangle Extract S/R code from Sweave documentation + Sweave Process Sweave documentation + Rdiff Diff R output ignoring headers etc + config Obtain configuration information about R + javareconf Update the Java configuration variables + rtags Create Emacs-style tag files from C, R, and Rd files + +Please use 'R CMD command --help' to obtain further information about +the usage of 'command'. + +Options --arch, --no-environ, --no-init-file, --no-site-file and --vanilla +can be placed between R and CMD, to apply to R processes run by 'command' + +Report bugs at . +``` + + +### Run the Rscript help command + +```bash +singularity exec tidyverse_4.2.1.sif Rscript --help +``` + +It works now you can see the Rscript program output. + +```output +Usage: Rscript [options] file [args] + or: Rscript [options] -e expr [-e expr2 ...] [args] +A binary front-end to R, for use in scripting applications. 
+ +Options: + --help Print usage and exit + --version Print version and exit + --verbose Print information on progress + --default-packages=LIST Attach these packages on startup; + a comma-separated LIST of package names, or 'NULL' +and options to R (in addition to --no-echo --no-restore), for example: + --save Do save workspace at the end of the session + --no-environ Don't read the site and user environment files + --no-site-file Don't read the site-wide Rprofile + --no-init-file Don't read the user R profile + --restore Do restore previously saved objects at startup + --vanilla Combine --no-save, --no-restore, --no-site-file, + --no-init-file and --no-environ + +Expressions (one or more '-e ') may be used *instead* of 'file'. +Any additional 'args' can be accessed from R via 'commandArgs(TRUE)'. +See also ?Rscript from within R. + +``` + +Lets go inside of the container and see what we see. + +First lets verify that we can see other directories. +```bash +ls /projects/my-lab +``` + + +```bash +singularity shell tidyverse_4.2.1.sif +``` +Notice the prompt changed to **Apptainer** + +```bash +ls -lFa +``` + +Same outout as before. + +```output +total 692628 +drwxr-xr-x 4 student student 155 Jul 31 02:45 ./ +drwxr-xr-x 4 student student 80 Jul 31 02:58 ../ +drwx------ 3 student student 19 Jul 31 02:43 .apptainer/ +-rw-r--r-- 1 student student 18 Nov 24 2021 .bash_logout +-rw-r--r-- 1 student student 193 Nov 24 2021 .bash_profile +-rw-r--r-- 1 student student 231 Nov 24 2021 .bashrc +drwx------ 3 student student 19 Jul 31 02:43 .local/ +-rw-r--r-- 1 student student 38 Jul 31 02:34 test_scp.txt +-rwxrwxr-x 1 student student 709230592 Jul 31 02:45 tidyverse_4.2.1.sif* +-rw-r--r-- 1 student student 658 Apr 7 2020 .zshrc +``` + +Try to view the projects directory. +```bash +ls /projects/my-lab +``` + +We get an error because the container can not see the directory. +By default the container only mounts the *home* and current *working* directory. 
+If you need to access something outside these directories, you’ll need to use a bind mount (more on this later). + +```output +ls: cannot access '/projects/my-lab': No such file or directory +``` + +Let's exit the container. +```bash +exit +``` + +View the container OS. + +```bash +singularity exec tidyverse_4.2.1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +``` + +Let's use the inspect command to see details about the container. +```bash +singularity inspect tidyverse_4.2.1.sif +``` + +We get several details about this container. Let's see if we can find out more about this container build by researching the source link. + +```output +org.label-schema.build-arch: amd64 +org.label-schema.build-date: Monday_31_July_2023_2:44:16_UTC +org.label-schema.schema-version: 1.0 +org.label-schema.usage.apptainer.version: 1.1.9-1.el7 +org.label-schema.usage.singularity.deffile.bootstrap: docker +org.label-schema.usage.singularity.deffile.from: rocker/tidyverse:4.2.1 +org.opencontainers.image.authors: Carl Boettiger +org.opencontainers.image.base.name: docker.io/rocker/rstudio:4.2.1 +org.opencontainers.image.description: Version-stable build of R, RStudio Server, and R packages. +org.opencontainers.image.licenses: GPL-2.0-or-later +org.opencontainers.image.ref.name: ubuntu +org.opencontainers.image.revision: ef593dcd7b334e02e79188a9e17dcf6149c178b9 +org.opencontainers.image.source: https://github.com/rocker-org/rocker-versioned2 +org.opencontainers.image.title: rocker/tidyverse +org.opencontainers.image.vendor: Rocker Project +org.opencontainers.image.version: R-4.2.1 +``` + +The Rocker GitHub repository provides a lot of detail about the builds.
+ +### Citations and Notes + +https://docs.sylabs.io/guides/3.7/user-guide/cli/singularity_pull.html + + + + + diff --git a/episodes/04-repos-and-registries.md b/episodes/04-repos-and-registries.md new file mode 100644 index 0000000..0956a50 --- /dev/null +++ b/episodes/04-repos-and-registries.md @@ -0,0 +1,350 @@ +--- +title: "04-repos-and-registries" +teaching: 30 +exercises: 0 +--- + +### Building options + +- Build from a repository with the build or pull command. Works fine for things that are ready to go. + +- Use a remote builder (Sylabs/GCP Builder). + +- Use a system with sudo, for instance a cloud VM like we are using today. + + +### Review the remote builder directions + +We will omit the Sylabs remote builder and repositories in this tutorial since they require creating an external account. + +A cloud instance with a few programs (apptainer, nano, wget, unzip, nvidia-smi) is a good alternative to remote builders. + +https://docs.sylabs.io/guides/3.2/user-guide/cloud_library.html + + +Sylabs also has several YouTube videos on using Singularity: + +The 5-part Singularity Container Workflow Demo is a good place to start. + +Part 1: +https://www.youtube.com/watch?v=nQTMJ9hqKNI&list=PL052H4iYGzyvdZ8VS-omTzj1FKMjdXzfB&index=1 +Part 2: +https://www.youtube.com/watch?v=23KOlEouAiI&list=PL052H4iYGzyvdZ8VS-omTzj1FKMjdXzfB&index=2 +Part 3: +https://www.youtube.com/watch?v=I5M6er06lT0&list=PL052H4iYGzyvdZ8VS-omTzj1FKMjdXzfB&index=3 +Part 4: +https://www.youtube.com/watch?v=eb8vFmYLNTg&list=PL052H4iYGzyvdZ8VS-omTzj1FKMjdXzfB&index=4 +Part 5: +https://www.youtube.com/watch?v=CFxngpNl1nU&list=PL052H4iYGzyvdZ8VS-omTzj1FKMjdXzfB&index=5 + + +### Some noteworthy registries and sources of software + +- Docker Hub + - https://hub.docker.com/ + +- Singularity Hub (Singularity Hub is no longer online as a builder service, but containers built before April 19, 2021 are available). + +- Galaxy: several Nextflow nf-core containers are pulled from the Galaxy project.
+ - example: https://github.com/nf-core/rnaseq/blob/3.12.0/modules/nf-core/fastqc/main.nf + +- Sylabs + - https://cloud.sylabs.io/ + - Sylabs also has a remote builder. + +### Lets pull some bioinformatic software from the registries and see what we can get. + + +```bash +cd /projects/my-lab/04-pull +``` + +Check and see what is in the directory. +```bash +ls -lF +``` + +Nothing, just an empty directory. +```output +total 0 +``` + +Lets look at the busco pull command from earlier, dont execute it. +```text +docker pull ezlabgva/busco:v5.4.7_cv1 +``` + +Modify for singularity and pull it down. +Maybe put docker, colon, forward slash,forward slash to a jingle as we will be using it often. + +```docker://``` + +```bash +singularity pull docker://ezlabgva/busco:v5.4.7_cv1 +``` + +```output +INFO: Converting OCI blobs to SIF format +INFO: Starting build... +Getting image source signatures +Copying blob 700f250e37eb done +Copying blob da52c665ae6a done +Copying blob 230a319b6d10 done +Copying blob f7ec5a41d630 done +Copying blob a3ed95caeb02 done +Copying blob bffdb47af6a2 done +Copying blob 519cab61f8f7 done +Copying blob 3b07ee5f9c53 done +Copying config edd7eca642 done +Writing manifest to image destination +Storing signatures +2023/07/31 03:16:00 info unpack layer: sha256:f7ec5a41d630a33a2d1db59b95d89d93de7ae5a619a3a8571b78457e48266eba +2023/07/31 03:16:01 info unpack layer: sha256:da52c665ae6a3c231308941f65380e35950ef6c10aca2d47181c8ebf4915f6f1 +2023/07/31 03:16:01 info unpack layer: sha256:bffdb47af6a29dd80fefdab2010f1c359c84e20797d0e22385589287bd992ace +2023/07/31 03:16:01 info unpack layer: sha256:230a319b6d10d18fb27a90920929d39624ae864f9d1b1ce2b82f579b084dcd94 +2023/07/31 03:16:01 info unpack layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 +2023/07/31 03:16:01 info unpack layer: sha256:700f250e37ebd4b22828d661f4a53537cd504b8d09d843bc1cbf01d36f622d3e +2023/07/31 03:16:31 info unpack layer: 
sha256:519cab61f8f7703cf31e02e406e11571d9432e3c3abbcd57ed5ea01b20a68199 +2023/07/31 03:16:31 info unpack layer: sha256:3b07ee5f9c539bdb444c78a97e1578d7c10b0dc1a8f9b6c89de29ff1d367bdb4 +INFO: Creating SIF file... +``` + +Lets see what happened. +```bash +ls -lFa +``` + +We see a busco sif file but nothing else. The cache is in the home directory. + +```output +ls -alF +total 802912 +drwxr-xr-x 2 student root 34 Jul 31 03:17 ./ +drwxr-xr-x 11 student root 165 Jul 31 02:34 ../ +-rwxrwxr-x 1 student student 822181888 Jul 31 03:17 busco_v5.4.7_cv1.sif* +``` + +Lets see if it works by pulling down a couple of Staphylococcus aureus bacteria proteomes. +```bash +wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/013/425/GCF_000013425.1_ASM1342v1/GCF_000013425.1_ASM1342v1_protein.faa.gz +wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/264/815/GCF_003264815.1_ASM326481v1/GCF_003264815.1_ASM326481v1_protein.faa.gz + +gunzip ./GCF_000013425.1_ASM1342v1_protein.faa.gz ./GCF_003264815.1_ASM326481v1_protein.faa.gz +``` + +Lets run the busco command. + +```bash +singularity exec ./busco_v5.4.7_cv1.sif busco -i GCF_000013425.1_ASM1342v1_protein.faa -l bacteria_odb10 -o busco_out_GCF_000013425 -m protein +``` + +Oh no, we got an error, its ok there is an easy fix. +We can add the **--bind $PWD** to specifically request the current directory to be mounted. + +```bash +singularity exec --bind $PWD ./busco_v5.4.7_cv1.sif busco -i GCF_000013425.1_ASM1342v1_protein.faa -l bacteria_odb10 -o busco_out_GCF_000013425 -m protein +``` + +```bash +singularity exec --bind $PWD ./busco_v5.4.7_cv1.sif busco -i GCF_003264815.1_ASM326481v1_protein.faa -l bacteria_odb10 -o busco_out_GCF_003264815 -m protein +``` + +Now lets download some more tools. +What was that jingle again ... 
+ +```bash +singularity pull docker://staphb/bwa:0.7.17 +singularity pull docker://staphb/samtools:1.17-2023-06 +singularity pull docker://staphb/bedtools:2.31.0 +singularity pull docker://staphb/blast:2.14.0 +singularity pull docker://staphb/bcftools:1.17 +singularity pull docker://staphb/ncbi-datasets +singularity pull docker://biocontainers/vcftools:v0.1.16-1-deb_cv1 +singularity pull docker://biocontainers/bedops:v2.4.35dfsg-1-deb_cv1 +``` + +Lets see what we pulled down. +```bash +ls -lh -1 *sif +``` + +Thats alot of software ready to go. +We see a wide range in sizes for the various software. + +```output +-rwxrwxr-x 1 student student 119M Jul 31 07:53 bcftools_1.17.sif +-rwxrwxr-x 1 student student 59M Jul 31 07:53 bedops_v2.4.35dfsg-1-deb_cv1.sif +-rwxrwxr-x 1 student student 44M Jul 31 07:52 bedtools_2.31.0.sif +-rwxrwxr-x 1 student student 265M Jul 31 07:53 blast_2.14.0.sif +-rwxrwxr-x 1 student student 785M Jul 31 07:48 busco_v5.4.7_cv1.sif +-rwxrwxr-x 1 student student 76M Jul 31 07:52 bwa_0.7.17.sif +-rwxrwxr-x 1 student student 43M Jul 31 07:53 ncbi-datasets_latest.sif +-rwxrwxr-x 1 student student 44M Jul 31 07:52 samtools_1.17-2023-06.sif +-rwxrwxr-x 1 student student 61M Jul 31 07:53 vcftools_v0.1.16-1-deb_cv1.sif +``` + +Lets see what they used for the base images. 
+ + +```bash +singularity exec bcftools_1.17.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec bedops_v2.4.35dfsg-1-deb_cv1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec bedtools_2.31.0.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec blast_2.14.0.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec busco_v5.4.7_cv1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec bwa_0.7.17.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec ncbi-datasets_latest.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec samtools_1.17-2023-06.sif grep -E '^(VERSION|NAME)=' /etc/os-release +singularity exec vcftools_v0.1.16-1-deb_cv1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +``` + +We see several different Ubuntu release and on Debian. + +```output +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec bcftools_1.17.sif grep -E '^(VERSION|NAME)=' /etc/os-release +INFO: underlay of /etc/localtime required more than 50 (75) bind mounts +NAME="Ubuntu" +VERSION="20.04.5 LTS (Focal Fossa)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec bedops_v2.4.35dfsg-1-deb_cv1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +NAME="Debian GNU/Linux" +VERSION="10 (buster)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec bedtools_2.31.0.sif grep -E '^(VERSION|NAME)=' /etc/os-release +INFO: underlay of /etc/localtime required more than 50 (69) bind mounts +NAME="Ubuntu" +VERSION="22.04.2 LTS (Jammy Jellyfish)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec blast_2.14.0.sif grep -E '^(VERSION|NAME)=' /etc/os-release +INFO: underlay of /etc/localtime required more than 50 (71) bind mounts +NAME="Ubuntu" +VERSION="20.04.6 LTS (Focal Fossa)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec busco_v5.4.7_cv1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +NAME="Debian GNU/Linux" +VERSION="10 (buster)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec bwa_0.7.17.sif grep -E 
'^(VERSION|NAME)=' /etc/os-release +NAME="Ubuntu" +VERSION="16.04.7 LTS (Xenial Xerus)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec ncbi-datasets_latest.sif grep -E '^(VERSION|NAME)=' /etc/os-release +INFO: underlay of /etc/localtime required more than 50 (73) bind mounts +NAME="Ubuntu" +VERSION="22.04.2 LTS (Jammy Jellyfish)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec samtools_1.17-2023-06.sif grep -E '^(VERSION|NAME)=' /etc/os-release +INFO: underlay of /etc/localtime required more than 50 (70) bind mounts +NAME="Ubuntu" +VERSION="22.04.2 LTS (Jammy Jellyfish)" +[student@edu-vm-bdafb86b-1 04-pull]$ singularity exec vcftools_v0.1.16-1-deb_cv1.sif grep -E '^(VERSION|NAME)=' /etc/os-release +NAME="Debian GNU/Linux" +VERSION="10 (buster)" + +``` + + +Now lets download another bacteria with the ncbi-datasets + +```bash +singularity exec -B $PWD ncbi-datasets_latest.sif datasets download genome accession GCF_001719145.1 --include gff3,rna,cds,protein,genome,seq-report --filename GCF_001719145.1.zip +``` + +```bash +unzip GCF_001719145.1.zip +``` + +We just download a gff, protein fasta, transcript fasta, genome fasta and more for this accession. + +```bash +ls -lah ncbi_dataset/data/* +``` + +Lets run some of the bioinformatic tools. + +Index the genome, transcriptome, and proteome cause why not. + +```bash +singularity exec -B $PWD samtools_1.17-2023-06.sif samtools faidx ncbi_dataset/data/GCF_001719145.1/GCF_001719145.1_ASM171914v1_genomic.fna +``` + +```bash +singularity exec -B $PWD samtools_1.17-2023-06.sif samtools faidx ncbi_dataset/data/GCF_001719145.1/cds_from_genomic.fna +``` + +```bash +singularity exec -B $PWD samtools_1.17-2023-06.sif samtools faidx ncbi_dataset/data/GCF_001719145.1/protein.faa +``` + +We can now see the *.fai* files have been added. 
+ +```bash +ls -lah ncbi_dataset/data/GCF_001719145.1 +``` + +```bash +head ncbi_dataset/data/GCF_001719145.1/protein.faa.fai +``` + +```output +WP_002802878.1 86 100 80 81 +WP_002804494.1 76 255 76 77 +WP_002804983.1 90 418 80 81 +WP_002805908.1 46 580 46 47 +WP_002806026.1 208 728 80 81 +WP_002806565.1 121 1002 80 81 +WP_002808218.1 128 1212 80 81 +WP_002808376.1 71 1409 71 72 +WP_002808458.1 227 1565 80 81 +WP_002808480.1 112 1870 80 81 +``` + +```bash +sort -nk 2 ncbi_dataset/data/GCF_001719145.1/protein.faa.fai | tail +``` + +```output +WP_069288208.1 1504 1554134 80 81 +WP_005917363.1 1669 1264083 80 81 +WP_033837051.1 1746 1522853 80 81 +WP_033481971.1 1871 1504684 80 81 +WP_269466082.1 2264 1673627 80 81 +WP_033837667.1 2375 1534966 80 81 +WP_033837612.1 2416 1528386 80 81 +WP_033837670.1 2756 1537448 80 81 +WP_005916482.1 2982 1177517 80 81 +WP_005916057.1 3397 1136201 80 81 +``` + +```bash +singularity exec -B $PWD samtools_1.17-2023-06.sif samtools faidx ncbi_dataset/data/GCF_001719145.1/protein.faa WP_005916057.1 +``` + + +```bash +singularity exec -B $PWD samtools_1.17-2023-06.sif samtools faidx ncbi_dataset/data/GCF_001719145.1/protein.faa WP_005916057.1 > longest_prot.fasta +``` + + +```bash +singularity exec -B $PWD blast_2.14.0.sif makeblastdb -in ncbi_dataset/data/GCF_001719145.1/GCF_001719145.1_ASM171914v1_genomic.fna -input_type fasta -dbtype nucl +``` + +```bash +singularity exec -B $PWD blast_2.14.0.sif tblastn -query longest_prot.fasta -db ncbi_dataset/data/GCF_001719145.1/GCF_001719145.1_ASM171914v1_genomic.fna -outfmt 7 +``` + +```bash +singularity exec -B $PWD blast_2.14.0.sif tblastn -query longest_prot.fasta -db ncbi_dataset/data/GCF_001719145.1/GCF_001719145.1_ASM171914v1_genomic.fna -outfmt 6 | cut -f2,9,10 > hits.bed +``` + +```bash +cat hits.bed +``` + +```bash +awk '{ if ($2 > $3) { t = $2; $2 = $3; $3 = t; } else if ($2 == $3) { $3 += 1; } print $0; }' hits.bed | singularity exec bedops_v2.4.35dfsg-1-deb_cv1.sif sort-bed - > 
b.fixed.bed +`````` + +```bash +singularity exec -B $PWD bedtools_2.31.0.sif bedtools merge -i b.fixed.bed +``` + +### citations + +Awk command: +https://www.biostars.org/p/304852/ + diff --git a/episodes/05-singularity-build.md b/episodes/05-singularity-build.md new file mode 100644 index 0000000..d6c88e9 --- /dev/null +++ b/episodes/05-singularity-build.md @@ -0,0 +1,96 @@ +--- +title: "05-singularity-build" +teaching: 30 +exercises: 0 +--- + +### The build command + +Containers can be built from the same sources as the pull command (library, docker, shub, oras), as well as using a binary file as a base. + +Today we will focus on one of the most common builds, building on top of a container image from docker. + +Builds that do not modify the source image generally can be built without sudo privileges; this is equivalent to a pull command. + +### Take away parts of the build command and definition file \ +- singularity build is used to build images from a definition file. +- The definition file contains the directions to build the image, similar to installing an OS. 
+- **Bootstrap** - the provider of the source images +- **From** - the source image/layers +- **%post** - commands issued to build the container +- **%environment** - variables set at runtime +- **%runscript** - commands executed when the container image is run (either via the singularity run command or by executing the container directly as a command) + +**Example of build definition file:** +``` +Bootstrap: docker +From: ubuntu:16.04 + +%post + apt-get -y update + apt-get -y install fortune cowsay lolcat + +%environment + export LC_ALL=C + export PATH=/usr/games:$PATH + +%runscript + fortune | cowsay | lolcat +``` + + +**Example of all the options for a definition file.** +``` +Bootstrap: library +From: ubuntu:18.04 + +%setup + touch /file1 + touch ${SINGULARITY_ROOTFS}/file2 + +%files + /file1 + /file1 /opt + +%environment + export LISTEN_PORT=12345 + export LC_ALL=C + +%post + apt-get update && apt-get install -y netcat + NOW=`date` + echo "export NOW=\"${NOW}\"" >> $SINGULARITY_ENVIRONMENT + +%runscript + echo "Container was created $NOW" + echo "Arguments received: $*" + exec echo "$@" + +%startscript + nc -lp $LISTEN_PORT + +%test + grep -q NAME=\"Ubuntu\" /etc/os-release + if [ $? -eq 0 ]; then + echo "Container base is Ubuntu as expected." + else + echo "Container base is not Ubuntu." + fi + +%labels + Author d@sylabs.io + Version v0.0.1 + +%help + This is a demo container used to illustrate a def file that uses all + supported sections. +``` + + +We will build containers later in the course. 
+ + +### citations + +Lesson adapted from: + diff --git a/episodes/06-bioinformatics-qc.md b/episodes/06-bioinformatics-qc.md new file mode 100644 index 0000000..c797270 --- /dev/null +++ b/episodes/06-bioinformatics-qc.md @@ -0,0 +1,264 @@ +--- +title: "06-bioinformatics-qc" +teaching: 10 +exercises: 0 +--- + +### Lets build a container image for something not in dockerhub + +### First lets check our data files + +```bash +cd /projects/my-lab/06-bio-qc +``` + +View contents of directory file: + +```bash +ls -lF +``` + +```output +total 180 +-rw-rw-r-- 1 student student 82010 Jul 14 05:34 SRR10233452_subset_1.fastq.gz +-rw-rw-r-- 1 student student 64 Jul 14 05:34 SRR10233452_subset_1.fastq.gz.md5 +-rw-rw-r-- 1 student student 61 Jul 14 05:34 SRR10233452_subset_1.fastq.md5 +-rw-rw-r-- 1 student student 80450 Jul 14 05:34 SRR10233452_subset_2.fastq.gz +-rw-rw-r-- 1 student student 64 Jul 14 05:34 SRR10233452_subset_2.fastq.gz.md5 +-rw-rw-r-- 1 student student 61 Jul 14 05:34 SRR10233452_subset_2.fastq.md5 +``` + + + +Let's make sure our files are correct with md5sum + +```bash +cat SRR10233452_subset_1.fastq.gz.md5 +``` + + +```output +0e6b0d752ca7bd9019cc4f5994950cf4 SRR10233452_subset_1.fastq.gz +``` + + + +```bash +md5sum SRR10233452_subset_1.fastq.gz +``` + + +The output is the same so we know the file is correct. + +```output +0e6b0d752ca7bd9019cc4f5994950cf4 SRR10233452_subset_1.fastq.gz +``` + + +For the second read just use the check function of md5sum. + +```bash +md5sum -c SRR10233452_subset_2.fastq.gz.md5 +``` + + +Prints an OK since the file is correct, otherwise it would say FAILED. + +```output +SRR10233452_subset_2.fastq.gz: OK +``` + + + +### Now we can run some containers + + +Let's run FastQC. 
+ +```bash +singularity pull fastqc.sif docker://staphb/fastqc:0.12.1 +``` + +**The command above prints a lot of text to the screen.** + + + +### Use *ls* to view the new sif file + +```bash +ls +``` + + +```output +fastqc.sif SRR10233452_subset_1.fastq.gz SRR10233452_subset_1.fastq.gz.md5 SRR10233452_subset_1.fastq.md5 SRR10233452_subset_2.fastq.gz SRR10233452_subset_2.fastq.gz.md5 SRR10233452_subset_2.fastq.md5 + +``` + + +### Now run fastqc + +```bash +singularity exec fastqc.sif fastqc SRR10233452_subset_1.fastq.gz SRR10233452_subset_2.fastq.gz +``` + +```output +INFO: underlay of /etc/localtime required more than 50 (81) bind mounts +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to the standard locale ("C"). +Skipping 'SRR10233452_subset_1.fastq.gz' which didn't exist, or couldn't be read +Skipping 'SRR10233452_subset_2.fastq.gz' which didn't exist, or couldn't be read +``` + +Oh no, we don't see the files again. +We can fix that. + + +```bash +singularity exec -B $PWD fastqc.sif fastqc SRR10233452_subset_1.fastq.gz SRR10233452_subset_2.fastq.gz +``` + +Worked great. + +```output +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to the standard locale ("C"). 
+Started analysis of SRR10233452_subset_1.fastq.gz +Approx 100% complete for SRR10233452_subset_1.fastq.gz +Analysis complete for SRR10233452_subset_1.fastq.gz +Started analysis of SRR10233452_subset_2.fastq.gz +Approx 100% complete for SRR10233452_subset_2.fastq.gz +Analysis complete for SRR10233452_subset_2.fastq.gz +``` + +### Use ls with the 1 flag to see the files in the directory + +```bash +ls -1 +``` + + +```output +fastqc.sif +SRR10233452_subset_1_fastqc.html +SRR10233452_subset_1_fastqc.zip +SRR10233452_subset_1.fastq.gz +SRR10233452_subset_1.fastq.gz.md5 +SRR10233452_subset_1.fastq.md5 +SRR10233452_subset_2_fastqc.html +SRR10233452_subset_2_fastqc.zip +SRR10233452_subset_2.fastq.gz +SRR10233452_subset_2.fastq.gz.md5 +SRR10233452_subset_2.fastq.md5 + +``` + + + +### Use *unzip* to unzip the results for subset 1. + +```bash +unzip SRR10233452_subset_1_fastqc.zip +``` + + +```output +Archive: SRR10233452_subset_1_fastqc.zip + creating: SRR10233452_subset_1_fastqc/ + creating: SRR10233452_subset_1_fastqc/Icons/ + creating: SRR10233452_subset_1_fastqc/Images/ + inflating: SRR10233452_subset_1_fastqc/Icons/fastqc_icon.png + inflating: SRR10233452_subset_1_fastqc/Icons/warning.png + inflating: SRR10233452_subset_1_fastqc/Icons/error.png + inflating: SRR10233452_subset_1_fastqc/Icons/tick.png + inflating: SRR10233452_subset_1_fastqc/summary.txt + inflating: SRR10233452_subset_1_fastqc/Images/per_base_quality.png + inflating: SRR10233452_subset_1_fastqc/Images/per_sequence_quality.png + inflating: SRR10233452_subset_1_fastqc/Images/per_base_sequence_content.png + inflating: SRR10233452_subset_1_fastqc/Images/per_sequence_gc_content.png + inflating: SRR10233452_subset_1_fastqc/Images/per_base_n_content.png + inflating: SRR10233452_subset_1_fastqc/Images/sequence_length_distribution.png + inflating: SRR10233452_subset_1_fastqc/Images/duplication_levels.png + inflating: SRR10233452_subset_1_fastqc/Images/adapter_content.png + inflating: 
SRR10233452_subset_1_fastqc/fastqc_report.html + inflating: SRR10233452_subset_1_fastqc/fastqc_data.txt + inflating: SRR10233452_subset_1_fastqc/fastqc.fo +``` + + + +### Use *cat* to view the summary results + +```bash +cat SRR10233452_subset_1_fastqc/summary.txt +``` + + +```output +PASS Basic Statistics SRR10233452_subset_1.fastq.gz +PASS Per base sequence quality SRR10233452_subset_1.fastq.gz +PASS Per sequence quality scores SRR10233452_subset_1.fastq.gz +FAIL Per base sequence content SRR10233452_subset_1.fastq.gz +WARN Per sequence GC content SRR10233452_subset_1.fastq.gz +FAIL Per base N content SRR10233452_subset_1.fastq.gz +WARN Sequence Length Distribution SRR10233452_subset_1.fastq.gz +PASS Sequence Duplication Levels SRR10233452_subset_1.fastq.gz +PASS Overrepresented sequences SRR10233452_subset_1.fastq.gz +PASS Adapter Content SRR10233452_subset_1.fastq.gz + +``` + + + + +### Prep for BWA + +Now we can uncompress the 'gzipped' files with gunzip. Use the '-c option to preserve the original file' + +```bash +gunzip SRR10233452_subset_1.fastq.gz SRR10233452_subset_2.fastq.gz +``` + +Nothing is printed out. +```output + +``` + + + +Can see changes with 'ls'. 
+ +```bash +ls -lF +``` + + +```output +-rwxrwxr-x 1 student student 297582592 Jul 14 12:21 fastqc.sif* +-rw-rw-r-- 1 student student 366018 Jul 14 12:06 SRR10233452_subset_1.fastq +-rw-rw-r-- 1 student student 218957 Jul 14 12:22 SRR10233452_subset_1_fastqc.html +-rw-rw-r-- 1 student student 225066 Jul 14 12:22 SRR10233452_subset_1_fastqc.zip +-rw-rw-r-- 1 student student 64 Jul 14 12:06 SRR10233452_subset_1.fastq.gz.md5 +-rw-rw-r-- 1 student student 61 Jul 14 12:06 SRR10233452_subset_1.fastq.md5 +-rw-rw-r-- 1 student student 355874 Jul 14 12:06 SRR10233452_subset_2.fastq +-rw-rw-r-- 1 student student 230235 Jul 14 12:22 SRR10233452_subset_2_fastqc.html +-rw-rw-r-- 1 student student 245117 Jul 14 12:22 SRR10233452_subset_2_fastqc.zip +-rw-rw-r-- 1 student student 64 Jul 14 12:06 SRR10233452_subset_2.fastq.gz.md5 +-rw-rw-r-- 1 student student 61 Jul 14 12:06 SRR10233452_subset_2.fastq.md5 +``` + + +#### Try downloading the zip files with **scp** + + + diff --git a/episodes/07-bioinformatics-bwa.md b/episodes/07-bioinformatics-bwa.md new file mode 100644 index 0000000..8a66b7c --- /dev/null +++ b/episodes/07-bioinformatics-bwa.md @@ -0,0 +1,112 @@ +--- +title: "07-bioinformatics bwa" +teaching: 10 +exercises: 0 +--- + +### Lets use bwa + +```bash +cd /projects/my-lab/07-bwa +``` + + +View contents of directory file: + +```bash +ls -lF +``` + +```output +total 15804 +-rw-rw-r-- 1 student student 16171456 Jul 14 05:34 chr20.fna.gz +-rw-rw-r-- 1 student student 47 Jul 14 05:34 chr20.fna.gz.md5 +-rw-rw-r-- 1 student student 44 Jul 14 05:34 chr20.fna.md5 +``` + + +```bash +gunzip chr20.fna.gz +``` + + + +### Now make an index + +Pull a bwa image from dockerhub + +```bash +singularity pull bwa.sif docker://staphb/bwa:0.7.17 +``` + + +Make an index + +```bash +singularity exec -B $PWD bwa.sif bwa index chr20.fna chr20.fna +``` + + +```output +[bwa_index] Pack FASTA... 0.20 sec +[bwa_index] Construct BWT for the packed sequence... 
+[BWTIncCreate] textLength=108871774, availableWord=19660180 +[BWTIncConstructFromPacked] 10 iterations done. 32429790 characters processed. +[BWTIncConstructFromPacked] 20 iterations done. 59910030 characters processed. +[BWTIncConstructFromPacked] 30 iterations done. 84330494 characters processed. +[BWTIncConstructFromPacked] 40 iterations done. 106031422 characters processed. +[bwt_gen] Finished constructing BWT in 42 iterations. +[bwa_index] 13.58 seconds elapse. +[bwa_index] Update BWT... 0.15 sec +[bwa_index] Pack forward-only FASTA... 0.13 sec +[bwa_index] Construct SA from BWT and Occ... 9.12 sec +[main] Version: 0.7.17-r1188 +[main] CMD: /bwa/bwa-0.7.17/bwa index chr20.fna chr20.fna +[main] Real time: 23.272 sec; CPU: 23.231 sec +``` + +```bash +ls -lF +``` + +```output +total 249052 +-rwxrwxr-x 1 student student 104620032 Jul 14 06:48 bwa.sif* +-rw-rw-r-- 1 student student 55116442 Jul 14 05:34 chr20.fna +-rw-rw-r-- 1 student student 160 Jul 14 06:49 chr20.fna.amb +-rw-rw-r-- 1 student student 135 Jul 14 06:49 chr20.fna.ann +-rw-rw-r-- 1 student student 54435968 Jul 14 06:49 chr20.fna.bwt +-rw-rw-r-- 1 student student 47 Jul 14 05:34 chr20.fna.gz.md5 +-rw-rw-r-- 1 student student 44 Jul 14 05:34 chr20.fna.md5 +-rw-rw-r-- 1 student student 13608973 Jul 14 06:49 chr20.fna.pac +-rw-rw-r-- 1 student student 27217992 Jul 14 06:49 chr20.fna.sa +``` + + +```bash +singularity exec -B $PWD bwa.sif bwa mem chr20.fna /projects/my-lab/06-bio-qc/SRR10233452_subset_1.fastq /projects/my-lab/06-bio-qc/SRR10233452_subset_2.fastq > out_bwa.sam +``` + +We get an error as the container can not see the fastq files in the other directory + +```output +0233452_subset_2.fastq > out_bwa.sam +[M::bwa_idx_load_from_disk] read 0 ALT contigs +[E::main_mem] fail to open file `/projects/my-lab/06-bio-qc/SRR10233452_subset_1.fastq'. +``` + +Lets add a bind mount to the command so we can see them. +Note: The mount location in the container does not need to exist in the container. 
+ +```bash +singularity exec -B /projects/my-lab/06-bio-qc:/projects/my-lab/06-bio-qc -B $PWD bwa.sif bwa mem chr20.fna /projects/my-lab/06-bio-qc/SRR10233452_subset_1.fastq /projects/my-lab/06-bio-qc/SRR10233452_subset_2.fastq > out_bwa.sam +``` + +```bash +head -n 7 out_bwa.sam +``` + +### Citations + +singularity bind mounts +https://docs.sylabs.io/guides/3.0/user-guide/bind_paths_and_mounts.html diff --git a/episodes/08-circos.md b/episodes/08-circos.md new file mode 100644 index 0000000..d8f6bca --- /dev/null +++ b/episodes/08-circos.md @@ -0,0 +1,91 @@ +--- +title: "08-circos" +teaching: 15 +exercises: 0 +--- + +### Example for circos + +Change directory into the circos directory. + +```bash +cd /projects/my-lab/08-circos +``` + +Verify nothing is in the directory + +```bash +ls -lF +``` + +Make a circos SIF file by pulling the docker image circos. + +```bash +singularity pull docker://alexcoppe/circos +``` + + + +Download the circos tutorial. +```bash +wget http://circos.ca/distribution/circos-tutorials-current.tgz +``` + + +Untar and gzip the downloaded file. 
+ +```bash +tar xzvf circos-tutorials-current.tgz +``` + + + +```bash +ls -1F +``` + + +```output +circos_latest.sif* +circos-tutorials-0.67/ +circos-tutorials-current.tgz + +``` + +Run tutorial 1/1 + +```bash +singularity exec -B $PWD circos_latest.sif /opt/circos/bin/circos -conf circos-tutorials-0.67/tutorials/1/1/circos.conf +``` + + +The base image of the human chromosomes was created + +![circos_1_1](episodes/fig/circos_1_1.svg){alt="circos example 1 dot 1"} + + +Run tutorial 8/6 histograms + +```bash +singularity exec -B $PWD circos_latest.sif /opt/circos/bin/circos -conf circos-tutorials-0.67/tutorials/8/6/circos.conf +``` + + +An image with histograms + +![circos_8_6](episodes/fig/circos_8_6.svg){alt="circos example 8 dot 6"} + +Run tutorial 8/11 links + +```bash +singularity exec -B $PWD circos_latest.sif /opt/circos/bin/circos -conf circos-tutorials-0.67/tutorials/8/11/circos.conf +``` + + +An image with links + +![circos_8_11](episodes/fig/circos_8_11.svg){alt="circos example 8 dot 11"} + +### citations +http://circos.ca/ + diff --git a/episodes/09-singularity-build-blast-example.md b/episodes/09-singularity-build-blast-example.md new file mode 100644 index 0000000..ee888c4 --- /dev/null +++ b/episodes/09-singularity-build-blast-example.md @@ -0,0 +1,301 @@ +--- +title: "09-singularity-build" +teaching: 35 +exercises: 0 +--- + +# Using Containers & Questions + +Building on our previous sections, in this unit, we’re going to build a container for BLAST and show how to use that container to construct a BLAST database and search sequences against that database. + +## Containerizing BLAST + +BLAST stands for Basic Local Alignment Search Tool, and is a sophisticated software package for rapid searching of protein and nucleotide databases. BLAST was developed by Steven Altschul in 1989, and has continually been refined, updated, and modified throughout the years to meet the increasing needs of the scientific community. 
+ +To cite BLAST, please refer to the following: +Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ. Basic local alignment search tool. Journal of Molecular Biology. Volume 215(3), pages 403-410. 1990. +PMID: 2231712 DOI: 10.1016/S0022-2836(05)80360-2. + +To start, let’s create an empty file to use as our recipe file. + + +```bash +cd /projects/my-lab/09-build-blast +``` + +```bash +touch blast.def +``` + +The touch command allows modification of file timestamps, or in the case of this usage, where the file does not already exist, creates an empty file. + +Now we’ll use nano to build out our recipe file. + +```bash +nano blast.def +``` + +This should open the basic nano text editor for you to access through your terminal. + +Let’s type the following into our blast.def file: + +```bash +Bootstrap:docker +From:ubuntu + +%labels + MAINTAINER Kurt Showmaker + +%post + apt update && apt upgrade -y + apt install -y wget gzip zip unzip ncbi-blast+ locales + LANG=C perl -e exit + locale-gen en_US.UTF-8 + +%runscript + echo "Hello from BLAST!" +``` + +Let’s hit `CTRL-O` to save our modifications and then `CTRL-X` to exit the nano editor. + +Ok, now to build the container: + +```bash +sudo singularity build blast.sif blast.def +``` + +Ok, let’s test that our container is built properly. + +```bash +./blast.sif +``` + +```output +Hello from BLAST! +``` + +Ok, now we can go into the container’s environment to verify things using the singularity shell command. + +```bash +singularity shell blast.sif +``` + +Notice the change in prompt from “$” to ”Apptainer>”, this is because we are inside the container. 
+```bash +Apptainer> blastp -h +``` + +```output +USAGE + blastp [-h] [-help] [-import_search_strategy filename] + [-export_search_strategy filename] [-task task_name] [-db database_name] + [-dbsize num_letters] [-gilist filename] [-seqidlist filename] + [-negative_gilist filename] [-negative_seqidlist filename] + [-taxids taxids] [-negative_taxids taxids] [-taxidlist filename] + [-negative_taxidlist filename] [-ipglist filename] + [-negative_ipglist filename] [-entrez_query entrez_query] + [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] + [-subject subject_input_file] [-subject_loc range] [-query input_file] + [-out output_file] [-evalue evalue] [-word_size int_value] + [-gapopen open_penalty] [-gapextend extend_penalty] + [-qcov_hsp_perc float_value] [-max_hsps int_value] + [-xdrop_ungap float_value] [-xdrop_gap float_value] + [-xdrop_gap_final float_value] [-searchsp int_value] [-seg SEG_options] + [-soft_masking soft_masking] [-matrix matrix_name] + [-threshold float_value] [-culling_limit int_value] + [-best_hit_overhang float_value] [-best_hit_score_edge float_value] + [-subject_besthit] [-window_size int_value] [-lcase_masking] + [-query_loc range] [-parse_deflines] [-outfmt format] [-show_gis] + [-num_descriptions int_value] [-num_alignments int_value] + [-line_length line_length] [-html] [-sorthits sort_hits] + [-sorthsps sort_hsps] [-max_target_seqs num_sequences] + [-num_threads int_value] [-ungapped] [-remote] [-comp_based_stats compo] + [-use_sw_tback] [-version] + +DESCRIPTION + Protein-Protein BLAST 2.9.0+ + +Use '-help' to print detailed descriptions of command line arguments +``` + +The blastp command is for Protein BLASTs, where a protein sequence is searched against a protein database. + +Let’s exit the container environment. + +```bash +Apptainer> exit +``` + +Now let’s try the containerized command from the server’s environment. 
+ +```bash +singularity exec blast.sif blastp -h +``` + +```output +USAGE + blastp [-h] [-help] [-import_search_strategy filename] + [-export_search_strategy filename] [-task task_name] [-db database_name] + [-dbsize num_letters] [-gilist filename] [-seqidlist filename] + [-negative_gilist filename] [-negative_seqidlist filename] + [-taxids taxids] [-negative_taxids taxids] [-taxidlist filename] + [-negative_taxidlist filename] [-ipglist filename] + [-negative_ipglist filename] [-entrez_query entrez_query] + [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm] + [-subject subject_input_file] [-subject_loc range] [-query input_file] + [-out output_file] [-evalue evalue] [-word_size int_value] + [-gapopen open_penalty] [-gapextend extend_penalty] + [-qcov_hsp_perc float_value] [-max_hsps int_value] + [-xdrop_ungap float_value] [-xdrop_gap float_value] + [-xdrop_gap_final float_value] [-searchsp int_value] [-seg SEG_options] + [-soft_masking soft_masking] [-matrix matrix_name] + [-threshold float_value] [-culling_limit int_value] + [-best_hit_overhang float_value] [-best_hit_score_edge float_value] + [-subject_besthit] [-window_size int_value] [-lcase_masking] + [-query_loc range] [-parse_deflines] [-outfmt format] [-show_gis] + [-num_descriptions int_value] [-num_alignments int_value] + [-line_length line_length] [-html] [-sorthits sort_hits] + [-sorthsps sort_hsps] [-max_target_seqs num_sequences] + [-num_threads int_value] [-ungapped] [-remote] [-comp_based_stats compo] + [-use_sw_tback] [-version] + +DESCRIPTION + Protein-Protein BLAST 2.9.0+ + +Use '-help' to print detailed descriptions of command line arguments +``` + +Same output, so now let's put our new BLAST container to use! + +## Acquiring Protein Data + +To start, we are going to need some data to serve as our database to search against. For this exercise, we will use C. elegans proteome. Let's download and uncompress this file. 
+ +```bash +wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/985/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_protein.faa.gz +``` + +This shouldn't take long, and produces the following output: +``` +Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.229, 130.14.250.13, 2607:f220:41f:250::228, ... +Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.229|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: 6989933 (6.7M) [application/x-gzip] +Saving to: ‘GCF_000002985.6_WBcel235_protein.faa.gz’ + +100%[=======================================================================>] 6,989,933 41.1MB/s in 0.2s + +2021-12-02 19:13:56 (41.1 MB/s) - ‘GCF_000002985.6_WBcel235_protein.faa.gz’ saved [6989933/6989933] + +``` + +Now we will unzip the data file. + +```bash +gunzip GCF_000002985.6_WBcel235_protein.faa.gz +``` + +Now we will convert the protein FASTA file we downloaded into a BLAST database to search against. + +```bash +time singularity exec -B $PWD blast.sif makeblastdb -in GCF_000002985.6_WBcel235_protein.faa -dbtype prot -out c_elegans +``` + +This gives us the following output: +```output + +Building a new DB, current time: 12/02/2021 19:14:39 +New DB name: /home/student/c_elegans +New DB title: GCF_000002985.6_WBcel235_protein.faa +Sequence type: Protein +Keep MBits: T +Maximum file size: 1000000000B +Adding sequences from FASTA; added 28350 sequences in 0.727009 seconds. + +real 0m1.248s +user 0m0.828s +sys 0m0.421s +``` + +Now we need some sequences of interest to search against the RefSeq database we just constructed. Let's download all the RefSeq proteins for Human Chromosome 1: + +```bash +wget ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.1.protein.faa.gz +``` + + +```output +--2021-12-02 19:15:08-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.1.protein.faa.gz + => ‘human.1.protein.faa.gz’ +Resolving ftp.ncbi.nih.gov (ftp.ncbi.nih.gov)... 
130.14.250.10, 130.14.250.11, 2607:f220:41f:250::230, ... +Connecting to ftp.ncbi.nih.gov (ftp.ncbi.nih.gov)|130.14.250.10|:21... connected. +Logging in as anonymous ... Logged in! +==> SYST ... done. ==> PWD ... done. +==> TYPE I ... done. ==> CWD (1) /refseq/H_sapiens/mRNA_Prot ... done. +==> SIZE human.1.protein.faa.gz ... 1836521 +==> PASV ... done. ==> RETR human.1.protein.faa.gz ... done. +Length: 1836521 (1.8M) (unauthoritative) + +100%[=======================================================================>] 1,836,521 8.92MB/s in 0.2s + +2021-12-02 19:15:08 (8.92 MB/s) - ‘human.1.protein.faa.gz’ saved [1836521] +``` + + +```bash +gunzip human.1.protein.faa.gz +``` + + +We've downloaded a multi-line FASTA file, but for ease of use, we will now convert this into a single-line FASTA. + +Convert multi-line FASTA to single-line FASTA +```bash +sed -e 's/\(^>.*$\)/#\1#/' human.1.protein.faa | tr -d "\r" | tr -d "\n" | sed -e 's/$/#/' | tr "#" "\n" | sed -e '/^$/d' > human.1.protein.faa.cleaned.fasta +``` + +Now, we'll BLAST the proteins of Human chromosome 1 against the c_elegans blast database. Normally, you would want to run your entire query sequence against the database and interpret results, but because these cloud systems we are connected to are small, in the interest of time we are going to reduce our number of query sequences. + +```bash +head -n 20 human.1.protein.faa.cleaned.fasta > search_query.fasta +``` + +This pulls the first 10 protein sequences listed in the human chromosome 1 file into a new file, search_query.fasta (20 lines, because each record is now one header line plus one sequence line). So instead of searching all 8,067 sequences, we will only search 10 (0.1%) against the c_elegans database we've constructed. + +Here we will run the blast program using our input and constructed blast database. 
+ +```bash +time singularity exec -B $PWD blast.sif blastp -num_threads 2 -db c_elegans -query search_query.fasta -outfmt 6 -out BLASTP_Results.txt -max_target_seqs 1 +``` + +This gives us the following output: +```output +Warning: [blastp] Examining 5 or more matches is recommended + +real 0m5.957s +user 0m10.364s +sys 0m0.500s +``` + +Checking the output file: +```bash +head BLASTP_Results.txt +``` + +Gives the following: +```output +NP_001355814.1 NP_001255859.1 41.697 542 273 10 122 646 77 592 1.46e-58 208 +NP_001355814.1 NP_001255859.1 41.199 517 259 9 125 626 154 640 2.69e-57 205 +NP_001355814.1 NP_001255859.1 38.609 575 266 9 116 646 109 640 3.11e-54 196 +NP_001355814.1 NP_001255859.1 39.962 533 256 10 112 618 156 650 8.53e-53 192 +NP_001355814.1 NP_001255859.1 40.393 458 211 9 109 565 243 639 1.33e-49 183 +NP_001355814.1 NP_001255859.1 38.647 207 105 5 108 314 455 639 2.77e-15 79.3 +NP_001355815.1 NP_001255859.1 40.937 491 220 10 1 472 153 592 4.73e-56 197 +NP_001355815.1 NP_001255859.1 39.038 520 232 13 2 472 80 563 1.16e-48 177 +NP_001355815.1 NP_001255859.1 42.958 426 196 9 76 459 75 495 9.38e-47 171 +NP_001355815.1 NP_001255859.1 40.086 464 218 11 2 444 220 644 9.89e-44 163 +``` + +# Any Questions? + +Please ask the presenters any questions you may have! diff --git a/episodes/10-singularity_build_rstudio.md b/episodes/10-singularity_build_rstudio.md new file mode 100644 index 0000000..57a9858 --- /dev/null +++ b/episodes/10-singularity_build_rstudio.md @@ -0,0 +1,195 @@ +--- +title: "10-singularity-build-rstudio" +teaching: 35 +exercises: 0 +--- + +# Disclaimer + +This build of rstudio is for demonstration purposes only on these demo instances. For production purposes please consult your local system admin. + +## Containerizing Rstudio + +CITE ROCKER AND RICHARD + +```bash +cd /projects/my-lab/10-build-rstudio +``` + + +To start, create a few directories Rstudio server looks for. 
+ +```bash +workdir=/projects/my-lab/10-build-rstudio + +mkdir -p -m 700 ${workdir}/run ${workdir}/tmp ${workdir}/var/lib/rstudio-server + +cat > ${workdir}/database.conf <:8787```** +**username is student** +**password is set above to password** + + +Lets load a package that from the container. + +```R +library('DESeq2') +``` + +Using the Rstudio file browser look at the R folder that appeared, but nothing is in it. + +Lets install something to the user space. + +```R +BiocManager::install("EnhancedVolcano") +``` + +Now we see the local user install. +```R +.libPaths() +``` + +```output +[1] "/home/student/R/x86_64-pc-linux-gnu-library/4.2" +[2] "/usr/local/lib/R/site-library" +[3] "/usr/local/lib/R/library" +``` + + + +**Verify the libs with terminal tab** +```ls /home/student/R/x86_64-pc-linux-gnu-library/4.2``` +```ls /usr/local/lib/R``` + +**Verify via ssh'ing with another tab** +```ssh student@``` +```ls /home/student/R/x86_64-pc-linux-gnu-library/4.2``` +```ls /usr/local/lib/R``` + +Lets install few fun packages. +```R +install.packages('knitr', dependencies = TRUE) +``` + +```R +options(repos = c( + carpentries = "https://carpentries.r-universe.dev/", + CRAN = "https://cran.rstudio.com/" +)) +install.packages("sandpaper", dep = TRUE) +``` + +**note the sandpaper command will reset the rstudio server.** +**select dont save** +```R +library('sandpaper') +sandpaper::create_lesson("~/r-intermediate-penguins") +``` + +**after reset we are now in the new folder** + +```R +sandpaper::serve(quiet = FALSE, host = "0.0.0.0", port = "8789") +``` + +**Now edit one of the episodes** + +```R + servr::daemon_stop() + ``` + +### Knit example + +Make new folder called myknit. +Make file in folder called myknit.Rmd +Copy section of code (lines 56 to 101) from the link below and paste into new myknit.Rmd file. 
+ + #https://github.com/sachsmc/knit-git-markr-guide/blob/master/knitr/knit.Rmd + +**Caution: the code below is just an example; copy the actual lines from the link above.** + +```R + ```{r setup, include=FALSE} +library(stringr) +library(knitr) +opts_chunk$set(tidy = FALSE) + +knit_hooks$set(source = function(x, options){ + if (!is.null(options$verbatim) && options$verbatim){ + opts = gsub(",\\s*verbatim\\s*=\\s*TRUE\\s*", "", options$params.src) + bef = sprintf('\n\n ```{r %s}\n', opts, "\n") + stringr::str_c( + bef, + knitr:::indent_block(paste(x, collapse = '\n'), " "), + "\n ```\n" + ) + } else { + stringr::str_c("\n\n```", tolower(options$engine), "\n", + paste(x, collapse = '\n'), "\n```\n\n" + ) + } +}) +``` + +### Citations and more information + +Example: +https://rpubs.com/Ilyashaikall/ClusteringofWine + +https://rpubs.com/diyasarya/diabet + +https://rpubs.com/ + +https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html + +https://github.com/rstudio/rmarkdown-website-examples diff --git a/episodes/fig/01_computer_parts.svg b/episodes/fig/01_computer_parts.svg new file mode 100644 index 0000000..9004899 --- /dev/null +++ b/episodes/fig/01_computer_parts.svg @@ -0,0 +1,1833 @@ + + + + + + + + + + image/svg+xml + + + + + + + What's inside your computer? 
+ + + + + + + + + + + STORAGE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CPU + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + M + E + M + O + R + Y + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + M + E + M + O + R + Y + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + M + E + M + O + R + Y + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + M + E + M + O + R + Y + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CPU + + + + + + + + + + + + STORAGE + + + diff --git a/episodes/fig/circos_1_1.svg b/episodes/fig/circos_1_1.svg new file mode 100644 index 0000000..f2afbcd --- /dev/null +++ b/episodes/fig/circos_1_1.svg @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/episodes/fig/circos_8_11.svg b/episodes/fig/circos_8_11.svg new file mode 100644 index 0000000..25572be --- /dev/null +++ b/episodes/fig/circos_8_11.svg @@ -0,0 +1,1810 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr1 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + +200 + + + + +220 + + + + +240 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr2 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + +200 + + + + +220 + + + + +240 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr3 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + +chr4 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr5 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr6 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr7 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr8 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr9 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr10 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr11 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr12 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr13 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr14 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr15 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr16 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr17 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + + + + + + + + + + + + + + + + + + + +chr18 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + + + + + + + + + + + + + + + + + + + + + +chr19 + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + + + + + + + +chr20 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + + + + + + + + + + + + + +chr21 + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + +chr22 + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chrx + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + +chry + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/episodes/fig/circos_8_6.svg b/episodes/fig/circos_8_6.svg new file mode 100644 index 0000000..53caf33 --- /dev/null +++ b/episodes/fig/circos_8_6.svg @@ -0,0 +1,4942 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr1 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + +200 + + + + +220 + + + + +240 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr2 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + +200 + + + + +220 + + + + +240 + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr3 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr4 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr5 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + +180 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr6 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + +160 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr7 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr8 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr9 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr10 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr11 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr12 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr13 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr14 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr15 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr16 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chr17 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + + + + + + + + + + + + + + + + + + + +chr18 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + + + + + + + + + + + + + + + + + + + + + +chr19 + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + + + + + + + +chr20 + +0 + + + + +20 + + + + +40 + + + + +60 + + + + + + + + + + + + + + + + +chr21 + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + +chr22 + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +chrx + +0 + + + + +20 + + + + +40 + + + + +60 + + + + +80 + + + + +100 + + + + +120 + + + + +140 + + + + + + + + + + + + + + + + +chry + +0 + + + + +20 + + + + +40 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/episodes/introduction.md b/episodes/introduction.md deleted file mode 100644 index 7065d23..0000000 --- a/episodes/introduction.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: "Using Markdown" -teaching: 10 -exercises: 2 ---- - 
-:::::::::::::::::::::::::::::::::::::: questions - -- How do you write a lesson using Markdown and `{sandpaper}`? - -:::::::::::::::::::::::::::::::::::::::::::::::: - -::::::::::::::::::::::::::::::::::::: objectives - -- Explain how to use markdown with The Carpentries Workbench -- Demonstrate how to include pieces of code, figures, and nested challenge blocks - -:::::::::::::::::::::::::::::::::::::::::::::::: - -## Introduction - -This is a lesson created via The Carpentries Workbench. It is written in -[Pandoc-flavored Markdown](https://pandoc.org/MANUAL.txt) for static files and -[R Markdown][r-markdown] for dynamic files that can render code into output. -Please refer to the [Introduction to The Carpentries -Workbench](https://carpentries.github.io/sandpaper-docs/) for full documentation. - -What you need to know is that there are three sections required for a valid -Carpentries lesson: - - 1. `questions` are displayed at the beginning of the episode to prime the - learner for the content. - 2. `objectives` are the learning objectives for an episode displayed with - the questions. - 3. `keypoints` are displayed at the end of the episode to reinforce the - objectives. - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: instructor - -Inline instructor notes can help inform instructors of timing challenges -associated with the lessons. They appear in the "Instructor View" - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -::::::::::::::::::::::::::::::::::::: challenge - -## Challenge 1: Can you do it? - -What is the output of this command? - -```r -paste("This", "new", "lesson", "looks", "good") -``` - -:::::::::::::::::::::::: solution - -## Output - -```output -[1] "This new lesson looks good" -``` - -::::::::::::::::::::::::::::::::: - - -## Challenge 2: how do you nest solutions within challenge blocks? 
- -:::::::::::::::::::::::: solution - -You can add a line with at least three colons and a `solution` tag. - -::::::::::::::::::::::::::::::::: -:::::::::::::::::::::::::::::::::::::::::::::::: - -## Figures - -You can use standard markdown for static figures with the following syntax: - -`![optional caption that appears below the figure](figure url){alt='alt text for -accessibility purposes'}` - -![You belong in The Carpentries!](https://raw.githubusercontent.com/carpentries/logo/master/Badge_Carpentries.svg){alt='Blue Carpentries hex person logo with no text.'} - -::::::::::::::::::::::::::::::::::::: callout - -Callout sections can highlight information. - -They are sometimes used to emphasise particularly important points -but are also used in some lessons to present "asides": -content that is not central to the narrative of the lesson, -e.g. by providing the answer to a commonly-asked question. - -:::::::::::::::::::::::::::::::::::::::::::::::: - - -## Math - -One of our episodes contains $\LaTeX$ equations when describing how to create -dynamic reports with {knitr}, so we now use mathjax to describe this: - -`$\alpha = \dfrac{1}{(1 - \beta)^2}$` becomes: $\alpha = \dfrac{1}{(1 - \beta)^2}$ - -Cool, right? - -::::::::::::::::::::::::::::::::::::: keypoints - -- Use `.md` files for episodes when you want static content -- Use `.Rmd` files for episodes when you need to generate output -- Run `sandpaper::check_lesson()` to identify any issues with your lesson -- Run `sandpaper::build_lesson()` to preview your lesson locally - -:::::::::::::::::::::::::::::::::::::::::::::::: - -[r-markdown]: https://rmarkdown.rstudio.com/