From fbd3a402d72cd3190ac4e09083c784a97484ef2c Mon Sep 17 00:00:00 2001
From: root These exercises are about reading and writing data sections of Introduction
to R. Exercise 1 Have a look at all files in notepad or excel before attempting to
read. All files can be found in the “data” directory.
-
-
+
+
+
[1] "/Users/mattpaul"
-
-sampleMeans <- c(mean(geneExpression[,1]),mean(geneExpression[,2]),mean(geneExpression[,3]),mean(geneExpression[,4]),mean(geneExpression[,5]),mean(geneExpression[,6]))
-names(sampleMeans) <- colnames(geneExpression)
-sampleMeans
-
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
+## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+is(geneExpression)
+## [1] "data.frame" "list" "oldClass" "vector"
+## [5] "list_OR_List" "vector_OR_factor" "vector_OR_Vector"
+
+
+
+
+sampleMeans <- c(mean(geneExpression[1,]),mean(geneExpression[2,]),mean(geneExpression[3,]),mean(geneExpression[4,]),mean(geneExpression[5,]),mean(geneExpression[6,]),mean(geneExpression[7,]),mean(geneExpression[8,]))
+names(sampleMeans) <- rownames(geneExpression)
+sampleMeans
+
+## Gene_a Gene_b Gene_c Gene_d Gene_e Gene_f Gene_g Gene_h
+## 4.660569 4.379796 4.259824 5.849420 5.850658 6.732781 10.405203 10.201357
+
+geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
+## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
+## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
+## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
+## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
+## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
+## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
+## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
+## Sample_1.low tissue_localisation mean_expr
+## Gene_a 5.9350948 Kidney 4.660569
+## Gene_b 2.6313925 Adrenal 4.379796
+## Gene_c 5.7149521 Liver 4.259824
+## Gene_d 8.1964109 Adrenal 5.849420
+## Gene_e 0.7332521 Kidney 5.850658
+## Gene_f 3.8519471 Liver 6.732781
+## Gene_g 11.1733928 Liver 10.405203
+## Gene_h 9.9032500 Kidney 10.201357
+
+orderedExpression <- geneExpression[order(geneExpression$mean_expr,decreasing=T),]
+filteredExpression <- orderedExpression[orderedExpression$mean_expr>5,]
+expressionDF <- cbind(rownames(filteredExpression),filteredExpression)
+colnames(expressionDF)[1] <- "geneNames"
+write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
-geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
-geneExpression
geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
+geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
@@ -486,8 +540,8 @@
+geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
-geneExpression
geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
+geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
@@ -506,16 +560,6 @@
diff --git a/docs/exercises/answers/Functions_answers.html b/docs/exercises/answers/Functions_answers.html
index 8026aa8..f85ece8 100644
--- a/docs/exercises/answers/Functions_answers.html
+++ b/docs/exercises/answers/Functions_answers.html
@@ -439,6 +439,7 @@ geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
-orderedExpression <- geneExpression[order(geneExpression$Sample_1.hi,decreasing=T),]
-expressionDF <- cbind(rownames(orderedExpression),orderedExpression)
-colnames(expressionDF)[1] <- "geneNames"
-write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
Introduction
to R.
Exercise 1 - Functions
– Create a function which takes one number and returns the square of that number
## [1] 2
meanOfVectors <- function(vector,vector2){
- meanOfVectorsRes <- mean(c(vector,vector2))
- return(meanOfVectorsRes)
-}
-
-meanOfVectors(1,3)
## [1] 2
+ bigvector <- c(vector,vector2)
+ message(paste("The total length of my vectors is", length(bigvector)))
+ meanOfVectorsRes <- mean(bigvector )
+ return(meanOfVectorsRes)
+}
+
+meanOfVectors(c(1,3,4),c(3,6,1,7,9))
+## The total length of my vectors is 8
+## [1] 4.25
dfAndVecReturn <- function(number,number2){
- input <- c(number,number2)
- df <- data.frame(mean=mean(c(number,number2)),
- sum=number+number2,
- multiple=number*number2
- )
- return(list(input,df))
-}
-
-dfAndVecReturn(1,3)
dfAndVecReturn <- function(number,number2){
+ input <- c(number,number2)
+ df <- data.frame(mean=mean(c(number,number2)),
+ sum=number+number2,
+ multiple=number*number2
+ )
+ return(list(input,df))
+}
+
+dfAndVecReturn(1,3)
## [[1]]
## [1] 1 3
##
@@ -489,48 +495,77 @@ findSmallestFactorial <- function(x){
- factorialAnswer <- 0
- count <- 0
- while(factorialAnswer <= x){
- count <- count+1
- if(count == 1){
- factorialAnswer <- 1
- }else{
- factorialAnswer <- factorialAnswer * count
- }
- }
- return(count)
-}
-
-findSmallestFactorial(3000)
+findSmallestFactorial <- function(x){
+ factorialAnswer <- 0
+ count <- 0
+ while(factorialAnswer <= x){
+ count <- count+1
+ if(count == 1){
+ factorialAnswer <- 1
+ }else{
+ factorialAnswer <- factorialAnswer * count
+ }
+ }
+ return(count)
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
+
## [1] 70
– Add an if and else statement to your function so that the
factorial code only runs if the argument is numeric.
-findSmallestFactorial <- function(x){
- if(!is.numeric(x)){
- message("Please provide a numeric argument!")
- }else{
- factorialAnswer <- 0
- count <- 0
- while(factorialAnswer <= x){
- count <- count+1
- if(count == 1){
- factorialAnswer <- 1
- }else{
- factorialAnswer <- factorialAnswer * count
- }
- }
- return(count)
- }
-}
-
-findSmallestFactorial(3000)
+findSmallestFactorial <- function(x){
+ if(!is.numeric(x)){
+ message("Please provide a numeric argument!")
+ }else{
+ factorialAnswer <- 0
+ count <- 0
+ while(factorialAnswer <= x){
+ count <- count+1
+ if(count == 1){
+ factorialAnswer <- 1
+ }else{
+ factorialAnswer <- factorialAnswer * count
+ }
+ }
+ return(count)
+ }
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
+
## Please provide a numeric argument!
+Exercise 2 - Scripts
+Let's try to put together as much as possible of what we have learnt thus far. This
+will be a multistep challenge. Break it down and use pseudocode to help.
+Start by working the code interactively, then turn it into a script.
+
+- Read in the “data/GeneExpression.txt” dataset.
+- Use apply to calculate the Z score for each gene (per row). The
+zscore is (gene_expression - mean)/standard deviation. You should use a
+function to do this calculation.
+- Find which gene has the highest absolute max Zscore. This is a very
+rough proxy for the variability of that gene.
+- Print out the gene name with the highest value
+- Turn this into a script and run the script
+- Think about what modifications you would need to make in order to
+accept a different data set as input.
+
+geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+geneExpression <-as.matrix(geneExpression)
+zscores <- function(x){
+ my_mean <- mean(x)
+ my_sd <- sd(x)
+ my_z <- (x-my_mean)/my_sd
+ return(my_z)}
+
+my_zs <- apply(geneExpression,2, zscores)
+
+inds <- which(my_zs == max(my_zs), arr.ind = T)
+rownames(geneExpression)[inds[,1]]
+## [1] "Gene_h"
diff --git a/r_course/exercises/answers/Dataframes_answers.html b/docs/exercises/answers/Lists_answers.html
similarity index 98%
rename from r_course/exercises/answers/Dataframes_answers.html
rename to docs/exercises/answers/Lists_answers.html
index 1ded227..86ec741 100644
--- a/r_course/exercises/answers/Dataframes_answers.html
+++ b/docs/exercises/answers/Lists_answers.html
@@ -12,7 +12,7 @@
-Factors and Data frames
+Lists
@@ -337,22 +437,68 @@ Introduction
+These exercises are about reading and writing data sections of Introduction
to R.
Exercise 1
Have a look at all files in notepad or excel before attempting to
read. All files can be found in the “data” directory.
-Read in the tab delimited file “GeneExpression.txt”.
-Find the mean expression of all samples.
+- Check your current working directory. Set your working directory to
+be in the downloaded course material. Specifically the r_course
+subdirectory. [The exact path will depend on where you have saved your
+download]
+
+
+[1] "/Users/mattpaul"
+
+
+- Read in the tab delimited file “GeneExpression.txt”. Check the data
+type.
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
+## [1] "data.frame" "list" "oldClass" "vector"
+## [5] "list_OR_List" "vector_OR_factor" "vector_OR_Vector"
-- Read in the tab delimited file “GeneExpressionWithMethods.txt”. This
-file contains information on analysis steps used to produce file.
+Coerce the data frame to a matrix
+Find the mean expression of all genes.
+
+## Gene_a Gene_b Gene_c Gene_d Gene_e Gene_f Gene_g Gene_h
+## 4.660569 4.379796 4.259824 5.849420 5.850658 6.732781 10.405203 10.201357
+
+- Coerce the matrix back to a data frame. Add an additional column
+with extra gene info
+“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”.
+Also add the mean expression as a column.
+
+geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
+## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
+## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
+## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
+## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
+## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
+## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
+## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
+## Sample_1.low tissue_localisation mean_expr
+## Gene_a 5.9350948 Kidney 4.660569
+## Gene_b 2.6313925 Adrenal 4.379796
+## Gene_c 5.7149521 Liver 4.259824
+## Gene_d 8.1964109 Adrenal 5.849420
+## Gene_e 0.7332521 Kidney 5.850658
+## Gene_f 3.8519471 Liver 6.732781
+## Gene_g 11.1733928 Liver 10.405203
+## Gene_h 9.9032500 Kidney 10.201357
+
+Read in the tab delimited file “GeneExpression.txt”. Order genes by
+decreasing mean expression. Filter out genes with a mean
+expression below 5. Write out a new comma separated file with column
+titles.
+Read in the tab delimited file “GeneExpressionWithMethods.txt”.
+This file contains information on analysis steps used to produce
+file.
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
@@ -394,11 +540,6 @@ Introduction
to R.
+
Exercise 1 - Functions
– Create a function which takes one number and returns the square of
that number
## [1] 9
– Create a function which takes two numbers and returns the mean
## [1] 2
-- Create a function which takes two vectors and returns the mean
+- Create a function which takes two vectors and returns the mean.
+Include a message statement that gives the total length of both
+vectors.
-## [1] 2
+## The total length of my vectors is 8
+## [1] 4.25
- Create a function which takes two numbers and returns the two
numbers as a vector and the mean, sum and multiple as a
@@ -367,6 +371,23 @@
+- Read in the “data/GeneExpression.txt” dataset.
+- Use apply to calculate the Z score for each gene (per row). The
+zscore is (gene_expression - mean)/standard deviation. You should use a
+function to do this calculation.
+- Find which gene has the highest absolute max Zscore. This is a very
+rough proxy for the variability of that gene.
+- Print out the gene name with the highest value
+- Turn this into a script and run the script
+- Think about what modifications you would need to make in order to
+accept a different data set as input.
+
+## [1] "Gene_h"
diff --git a/r_course/exercises/exercises/Dataframes_exercise.html b/docs/exercises/exercises/Lists_exercise.html
similarity index 98%
rename from r_course/exercises/exercises/Dataframes_exercise.html
rename to docs/exercises/exercises/Lists_exercise.html
index a1cb488..86ec741 100644
--- a/r_course/exercises/exercises/Dataframes_exercise.html
+++ b/docs/exercises/exercises/Lists_exercise.html
@@ -12,7 +12,7 @@
-Factors and Data frames
+Lists
@@ -326,7 +426,7 @@
-Factors and Data frames
+Lists
Rockefeller University, Bioinformatics Resource
Centre
https://rockefelleruniversity.github.io/Intro_To_R_1Day/
@@ -337,61 +437,102 @@ Introduction
+These exercises are about the Lists sections of Introduction
to R.
-Exercise 1 - Data frames
+Exercise 1
+
+- Create a list containing a character vector, a numeric matrix and a
+data frame with 2 or more datatypes.
+
+firstElement <- c("A","B","C","D","E")
+secondElement <- matrix(1:5,nrow=5,ncol=5)
+thirdElement <- data.frame(Sample=c("Sample1","Sample2","Sample3","Sample4"), Age=c(25,21,24,25),factor=c("Smoker","Smoker","NonSmoker","Smoker"))
+
+my_list <- list(firstElement, secondElement, thirdElement)
+my_list
+## [[1]]
+## [1] "A" "B" "C" "D" "E"
+##
+## [[2]]
+## [,1] [,2] [,3] [,4] [,5]
+## [1,] 1 1 1 1 1
+## [2,] 2 2 2 2 2
+## [3,] 3 3 3 3 3
+## [4,] 4 4 4 4 4
+## [5,] 5 5 5 5 5
+##
+## [[3]]
+## Sample Age factor
+## 1 Sample1 25 Smoker
+## 2 Sample2 21 Smoker
+## 3 Sample3 24 NonSmoker
+## 4 Sample4 25 Smoker
+
+- Rename each entry: “my_vector”, “my_matrix”, “my_df”
+
+
+## $my_vector
+## [1] "A" "B" "C" "D" "E"
+##
+## $my_matrix
+## [,1] [,2] [,3] [,4] [,5]
+## [1,] 1 1 1 1 1
+## [2,] 2 2 2 2 2
+## [3,] 3 3 3 3 3
+## [4,] 4 4 4 4 4
+## [5,] 5 5 5 5 5
+##
+## $my_df
+## Sample Age factor
+## 1 Sample1 25 Smoker
+## 2 Sample2 21 Smoker
+## 3 Sample3 24 NonSmoker
+## 4 Sample4 25 Smoker
-Create data frame called Annotation with a column of gene names
-(“Gene_1”, “Gene_2”, “Gene_3”,“Gene_4”,“Gene_5”), ensembl gene names
-(“Ens001”, “Ens003”, “Ens006”, “Ens007”, “Ens010”), pathway information
-(“Glycolysis”, “TGFb”, “Glycolysis”, “TGFb”, “Glycolysis”) and gene
-lengths (100, 3000, 200, 1000,1200).
-Create data frame called Sample1 with ensembl gene names
-(“Ens001”, “Ens003”, “Ens006”, “Ens010”) and expression (1000, 3000,
-10000,5000)
-Create data frame called Sample2 with ensembl gene names
-(“Ens001”, “Ens003”, “Ens006”, “Ens007”,“Ens010”) and expression (1500,
-1500, 17000,500,10000)
-Create a data frame containing only those gene names common to
-all data frames with all information from Annotation and the expression
-from Sample 1 and Sample 2.
+- Access the second column of the dataframe. Try to access it in at
+least two different ways.
-## ensembl geneNames pathway geneLengths expression.x expression.y
-## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
-## 2 Ens003 Gene_2 TGFb 3000 3000 1500
-## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
-## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
+
+## [1] 25 21 24 25
+
+## [1] 25 21 24 25
-- Add an extra two columns containing the length normalised
-expressions for Sample 1 and Sample 2.
+- Add a list into the fourth slot of our list. This list should
+contain 2 numeric vectors. The resulting list should be 4 long.
+Check.
-Hint see our previous
-exercises
-## ensembl geneNames pathway geneLengths expression.x expression.y
-## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
-## 2 Ens003 Gene_2 TGFb 3000 3000 1500
-## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
-## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
-## Sample1_lne Sample2_lne
-## 1 10.000000 15.000000
-## 2 1.000000 0.500000
-## 3 50.000000 85.000000
-## 4 4.166667 8.333333
+
+## [1] 4
-- Identify the mean length normalised expression across Sample1 and
-Sample2 for Ens006 genes
+- Access the second vector within the sublist in the 4th
+position.
-## [1] 50
+
+## [1] 2 3 4 5 6
-- For all genes, identify the log2 fold change in length normalised
-expression from Sample 1 to Sample 2.
+- Create a list with three numeric vectors: c(5,2,9), c(13,12,6),
+c(1,3,4)
-## Gene_1 Gene_2 Gene_3 Gene_5
-## 0.5849625 -1.0000000 0.7655347 1.0000000
+
-- Identify the total length of genes in Glycolysis pathway.
+- Flatten the list and turn it into a matrix.
-## [1] 1500
+flat_list <- unlist(my_list)
+listAsMat <- matrix(flat_list,
+ nrow=length(my_list),
+ ncol=3,
+ byrow=T,
+ dimnames=list(names(my_list)))
+listAsMat
+## [,1] [,2] [,3]
+## First 5 2 9
+## Second 13 12 6
+## Third 1 3 4
diff --git a/r_course/exercises/exercises/vector_exercise.html b/docs/exercises/exercises/Vectors_exercise.html
similarity index 100%
rename from r_course/exercises/exercises/vector_exercise.html
rename to docs/exercises/exercises/Vectors_exercise.html
diff --git a/docs/exercises/exercises/conditionsAndLoops_exercise.html b/docs/exercises/exercises/conditionsAndLoops_exercise.html
index 728e753..eddba83 100644
--- a/docs/exercises/exercises/conditionsAndLoops_exercise.html
+++ b/docs/exercises/exercises/conditionsAndLoops_exercise.html
@@ -339,12 +339,34 @@ Introduction
to R.
-
– Calculate the factorial (factorial of 3 = 3 * 2 * 1) of 10 using a
-loop.
-## [1] 3628800
-– Adjusting your answer from before, what is the first number that
-has a factorial greater than 1000.
-## [1] 7
+Exercise 1 - If Else
+
+- Build an if statement that tests if x is a negative number. Write a
+print statement that will confirm if x is less than 0. Test it with
+several values of x.
+
+## [1] "It's a negative number!"
+
+- Modify the previous if statement to include an else. Ensure a print
+statement is returned to say if the number is not negative.
+
+## [1] "It's not a negative number!"
+## [1] "It's a negative number!"
+
+- Finally add an else if statement. We want a response to confirm if x
+is negative, positive or if it is zero.
+
+## [1] "It's a negative number!"
+## [1] "It's zero"
+## [1] "It's a positive number!"
+
+- Build an if/else statement that tests if a variable is odd/even.
+Include x in the printed output.
+
+Hint: The modulus operator may be useful here i.e. x%%2 returns
+the remainder after the value of x is divided by 2.
+## [1] "1 is odd"
+## [1] "2 is even"
– Using an ifelse() expression, create a factor from a vector of 1 to
40 where all numbers less than 10 are “small”,10 to 30 are “mid”,31 to
40 are “big”
@@ -355,8 +377,19 @@
+code {
+white-space: pre;
+}
+.sourceCode {
+overflow: visible;
+}
+
+
+
@@ -337,11 +437,49 @@ Introduction
+These exercises are about the factors and data frames sections of Introduction
to R.
-These exercises cover the factors and data frames sections of Introduction to R.
Exercise 1 - Factors
+- Create a nominal factor called CellType containing:
+“DC1”,“DC1”,“DC1”,“NK”,“NK”,“Mono”,“Mono”,“DC2”,“NK”
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK
+
+- Modify the third position of CellType to “Neu”, by modifying the
+levels of the factor.
+
+## [1] DC1 DC1 Neu NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK Neu
+
+- Create CellType2 with the same entries, but directly specify the
+levels to include: “DC1”, “DC2”, “Mono”, “NK”, “Neu”, “Bcell”,
+“Tcell”.
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
+- Use combine to increase the length of CellType2 to include:
+“Neu”,“Neu”,“Bcell”,“DC1”
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK Neu Neu Bcell
+## [13] DC1
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
+- Summarize the number of entries for each cell type.
+
+
+## DC1 DC2 Mono NK Neu Bcell Tcell
+## 4 1 2 3 2 1 0
+
+- Reorder the summary to alphabetical order
+
+
+## Bcell DC1 DC2 Mono Neu NK Tcell
+## 1 4 1 2 2 3 0
+
- Create an ordinal factor named “Height” containing – high, low, mid,
low, mid, low, mid, high, mid, high.
@@ -361,20 +499,65 @@
-
@@ -423,13 +412,13 @@
- conditionsAndLoops
+ ConditionsAndLoops
@@ -481,7 +470,7 @@ Course Overview
Course Integrity
-This course is compiled automatically on 2024-12-10
+This course is compiled automatically on 2024-12-11
The course is tested and available on MacOS, Windows and
diff --git a/docs/presentations/singlepage/introToR_Session1.html b/docs/presentations/singlepage/introToR_Session1.html
index ba083d8..8c6933c 100644
--- a/docs/presentations/singlepage/introToR_Session1.html
+++ b/docs/presentations/singlepage/introToR_Session1.html
@@ -2084,11 +2084,11 @@ Combining logical vectors
-
-Answers to exercise.
-Answers can be found here here
+
+Answers to exercise
+Answers can be found here
@@ -2395,11 +2395,11 @@ Data types
Time for an exercise!
-Exercise on matrices can be found here
+Exercise on matrices can be found here
-
-Answers to exercise.
-Answers can be found here here
+
+Answers to exercise
+Answers can be found here
@@ -2453,8 +2453,7 @@ Display order of levels
Nominal factors
In some cases there is no natural order to the categories such that
-one category is greater than the other (nominal data). In this case we
-can see that R is gender neutral.
+one category is greater than the other (nominal data).
factorExample <- factor(vectorExample, levels=c("male","female"))
factorExample[1] < factorExample[2]
## Warning in Ops.factor(factorExample[1], factorExample[2]): '<' not meaningful
@@ -2798,11 +2797,11 @@ Merging data frames
Time for an exercise!
-Exercise on data frames can be found here
+Exercise on data frames can be found here
-
-Answers to exercise.
-Answers can be found here here
+
+Answers to exercise
+Answers can be found here
@@ -2953,6 +2952,14 @@ Flattening lists to matrices
## Second 2 6 7
## Third 1 4 7
+
+Time for an exercise!
+Exercise on lists can be found here
+
+
+Answers to exercise
+Answers can be found here
+
Coercing data formats
@@ -3075,7 +3082,7 @@ More complex objects
manage Dates or Times.
-## [1] "2024-12-10 21:07:46 UTC"
+## [1] "2024-12-11 01:00:32 UTC"
More complex objects
@@ -3103,11 +3110,11 @@ More complex objects
More complex objects
We can also use the arithmetic operations with our time objects.
-## [1] "2024-12-10 21:07:46 UTC"
+## [1] "2024-12-11 01:00:32 UTC"
-## [1] "2024-12-10 21:05:46 UTC"
+## [1] "2024-12-11 00:58:32 UTC"
-## Time difference of 0.1043649 secs
+## Time difference of 0.1018391 secs
More complex objects
@@ -3118,18 +3125,18 @@ More complex objects
We can also change the timezone by specifying a tz
parameter
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
Complex to base objects
Most of the time we can convert more complex objects back to our basic
object types we are more familiar with.
-## [1] "2024-12-10 21:07:46.094567"
+## [1] "2024-12-11 01:00:32.629524"
-## [1] 0.1043649
+## [1] 0.1018391
Complex objects summary
@@ -3238,13 +3245,13 @@ Data from external sources
Gene_a
-3.973947
+3.658047
-3.930245
+5.530241
-2.474149
+3.272248
@@ -3252,13 +3259,13 @@ Data from external sources
Gene_b
-3.461118
+3.778873
-3.985219
+5.652030
-5.081024
+3.694406
@@ -3266,13 +3273,13 @@ Data from external sources
Gene_c
-4.363544
+3.918055
-4.808006
+4.841402
-4.068423
+5.957884
@@ -3280,13 +3287,13 @@ Data from external sources
Gene_d
-5.089677
+3.223094
-1.477786
+3.130542
-4.949975
+4.522371
@@ -3294,13 +3301,13 @@ Data from external sources
Gene_e
-9.478252
+10.369538
-10.525974
+10.172782
-9.489895
+10.039658
@@ -3308,13 +3315,13 @@ Data from external sources
Gene_f
-9.167348
+11.044072
-10.601165
+10.561451
-9.324316
+9.909005
@@ -3322,13 +3329,13 @@ Data from external sources
Gene_g
-9.399451
+8.915426
-11.443669
+11.178636
-9.400897
+10.473599
@@ -3336,13 +3343,13 @@ Data from external sources
Gene_h
-11.166681
+9.649608
-11.069730
+10.231282
-9.805737
+10.386836
@@ -3655,11 +3662,11 @@ Save and read data
remembers the objects original name i.e. Table or myList.
-
+
Time for an exercise!
Exercise on reading and writing data can be found here
-
+
Answers to exercise
Answers can be found here
diff --git a/docs/presentations/singlepage/introToR_Session2.html b/docs/presentations/singlepage/introToR_Session2.html
index eb84eb9..65a321d 100644
--- a/docs/presentations/singlepage/introToR_Session2.html
+++ b/docs/presentations/singlepage/introToR_Session2.html
@@ -2151,11 +2151,11 @@ sapply() example 3
-## Mean is 0.389048273564465
-## [1] 0.3648576
+## Mean is 0.416867907369881
+## [1] -0.002603881
Debugging functions
@@ -2406,10 +2406,10 @@ Debugging functions
Custom functions and apply
These custom functions can also be utilized with apply.
-## [1] 0.36485764 1.15853052 -0.87955629 0.30247335 -0.27396124 -0.41809173
-## [7] 0.52441334 0.69940853 1.12897715 0.07544800 -1.69037492 1.56788608
-## [13] -1.27228116 1.51188113 -0.05544895 0.88612324 -0.78095665 -1.69200058
-## [19] -0.90606862 -0.25125886
+## [1] -0.002603881 -0.128616732 0.538543594 1.050796185 -0.562454904
+## [6] 1.331683190 -0.208345806 -0.621922028 -2.431404457 -0.149177218
+## [11] -0.905864425 0.441651262 -0.378431902 -0.195380445 -1.361557140
+## [16] 1.155218949 -0.223992938 2.123924129 0.421056362 0.106878204
@@ -2515,7 +2515,7 @@ Sourcing scripts
}
-## [1] "Tue"
+## [1] "Wed"
Rscript
diff --git a/docs/presentations/slides/introToR_Session1.html b/docs/presentations/slides/introToR_Session1.html
index 173c79a..c10ab48 100644
--- a/docs/presentations/slides/introToR_Session1.html
+++ b/docs/presentations/slides/introToR_Session1.html
@@ -1020,14 +1020,14 @@
## Time for an exercise!
-Exercise on vectors can be found [here](../../exercises/exercises/vector_exercise.html)
+Exercise on vectors can be found [here](../../exercises/exercises/Vectors_exercise.html)
---
-## Answers to exercise.
+## Answers to exercise
-Answers can be found here [here](../../exercises/answers/vector_answers.html)
+Answers can be found [here](../../exercises/answers/Vectors_answers.html)
---
@@ -1583,14 +1583,14 @@
## Time for an exercise!
-Exercise on matrices can be found [here](../../exercises/exercises/matrices_exercise.html)
+Exercise on matrices can be found [here](../../exercises/exercises/Matrices_exercise.html)
---
-## Answers to exercise.
+## Answers to exercise
-Answers can be found here [here](../../exercises/answers/matrices_answers.html)
+Answers can be found [here](../../exercises/answers/Matrices_answers.html)
@@ -1690,7 +1690,6 @@
## Nominal factors
In some cases there is no natural order to the categories such that one category is greater than the other (nominal data).
-In this case we can see that R is gender neutral.
``` r
@@ -2264,13 +2263,13 @@
---
## Time for an exercise!
-Exercise on data frames can be found [here](../../exercises/exercises/factorsAndDataframes_exercise.html)
+Exercise on data frames can be found [here](../../exercises/exercises/FactorsAndDataframes_exercise.html)
---
-## Answers to exercise.
+## Answers to exercise
-Answers can be found here [here](../../exercises/answers/factorsAndDataframes_answers.html)
+Answers can be found [here](../../exercises/answers/FactorsAndDataframes_answers.html)
---
@@ -2495,6 +2494,21 @@
## Third 1 4 7
```
+---
+## Time for an exercise!
+
+
+Exercise on lists can be found [here](../../exercises/exercises/Lists_exercise.html)
+
+
+---
+## Answers to exercise
+
+
+Answers can be found [here](../../exercises/answers/Lists_answers.html)
+
+
+
---
class: inverse, center, middle
@@ -2708,7 +2722,7 @@
```
```
-## [1] "2024-12-10 21:07:43 UTC"
+## [1] "2024-12-11 01:00:30 UTC"
```
---
@@ -2763,7 +2777,7 @@
```
```
-## [1] "2024-12-10 21:07:43 UTC"
+## [1] "2024-12-11 01:00:30 UTC"
```
``` r
@@ -2771,7 +2785,7 @@
```
```
-## [1] "2024-12-10 21:05:43 UTC"
+## [1] "2024-12-11 00:58:30 UTC"
```
``` r
@@ -2779,7 +2793,7 @@
```
```
-## Time difference of 0.1014812 secs
+## Time difference of 0.09647393 secs
```
---
@@ -2797,7 +2811,7 @@
```
```
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
```
``` r
@@ -2805,7 +2819,7 @@
```
```
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
```
---
@@ -2819,7 +2833,7 @@
```
```
-## [1] "2024-12-10 21:07:43.989243"
+## [1] "2024-12-11 01:00:30.832828"
```
``` r
@@ -2827,7 +2841,7 @@
```
```
-## [1] 0.1014812
+## [1] 0.09647393
```
---
@@ -2944,51 +2958,51 @@
<tbody>
<tr>
<td style="text-align:left;"> Gene_a </td>
- <td style="text-align:right;"> 3.423364 </td>
- <td style="text-align:right;"> 2.551130 </td>
- <td style="text-align:right;"> 2.575654 </td>
+ <td style="text-align:right;"> 4.429622 </td>
+ <td style="text-align:right;"> 3.363374 </td>
+ <td style="text-align:right;"> 4.520210 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_b </td>
- <td style="text-align:right;"> 4.080952 </td>
- <td style="text-align:right;"> 5.865549 </td>
- <td style="text-align:right;"> 3.958085 </td>
+ <td style="text-align:right;"> 4.114483 </td>
+ <td style="text-align:right;"> 3.992814 </td>
+ <td style="text-align:right;"> 2.877940 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_c </td>
- <td style="text-align:right;"> 4.473275 </td>
- <td style="text-align:right;"> 3.771709 </td>
- <td style="text-align:right;"> 4.611821 </td>
+ <td style="text-align:right;"> 3.521606 </td>
+ <td style="text-align:right;"> 3.215211 </td>
+ <td style="text-align:right;"> 5.199799 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_d </td>
- <td style="text-align:right;"> 3.415446 </td>
- <td style="text-align:right;"> 3.817052 </td>
- <td style="text-align:right;"> 2.907351 </td>
+ <td style="text-align:right;"> 3.840954 </td>
+ <td style="text-align:right;"> 3.860644 </td>
+ <td style="text-align:right;"> 3.688784 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_e </td>
- <td style="text-align:right;"> 11.960879 </td>
- <td style="text-align:right;"> 9.113075 </td>
- <td style="text-align:right;"> 9.927639 </td>
+ <td style="text-align:right;"> 11.896069 </td>
+ <td style="text-align:right;"> 10.056214 </td>
+ <td style="text-align:right;"> 10.062543 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_f </td>
- <td style="text-align:right;"> 9.821113 </td>
- <td style="text-align:right;"> 8.704334 </td>
- <td style="text-align:right;"> 9.940262 </td>
+ <td style="text-align:right;"> 9.947901 </td>
+ <td style="text-align:right;"> 8.884012 </td>
+ <td style="text-align:right;"> 8.773162 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_g </td>
- <td style="text-align:right;"> 10.763196 </td>
- <td style="text-align:right;"> 9.869513 </td>
- <td style="text-align:right;"> 9.393374 </td>
+ <td style="text-align:right;"> 9.640603 </td>
+ <td style="text-align:right;"> 10.470026 </td>
+ <td style="text-align:right;"> 9.591002 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_h </td>
- <td style="text-align:right;"> 11.401766 </td>
- <td style="text-align:right;"> 9.550342 </td>
- <td style="text-align:right;"> 11.530905 </td>
+ <td style="text-align:right;"> 11.954618 </td>
+ <td style="text-align:right;"> 10.533096 </td>
+ <td style="text-align:right;"> 11.728931 </td>
</tr>
</tbody>
</table>
diff --git a/docs/presentations/slides/introToR_Session2.html b/docs/presentations/slides/introToR_Session2.html
index dfdd2ed..cc513c9 100644
--- a/docs/presentations/slides/introToR_Session2.html
+++ b/docs/presentations/slides/introToR_Session2.html
@@ -1027,13 +1027,13 @@
## Time for an exercise!
-Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/conditionsAndLoops_exercise.html)
+Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/ConditionsAndLoops_exercise.html)
---
## Answers to exercise
-Answers can be found here [here](../..//exercises/answers/conditionsAndLoops_answers.html)
+Answers can be found [here](../..//exercises/answers/ConditionsAndLoops_answers.html)
---
@@ -1388,11 +1388,11 @@
```
```
-## Mean is -0.0417158165317701
+## Mean is -0.138706455190791
```
```
-## [1] -0.555588
+## [1] 1.020659
```
---
@@ -1417,10 +1417,10 @@
```
```
-## [1] -0.5555880 -0.1322663 0.1899138 -0.2299799 -1.6904861 0.4030488
-## [7] 0.9826960 -0.1899368 1.5052016 1.1021064 -0.8039631 -0.1358766
-## [13] -0.1283686 0.4864651 0.7693742 -2.5554399 1.2469762 -1.0641361
-## [19] 0.5323066 0.2679527
+## [1] 1.02065900 0.47143522 -0.11262739 0.35643079 -1.19629115 -0.07521402
+## [7] -0.55040037 0.13963265 1.40238593 -1.51405434 -1.32555234 -1.04840617
+## [13] 1.84708611 0.46460992 1.63750080 -0.51232894 -0.95341254 0.89065505
+## [19] -0.51716384 -0.42494437
```
@@ -1569,7 +1569,7 @@
```
```
-## [1] "Tue"
+## [1] "Wed"
```
---
diff --git a/r_course/_site.yml b/r_course/_site.yml
index e320864..42727c8 100644
--- a/r_course/_site.yml
+++ b/r_course/_site.yml
@@ -24,42 +24,36 @@ navbar:
href: presentations/r_code/introToR_Session2.R
- text: Exercises
menu:
- - text: vector
+ - text: Vectors
menu:
- text: Exercise
- href: exercises/exercises/vector_exercise.html
+ href: exercises/exercises/Vectors_exercise.html
- text: Solution
- href: exercises/answers/vector_answers.html
- - text: matrices
+ href: exercises/answers/Vectors_answers.html
+ - text: Matrices
menu:
- text: Exercise
- href: exercises/exercises/matrices_exercise.html
+ href: exercises/exercises/Matrices_exercise.html
- text: Solution
- href: exercises/answers/matrices_answers.html
- - text: factorsAndDataframes
+ href: exercises/answers/Matrices_answers.html
+ - text: FactorsAndDataframes
menu:
- text: Exercise
- href: exercises/exercises/factorsAndDataframes_exercise.html
+ href: exercises/exercises/FactorsAndDataframes_exercise.html
- text: Solution
- href: exercises/answers/factorsAndDataframes_answers.html
- - text: Dataframes
- menu:
- - text: Exercise
- href: exercises/exercises/Dataframes_exercise.html
- - text: Solution
- href: exercises/answers/Dataframes_answers.html
+ href: exercises/answers/FactorsAndDataframes_answers.html
- text: DataInputOutput
menu:
- text: Exercise
href: exercises/exercises/DataInputOutput_exercise.html
- text: Solution
href: exercises/answers/DataInputOutput_answers.html
- - text: conditionsAndLoops
+ - text: ConditionsAndLoops
menu:
- text: Exercise
- href: exercises/exercises/conditionsAndLoops_exercise.html
+ href: exercises/exercises/ConditionsAndLoops_exercise.html
- text: Solution
- href: exercises/answers/conditionsAndLoops_answers.html
+ href: exercises/answers/ConditionsAndLoops_answers.html
- text: Functions
menu:
- text: Exercise
diff --git a/r_course/data/writeThisMultipleXLSX.xlsx b/r_course/data/writeThisMultipleXLSX.xlsx
index ae4f2172b4a819087ee7702eef73aa481008d9ca..f53e6846f5af336896ef29e9bee469c8c2a818c0 100644
GIT binary patch
delta 334
zcmV-U0kQtAGORMNqZ0++mITEllc*Caf5|ST2-|_S2GN3`42-Z+nXW+}%X$8YvJknEytf7#w1t3X?OiP#
z6V}6rtHCo1iA+Fbu^|
zjn#sN`ZXZCjH9WVUc|w$Seh5
z`?yEtvwD76&QV*E@;;|!PU(_R97YJM%J1FCi~bW(ABNmaJOcK)FF!&14^T@31e4zw
g8nXu#qXPxsmITElldl+50qBzi86yUg761SM0F}?50RR91
delta 335
zcmV-V0kHn8GOaSOqZ0+YUf6CWlc*Caf6EDJ5w-(u$*F-tQ(Afx(#8?Bevs5Ue)}p;
z;w0cxkE@yec6L#@+^aVE1?Q#F6(U)V642ZTshbMDZ&v98N_=2iu-0g(P!Aq0>${R$
z%8i3pXDqlN!6y-|ASyAUkJ*z+AI=CkO~QQR7*Scs)-ux0$4HGqpeUtk4+e+d>C
zJ^)iY6441M_=vPWt{nh^V`xEv4jz+?;EOkaQ+~2xP_CRxhTg)Y^(>C=y_ci0>$0|b-a
h7#gz%6{72GN3`42-Z+nXW+}%X$8YvJknEytf7#w1t3X?OiP#
z6V}6rtHCo1iA+Fbu^|
zjn#sN`ZXZCjH9WVUc|w$Seh5
z`?yEtvwD76&QV*E@;;|!PU(_R7)A)I%J1FCi~bW(ABNmaJOcK)FF!&153{}!b^`_9
YmITEllQ|Yt0lkxs79$2?6aWAK085;lpa1{>
delta 327
zcmV-N0l5CQEw?SOco7A=Uf6CWlYJ2?f6EDJ5w-(u$*F-tQ(Afx(#8?Bevs5Ue)}p;
z;w0cxkE@yec6L#@+^aVE1?Q#F6(U)V642ZTshbMDZ&v98N_=2iu-0g(P!Aq0>${R$
z%8i3pXDqlN!6y-|ASyAUkJ*z+AI=CkO~QQR7*Scs)-ux0$4HGqpeUtk4+e+d>C
zJ^)iY6441M_=vPWt{nh^V`xEv4jz+?;EOkaQ+~2xP_CRxhTg)Y^(>C=y_ci0>$Introduction
+These exercises are about the reading and writing data sections of Introduction
to R.
Exercise 1
Have a look at all files in notepad or excel before attempting to
read. All files can be found in the “data” directory.
-- Read in the tab delimited file “GeneExpression.txt”.
+- Check your current working directory. Set your working directory to
+be in the downloaded course material, specifically the r_course
+subdirectory. [The exact path will depend on where you have saved your
+download]
-
+
+[1] "/Users/mattpaul"
+
-- Find the mean expression of all samples.
+- Read in the tab delimited file “GeneExpression.txt”. Check the data
+type.
-sampleMeans <- c(mean(geneExpression[,1]),mean(geneExpression[,2]),mean(geneExpression[,3]),mean(geneExpression[,4]),mean(geneExpression[,5]),mean(geneExpression[,6]))
-names(sampleMeans) <- colnames(geneExpression)
-sampleMeans
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
-
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
+# Row names come from the first column; header is spelled out instead of the partially matched h=T.
+geneExpression <- read.table("data/GeneExpression.txt", header = TRUE, sep = "\t", row.names = 1)
+is(geneExpression)
+## [1] "data.frame" "list" "oldClass" "vector"
+## [5] "list_OR_List" "vector_OR_factor" "vector_OR_Vector"
+
+- Coerce the data frame to a matrix
+
+
+
+- Find the mean expression of all genes.
+
+# rowMeans() averages every gene (row) in one call, however many genes the file holds; the result is named by the row names.
+sampleMeans <- rowMeans(geneExpression)
+sampleMeans
+## Gene_a Gene_b Gene_c Gene_d Gene_e Gene_f Gene_g Gene_h
+## 4.660569 4.379796 4.259824 5.849420 5.850658 6.732781 10.405203 10.201357
+
+
+- Coerce the matrix back to a data frame. Add an additional column
+with extra gene info
+“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”.
+Also add the mean expression as a column.
+
+geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
+## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
+## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
+## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
+## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
+## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
+## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
+## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
+## Sample_1.low tissue_localisation mean_expr
+## Gene_a 5.9350948 Kidney 4.660569
+## Gene_b 2.6313925 Adrenal 4.379796
+## Gene_c 5.7149521 Liver 4.259824
+## Gene_d 8.1964109 Adrenal 5.849420
+## Gene_e 0.7332521 Kidney 5.850658
+## Gene_f 3.8519471 Liver 6.732781
+## Gene_g 11.1733928 Liver 10.405203
+## Gene_h 9.9032500 Kidney 10.201357
+
+- Read in the tab delimited file “GeneExpression.txt”. Order genes by
+decreasing mean expression. Filter out genes with mean
+expression below 5. Write out a new comma separated file with column
+titles.
+
+# Sort genes from highest to lowest mean expression.
+orderedExpression <- geneExpression[order(geneExpression$mean_expr, decreasing = TRUE), ]
+# Keep genes with a mean expression of at least 5, so only values below 5 are filtered out.
+filteredExpression <- orderedExpression[orderedExpression$mean_expr >= 5, ]
+# Copy the gene names from the row names into a titled first column so they are written with a header.
+expressionDF <- cbind(geneNames = rownames(filteredExpression), filteredExpression)
+write.table(expressionDF, "orderedExpression.txt", sep = ",", col.names = TRUE, row.names = FALSE)
- Read in the tab delimited file “GeneExpressionWithMethods.txt”. This
file contains information on analysis steps used to produce file.
-geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
-geneExpression
+# skip = 3 jumps over the three leading lines before the header row.
+geneExpression <- read.table("data/GeneExpressionWithMethods.txt", header = TRUE, sep = "\t", row.names = 1, skip = 3)
+geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
@@ -486,8 +540,8 @@ geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
-geneExpression
+# comment.char = ">" makes read.table ignore everything from a ">" to the end of that line.
+geneExpression <- read.table("data/GeneExpressionWithNotes.txt", header = TRUE, sep = "\t", row.names = 1, comment.char = ">")
+geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
@@ -506,16 +560,6 @@ geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
-orderedExpression <- geneExpression[order(geneExpression$Sample_1.hi,decreasing=T),]
-expressionDF <- cbind(rownames(orderedExpression),orderedExpression)
-colnames(expressionDF)[1] <- "geneNames"
-write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
diff --git a/r_course/exercises/answers/Functions_answers.html b/r_course/exercises/answers/Functions_answers.html
index 8026aa8..f85ece8 100644
--- a/r_course/exercises/answers/Functions_answers.html
+++ b/r_course/exercises/answers/Functions_answers.html
@@ -439,6 +439,7 @@ Introduction
to R.
+
Exercise 1 - Functions
– Create a function which takes one number and returns the square of
that number
## [1] 2
-- Create a function which takes two vectors and returns the mean
+- Create a function which takes two vectors and returns the mean.
+Include a message statement that gives the total length of both
+vectors.
meanOfVectors <- function(vector,vector2){
- meanOfVectorsRes <- mean(c(vector,vector2))
- return(meanOfVectorsRes)
-}
-
-meanOfVectors(1,3)
-## [1] 2
+ bigvector <- c(vector,vector2)
+ message(paste("The total length of my vectors is", length(bigvector)))
+ meanOfVectorsRes <- mean(bigvector )
+ return(meanOfVectorsRes)
+}
+
+meanOfVectors(c(1,3,4),c(3,6,1,7,9))
+## The total length of my vectors is 8
+## [1] 4.25
- Create a function which takes two numbers and returns the two
numbers as a vector and the mean, summary and multiple as a
data.frame.
-dfAndVecReturn <- function(number,number2){
- input <- c(number,number2)
- df <- data.frame(mean=mean(c(number,number2)),
- sum=number+number2,
- multiple=number*number2
- )
- return(list(input,df))
-}
-
-dfAndVecReturn(1,3)
+# Return a list holding the two inputs as a vector and a data frame of their mean, sum and product.
+dfAndVecReturn <- function(number,number2){
+  bothNumbers <- c(number,number2)
+  summaryDF <- data.frame(mean=mean(bothNumbers),
+                          sum=number+number2,
+                          multiple=number*number2)
+  list(bothNumbers,summaryDF)
+}
+
+dfAndVecReturn(1,3)
## [[1]]
## [1] 1 3
##
@@ -489,48 +495,77 @@ findSmallestFactorial <- function(x){
- factorialAnswer <- 0
- count <- 0
- while(factorialAnswer <= x){
- count <- count+1
- if(count == 1){
- factorialAnswer <- 1
- }else{
- factorialAnswer <- factorialAnswer * count
- }
- }
- return(count)
-}
-
-findSmallestFactorial(3000)
+# Return the smallest whole number whose factorial is strictly greater than x.
+findSmallestFactorial <- function(x){
+  n <- 0
+  runningProduct <- 0
+  repeat {
+    # Stop as soon as the running factorial has passed x.
+    if (runningProduct > x) break
+    n <- n + 1
+    # The first step seeds the product with 1! = 1; later steps multiply by n.
+    runningProduct <- if (n == 1) 1 else runningProduct * n
+  }
+  n
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
+
## [1] 70
– Add a if and else statement in your function to only calculate
factorial code if argument is a numeric.
-findSmallestFactorial <- function(x){
- if(!is.numeric(x)){
- message("Please provide a numeric argument!")
- }else{
- factorialAnswer <- 0
- count <- 0
- while(factorialAnswer <= x){
- count <- count+1
- if(count == 1){
- factorialAnswer <- 1
- }else{
- factorialAnswer <- factorialAnswer * count
- }
- }
- return(count)
- }
-}
-
-findSmallestFactorial(3000)
+# Return the smallest whole number whose factorial is strictly greater than x.
+# A non-numeric x only triggers a message and nothing visible is returned.
+findSmallestFactorial <- function(x){
+  if (!is.numeric(x)) {
+    message("Please provide a numeric argument!")
+    return(invisible(NULL))
+  }
+  n <- 0
+  runningProduct <- 0
+  repeat {
+    # Stop as soon as the running factorial has passed x.
+    if (runningProduct > x) break
+    n <- n + 1
+    # The first step seeds the product with 1! = 1; later steps multiply by n.
+    runningProduct <- if (n == 1) 1 else runningProduct * n
+  }
+  n
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
+
## Please provide a numeric argument!
+Exercise 2 - Scripts
+Let's try to put together as much as we can of what we have learnt so far. This
+will be a multistep challenge. Break it down and use pseudocode to help.
+Start by working through the code interactively, then turn it into a script.
+
+- Read in the “data/GeneExpression.txt” dataset.
+- Use apply() to calculate the Z score for each gene (per row). The
+Z score is (gene_expression - mean)/standard deviation. You should use a
+function to do this calculation.
+- Find which gene has the highest absolute Z score. This is a very
+rough proxy for the variability of that gene.
+- Print out the gene name with the highest value
+- Turn this into a script and run the script
+- Think about what modifications you would need to make in order to
+accept a different data set as input.
+
+geneExpression <- read.table("data/GeneExpression.txt", header = TRUE, sep = "\t", row.names = 1)
+geneExpression <- as.matrix(geneExpression)
+# Z score of each value relative to the mean and standard deviation of its own vector.
+zscores <- function(x) {
+  (x - mean(x)) / sd(x)
+}
+
+# MARGIN = 1 runs zscores once per gene (row). apply() returns each result as a column, so transpose to get genes back on the rows.
+my_zs <- t(apply(geneExpression, 1, zscores))
+
+# Compare absolute values so a strongly negative Z score counts as much as a positive one.
+inds <- which(abs(my_zs) == max(abs(my_zs)), arr.ind = TRUE)
+rownames(geneExpression)[inds[, 1]]
+## [1] "Gene_h"
diff --git a/r_course/exercises/answers/Lists_answers.html b/r_course/exercises/answers/Lists_answers.html
new file mode 100644
index 0000000..86ec741
--- /dev/null
+++ b/r_course/exercises/answers/Lists_answers.html
@@ -0,0 +1,583 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lists
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lists
+Rockefeller University, Bioinformatics Resource
+Centre
+https://rockefelleruniversity.github.io/Intro_To_R_1Day/
+
+
+
+
+
+
+
+These exercises are about the Lists sections of Introduction
+to R.
+Exercise 1
+
+- Create a list containing a character vector, a numeric matrix and a
+data frame with 2 or more datatypes.
+
+firstElement <- c("A","B","C","D","E")
+secondElement <- matrix(1:5,nrow=5,ncol=5)
+thirdElement <- data.frame(Sample=c("Sample1","Sample2","Sample3","Sample4"), Age=c(25,21,24,25),factor=c("Smoker","Smoker","NonSmoker","Smoker"))
+
+my_list <- list(firstElement, secondElement, thirdElement)
+my_list
+## [[1]]
+## [1] "A" "B" "C" "D" "E"
+##
+## [[2]]
+## [,1] [,2] [,3] [,4] [,5]
+## [1,] 1 1 1 1 1
+## [2,] 2 2 2 2 2
+## [3,] 3 3 3 3 3
+## [4,] 4 4 4 4 4
+## [5,] 5 5 5 5 5
+##
+## [[3]]
+## Sample Age factor
+## 1 Sample1 25 Smoker
+## 2 Sample2 21 Smoker
+## 3 Sample3 24 NonSmoker
+## 4 Sample4 25 Smoker
+
+- Rename each entry: “my_vector”, “my_matrix”, “my_df”
+
+
+## $my_vector
+## [1] "A" "B" "C" "D" "E"
+##
+## $my_matrix
+## [,1] [,2] [,3] [,4] [,5]
+## [1,] 1 1 1 1 1
+## [2,] 2 2 2 2 2
+## [3,] 3 3 3 3 3
+## [4,] 4 4 4 4 4
+## [5,] 5 5 5 5 5
+##
+## $my_df
+## Sample Age factor
+## 1 Sample1 25 Smoker
+## 2 Sample2 21 Smoker
+## 3 Sample3 24 NonSmoker
+## 4 Sample4 25 Smoker
+
+- Access the second column of the dataframe. Try to access it in at
+least two different ways.
+
+
+## [1] 25 21 24 25
+
+## [1] 25 21 24 25
+
+- Add a list into the fourth slot of our list. This list should
+contain 2 numeric vectors. The resulting list should be 4 long.
+Check.
+
+
+## [1] 4
+
+- Access the second vector within the sublist in the 4th
+position.
+
+
+## [1] 2 3 4 5 6
+
+- Create a list with three numeric vectors: c(5,2,9), c(13,12,6),
+c(1,3,4)
+
+
+
+- Flatten the list and turn it into a matrix.
+
+# Flatten to one vector, then refill row by row so each list element becomes one row.
+flat_list <- unlist(my_list)
+# The column count is inferred from the data rather than hard-coded; rows are named after the list elements.
+listAsMat <- matrix(flat_list,
+                    nrow = length(my_list),
+                    byrow = TRUE,
+                    dimnames = list(names(my_list), NULL))
+listAsMat
+## [,1] [,2] [,3]
+## First 5 2 9
+## Second 13 12 6
+## Third 1 3 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/r_course/exercises/answers/Vectors_answers.html b/r_course/exercises/answers/Vectors_answers.html
new file mode 100644
index 0000000..8fdec69
--- /dev/null
+++ b/r_course/exercises/answers/Vectors_answers.html
@@ -0,0 +1,687 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Vectors
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Vectors
+Rockefeller University, Bioinformatics Resource
+Centre
+https://rockefelleruniversity.github.io/Intro_To_R_1Day/
+
+
+
+
+
+
+
+These exercises are about the vector sections of Introduction
+to R.
+Exercise 1
+
+- Create a vector containing the values 1,2,3,4,5
+
+
+## [1] 1 2 3 4 5
+
+- Create a vector containing the values 1 to 100. Save it as the
+variable x.
+
+
+## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
+## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
+## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
+## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
+## [91] 91 92 93 94 95 96 97 98 99 100
+
+- Multiply x by 2
+
+
+## [1] 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36
+## [19] 38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72
+## [37] 74 76 78 80 82 84 86 88 90 92 94 96 98 100 102 104 106 108
+## [55] 110 112 114 116 118 120 122 124 126 128 130 132 134 136 138 140 142 144
+## [73] 146 148 150 152 154 156 158 160 162 164 166 168 170 172 174 176 178 180
+## [91] 182 184 186 188 190 192 194 196 198 200
+
+- Create a vector containing the values 0,5,10,15,20
+
+
+## [1] 0 5 10 15 20
+
+- Check the help page for the rep function
+
+
+## Help on topic 'rep' was found in the following packages:
+##
+## Package Library
+## base /usr/local/lib/R/library
+## S4Vectors /usr/local/lib/R/host-site-library
+##
+##
+## Using the first match ...
+
+- Create a vector containing the values 1,1,2,2,3,3 using rep
+
+
+## [1] 1 1 2 2 3 3
+
+- Create a vector containing the values 1,1,5,7,9,10. Overwrite the
+variable x with this.
+
+
+## [1] 1 1 5 7 9 10
+Exercise 2
+
+- Create a vector containing the values 1 to 10.
+
+
+## [1] 1 2 3 4 5 6 7 8 9 10
+
+- Create a new vector with all but the first and last value.
+
+
+## [1] 2 3 4 5 6 7 8 9
+
+- Create a new vector with all but the second and fifth value.
+
+
+## [1] 1 3 4 6 7 8 9 10
+
+- Create a new vector of square root of the sixth and seventh
+position.
+
+
+## [1] 2.449490 2.645751
+
+- Create a new vector of alternating positions in the vector using
+another vector.
+
+
+## [1] 1 3 5 7 9
+Exercise 3
+
+- Check the help page for the paste() function.
+
+
+## Help on topic 'paste' was found in the following packages:
+##
+## Package Library
+## base /usr/local/lib/R/library
+## BiocGenerics /usr/local/lib/R/host-site-library
+##
+##
+## Using the first match ...
+
+- Combine the two characters: “A” and “B”
+
+
+## [1] "A B"
+
+- Change the separator to an underscore
+
+
+## [1] "A_B"
+Exercise 4
+
+- Create a vector with these gene names: “PKM”, “ADPRH”, “TDG”,
+“ATP4A”, “SLC6A4”, “CAPN3”, “TDG”, “ATP1A2”,“PKM”
+- Subset to just the unique genes
+
+my_genes <- c("PKM", "ADPRH", "TDG", "ATP4A", "SLC6A4", "CAPN3", "TDG", "ATP1A2","PKM")
+my_genes <- unique(my_genes)
+my_genes
+## [1] "PKM" "ADPRH" "TDG" "ATP4A" "SLC6A4" "CAPN3" "ATP1A2"
+
+- Create a second vector with these gene names: “SLC6A4”, “CAPN3”,
+“TDG”, “ATP1A2”, “IMPA1”, “PDXK”.
+- Check which genes from vector 1 are present in vector 2.
+- Subset vector 1 depending on whether the gene is present in vector
+2.
+
+my_genes_of_interest <- c("SLC6A4", "CAPN3", "TDG", "ATP1A2", "IMPA1", "PDXK")
+idx <- my_genes %in% my_genes_of_interest
+idx
+## [1] FALSE FALSE TRUE FALSE TRUE TRUE TRUE
+
+## [1] "TDG" "SLC6A4" "CAPN3" "ATP1A2"
+my_genes_of_interest <- c("SLC6A4", "CAPN3", "TDG", "ATP1A2", "IMPA1", "PDXK")
+idx <- my_genes %in% my_genes_of_interest
+idx
+## [1] FALSE FALSE TRUE FALSE TRUE TRUE TRUE
+
+## [1] "TDG" "SLC6A4" "CAPN3" "ATP1A2"
+
+- Create a vector with these gene names: “SMC1”, “SMC3”, “SCC1”,
+“SCC3”, “RAD21”, “NIPBL”, “SMC2”, “SMC4”,“CAPH”,“CAPD3”
+- Subset to just the genes containing SMC
+
+my_genes <- c("SMC1", "SMC3", "SCC1", "SCC3", "RAD21", "NIPBL", "SMC2", "SMC4","CAPH","CAPD3")
+my_genes[grepl("SMC",my_genes)]
+## [1] "SMC1" "SMC3" "SMC2" "SMC4"
+Exercise 5
+
+- Create a vector of the gene names Gene_1, Gene_2, Gene_3 Gene_4
+- Create a vector of the expression values 1000, 3000, 10000,
+12000
+- Create a vector of the gene lengths 100, 3000, 200, 1000
+
+geneNames <- c("Gene_1", "Gene_2", "Gene_3","Gene_4")
+expression <- c(1000, 3000, 10000, 12000)
+geneLengths <- c(100, 3000, 200, 1000)
+names(expression) <- geneNames
+names(geneLengths) <- geneNames
+expression
+## Gene_1 Gene_2 Gene_3 Gene_4
+## 1000 3000 10000 12000
+
+## Gene_1 Gene_2 Gene_3 Gene_4
+## 100 3000 200 1000
+
+- Find the longest gene.
+
+
+## [1] "Gene_2"
+
+## [1] "Gene_2"
+
+- Identify genes which have a length greater than 100 and expression
+greater than 10000
+
+
+## [1] "Gene_4"
+
+
+- Calculate the expression over the gene length for all genes (Length
+normalised expression).
+
+
+## Gene_1 Gene_2 Gene_3 Gene_4
+## 10 1 50 12
+
+- Identify genes with a length normalised expression greater than the
+average
+
+
+## [1] "Gene_3"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/r_course/exercises/answers/conditionsAndLoops_answers.html b/r_course/exercises/answers/conditionsAndLoops_answers.html
index 2b37ffd..cc7526c 100644
--- a/r_course/exercises/answers/conditionsAndLoops_answers.html
+++ b/r_course/exercises/answers/conditionsAndLoops_answers.html
@@ -439,65 +439,166 @@ Introduction
to R.
-
– Calculate the factorial (factorial of 3 = 3 * 2 * 1) of 10 using a
-loop.
-for(x in 1:10){
- if(x == 1){
- factorialAnswer <- 1
- }else{
- factorialAnswer <- factorialAnswer * x
- }
-}
-factorialAnswer
-## [1] 3628800
-– Adjusting your answer from before, what is the first number that
-has a factorial greater than 1000.
-factorialAnswer <- 0
-count <- 0
-
-while(factorialAnswer <= 1000){
- count <- count+1
- if(count == 1){
- factorialAnswer <- 1
- }else{
- factorialAnswer <- factorialAnswer * count
- }
-}
-count
-## [1] 7
+Exercise 1 - If Else
+
+- Build an if statement that tests whether x is a negative number. Write a
+print statement that will confirm if x is less than 0. Test it with
+several values of x.
+
+x<- 42
+
+if (x<0){
+ print("It's a negative number!")
+}
+
+x <- -42
+
+if (x<0){
+ print("It's a negative number!")
+}
+## [1] "It's a negative number!"
+
+- Modify the previous if statement to include an else. Ensure a print
+statement is returned to say if the number is not negative.
+
+x <- 0
+
+if (x<0){
+ print("It's a negative number!")
+}else{
+ print("It's not a negative number!")
+}
+## [1] "It's not a negative number!"
+x <- -1
+
+if (x<0){
+ print("It's a negative number!")
+}else{
+ print("It's not a negative number!")
+}
+## [1] "It's a negative number!"
+
+- Finally, add an else if statement. We want a response to confirm if x
+is negative, positive or if it is zero.
+
+x <- -1
+
+if (x<0){
+ print("It's a negative number!")
+}else if (x==0){
+ print("It's zero")
+}else{
+ print("It's a positive number!")
+}
+## [1] "It's a negative number!"
+x <- 0
+
+if (x<0){
+ print("It's a negative number!")
+}else if (x==0){
+ print("It's zero")
+}else{
+ print("It's a positive number!")
+}
+## [1] "It's zero"
+x <- 1
+
+if (x<0){
+ print("It's a negative number!")
+}else if (x==0){
+ print("It's zero")
+}else{
+ print("It's a positive number!")
+}
+## [1] "It's a positive number!"
+
+- Build an if/else statement that tests whether a variable is odd or even.
+Include x in the printed output.
+
+Hint: The modulus operator may be useful here i.e. x%%2 returns
+the remainder after the value of x is divided by 2.
+
+## [1] "1 is odd"
+
+## [1] "2 is even"
– Using an ifelse() expression, create a factor from a vector of 1 to
40 where all numbers less than 10 are “small”,10 to 30 are “mid”,31 to
40 are “big”
-
+
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
-vectorResult <- ifelse(condExercise<10,"small",ifelse(condExercise < 31,"mid","big"))
-temp <- factor(vectorResult,levels=c("small","mid","big"),order=T)
-temp
+# Nested ifelse(): values below 10 are "small", values from 10 up to 30 are "mid", everything else is "big".
+vectorResult <- ifelse(condExercise < 10, "small", ifelse(condExercise < 31, "mid", "big"))
+# ordered = TRUE is spelled out; order=T relied on partial argument matching and the reassignable T.
+temp <- factor(vectorResult, levels = c("small", "mid", "big"), ordered = TRUE)
+temp
## [1] small small small small small small small small small mid mid mid
## [13] mid mid mid mid mid mid mid mid mid mid mid mid
## [25] mid mid mid mid mid mid big big big big big big
## [37] big big big big
## Levels: small < mid < big
-– Read in all files from expression directory with .txt extension and
-create a table of gene expression results.
-filesToRead <- dir("ExpressionResults/",pattern = "*\\.txt",full.names=T)
-fileRead <- vector("list",length=length(filesToRead))
-for(i in 1:length(filesToRead)){
- fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
- colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
-}
-mergedTable <- NULL
-for(i in fileRead){
- if(is.null(mergedTable)){
- mergedTable <- i
- }else{
- mergedTable <- merge(mergedTable,i,by=1,all=T)
- }
-
- print(nrow(mergedTable))
-}
+– Calculate the factorial (factorial of 3 = 3 * 2 * 1) of 10 using a
+loop.
+for(x in 1:10){
+ if(x == 1){
+ factorialAnswer <- 1
+ }else{
+ factorialAnswer <- factorialAnswer * x
+ }
+}
+factorialAnswer
+## [1] 3628800
+– Adjusting your answer from before, what is the first number that
+has a factorial greater than 1000.
+factorialAnswer <- 0
+count <- 0
+
+while(factorialAnswer <= 1000){
+ count <- count+1
+ if(count == 1){
+ factorialAnswer <- 1
+ }else{
+ factorialAnswer <- factorialAnswer * count
+ }
+}
+count
+## [1] 7
+
+- Set your working directory to be in the downloaded course material,
+specifically the r_course subdirectory. [The exact path will
+depend on where you have saved your download]. Read in all files from
+the expression directory (“ExpressionResults/”) with a .txt extension and
+create a table of gene expression results.
+
+
+filesToRead <- dir("ExpressionResults/",pattern = "*\\.txt",full.names=TRUE)
+# Pre-allocate one slot per file so the list is not grown inside the loop.
+fileRead <- vector("list",length=length(filesToRead))
+
+# seq_along() gives an empty sequence when no files are found, whereas 1:length() would iterate over 1 and 0.
+for(i in seq_along(filesToRead)){
+  fileRead[[i]] <- read.delim(filesToRead[i],header=FALSE,sep="\t")
+  # Rename the columns to "GeneNames" plus the name of the file the values came from.
+  colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
+}
+mergedTable <- NULL
+for(i in fileRead){
+  if(is.null(mergedTable)){
+    mergedTable <- i
+  }else{
+    # all=TRUE keeps rows whose first-column value appears in only one of the two tables.
+    mergedTable <- merge(mergedTable,i,by=1,all=TRUE)
+  }
+
+  print(nrow(mergedTable))
+}
## [1] 5001
## [1] 5001
## [1] 5001
@@ -509,7 +610,7 @@ mergedTable[1:3,]
+
## GeneNames Annotation.txt NA ExpressionResults_Sample1.txt
## 1 GeneName Ensembl Pathway NA
## 2 Gene_1 Ens_1001 DNA_Binding 3.448466
@@ -536,9 +637,9 @@ Annotation <- read.table("ExpressionResults/Annotation.txt",sep="\t",h=T)
-annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
-annotatedExpression[1:2,]
+Annotation <- read.table("ExpressionResults/Annotation.txt",sep="\t",h=T)
+annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
+annotatedExpression[1:2,]
## GeneName Ensembl Pathway Annotation.txt NA
## 1 GeneName <NA> <NA> Ensembl Pathway
## 2 Gene_1 Ens_1001 DNA_Binding Ens_1001 DNA_Binding
@@ -557,10 +658,10 @@ summary(annotatedExpression$Pathway)
+
## Length Class Mode
## 5001 character character
-
+
## Length Class Mode
## 5000 character character
diff --git a/r_course/exercises/answers/factorsAndDataframes_answers.html b/r_course/exercises/answers/factorsAndDataframes_answers.html
index fd2f156..3e8db17 100644
--- a/r_course/exercises/answers/factorsAndDataframes_answers.html
+++ b/r_course/exercises/answers/factorsAndDataframes_answers.html
@@ -437,33 +437,80 @@ Introduction
+These exercises are about the factors and data frames sections of Introduction
to R.
-These exercises cover the factors and data frames sections of Introduction to R.
Exercise 1 - Factors
+- Create a nominal factor called CellType containing:
+“DC1”,“DC1”,“DC1”,“NK”,“NK”,“Mono”,“Mono”,“DC2”,“NK”
+
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK
+
+- Modify the third position of CellType to “Neu”, by modifying the
+levels of the factor.
+
+
+## [1] DC1 DC1 Neu NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK Neu
+
+- Create CellType2 with the same entries, but directly specify the
+levels to include: “DC1”, “DC2”, “Mono”, “NK”, “Neu”, “Bcell”,
+“Tcell”.
+
+CellType2 <- factor(c("DC1","DC1","DC1","NK","NK","Mono","Mono","DC2","NK"), levels = c("DC1", "DC2", "Mono", "NK", "Neu", "Bcell", "Tcell"))
+CellType2
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
+- Use combine to increase the length of CellType2 to include:
+“Neu”,“Neu”,“Bcell”,“DC1”
+
+CellType2 <- c(CellType2, factor(c("Neu","Neu","Bcell","DC1"), levels = c("DC1", "DC2", "Mono", "NK", "Neu", "Bcell", "Tcell")))
+CellType2
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK Neu Neu Bcell
+## [13] DC1
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
+- Summarize the number of entries for each cell type.
+
+
+## DC1 DC2 Mono NK Neu Bcell Tcell
+## 4 1 2 3 2 1 0
+
+- Reorder the summary to alphabetical order
+
+
+## Bcell DC1 DC2 Mono Neu NK Tcell
+## 1 4 1 2 2 3 0
+
- Create a ordinal factor named “Height” containing – high, low, mid,
low, mid, low, mid, high, mid, high.
-ordinalFactor <- factor(c("high", "low", "mid", "low", "mid", "low", "mid", "high", "mid", "high"),ordered=T,levels=c("low", "mid", "high"))
-ordinalFactor
+# levels sets the ordering low < mid < high; ordered = TRUE makes comparisons such as > valid.
+Height <- factor(c("high", "low", "mid", "low", "mid", "low", "mid", "high", "mid", "high"), ordered = TRUE, levels = c("low", "mid", "high"))
+Height
## [1] high low mid low mid low mid high mid high
## Levels: low < mid < high
- Using a logical index, create new factor of only those from
“Height”” greater than low.
-
+
## [1] high mid mid mid high mid high
## Levels: low < mid < high
- Replace the last index in “Height” with veryHigh and create new
factor with those greater than mid.
-newFactor <- factor(ordinalFactor,ordered=T,levels=c("low", "mid", "high","veryHigh"))
-newFactor[length(newFactor)] <- "veryHigh"
-newFactor[newFactor > "mid"]
+# Rebuild the factor with an extra "veryHigh" level; a value outside the levels cannot be assigned.
+newFactor <- factor(Height, ordered = TRUE, levels = c("low", "mid", "high", "veryHigh"))
+newFactor[length(newFactor)] <- "veryHigh"
+newFactor[newFactor > "mid"]
## [1] high high veryHigh
## Levels: low < mid < high < veryHigh
Exercise 2 - Data frames
@@ -474,69 +521,129 @@ Annotation <- data.frame(geneNames=c("Gene_1", "Gene_2", "Gene_3","Gene_4","Gene_5"), ensembl=c("Ens001", "Ens003", "Ens006", "Ens007", "Ens010"),pathway=c("Glycolysis", "TGFb", "Glycolysis", "TGFb", "Glycolysis"),geneLengths=c(100, 3000, 200, 1000,1200))
+Annotation <- data.frame(geneNames=c("Gene_1", "Gene_2", "Gene_3","Gene_4","Gene_5"), ensembl=c("Ens001", "Ens003", "Ens006", "Ens007", "Ens010"),pathway=c("Glycolysis", "TGFb", "Glycolysis", "TGFb", "Glycolysis"),geneLengths=c(100, 3000, 200, 1000,1200))
+Annotation
+## geneNames ensembl pathway geneLengths
+## 1 Gene_1 Ens001 Glycolysis 100
+## 2 Gene_2 Ens003 TGFb 3000
+## 3 Gene_3 Ens006 Glycolysis 200
+## 4 Gene_4 Ens007 TGFb 1000
+## 5 Gene_5 Ens010 Glycolysis 1200
+
+- Filter Annotation to geneLengths that are greater than 500 and less
+than 2000. Use the dollar sign to extract column information.
+
+
+## geneNames ensembl pathway geneLengths
+## 4 Gene_4 Ens007 TGFb 1000
+## 5 Gene_5 Ens010 Glycolysis 1200
+
+- Check the data types of each column. Update the pathway column to be
+a factor.
+
+
+## [1] "character"
+
+## [1] "character"
+
+## [1] "character"
+
+## [1] "numeric"
+
+## geneNames ensembl pathway geneLengths
+## 1 Gene_1 Ens001 Glycolysis 100
+## 2 Gene_2 Ens003 TGFb 3000
+## 3 Gene_3 Ens006 Glycolysis 200
+## 4 Gene_4 Ens007 TGFb 1000
+## 5 Gene_5 Ens010 Glycolysis 1200
+
+## [1] "factor"
- Create data frame called Sample1 with ensembl gene names (“Ens001”,
“Ens003”, “Ens006”, “Ens010”) and expression (1000, 3000,
10000,5000)
-Sample1 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens010"),expression=c(1000, 3000, 10000,5000))
+Sample1 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens010"),expression=c(1000, 3000, 10000,5000))
+Sample1
+## ensembl expression
+## 1 Ens001 1000
+## 2 Ens003 3000
+## 3 Ens006 10000
+## 4 Ens010 5000
- Create data frame called Sample2 with ensembl gene names (“Ens001”,
“Ens003”, “Ens006”, “Ens007”,“Ens010”) and expression (1500, 1500,
17000,500,10000)
-Sample2 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens007","Ens010"),expression=c(1500, 1500, 17000,500,10000))
+Sample2 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens007","Ens010"),expression=c(1500, 1500, 17000,500,10000))
+Sample2
+## ensembl expression
+## 1 Ens001 1500
+## 2 Ens003 1500
+## 3 Ens006 17000
+## 4 Ens007 500
+## 5 Ens010 10000
- Create a data frame containing only those gene names common to all
data frames with all information from Annotation and the expression from
Sample 1 and Sample 2.
-AnnoSample1 <- merge(Annotation,Sample1,by.x=2,by.y=1,all=F)
-AnnoSample1And2 <- merge(AnnoSample1,Sample2,by=1,all=F)
-AnnoSample1And2
+AnnoSample1 <- merge(Annotation,Sample1,by.x=2,by.y=1,all=F)
+AnnoSample1And2 <- merge(AnnoSample1,Sample2,by=1,all=F)
+AnnoSample1And2
## ensembl geneNames pathway geneLengths expression.x expression.y
## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
## 2 Ens003 Gene_2 TGFb 3000 3000 1500
## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
-- Add an extra two columns containing the length normalised
-expressions for Sample 1 and Sample 2
+- Order our new dataframe by geneLengths - biggest to smallest.
-AnnoSample1And2$Sample1_lne <- AnnoSample1And2$expression.x/AnnoSample1And2$geneLengths
-AnnoSample1And2$Sample2_lne <- AnnoSample1And2$expression.y/AnnoSample1And2$geneLengths
-AnnoSample1And2
+AnnoSample1And2 <- AnnoSample1And2[order(AnnoSample1And2$geneLengths, decreasing = T),]
+AnnoSample1And2
## ensembl geneNames pathway geneLengths expression.x expression.y
-## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
## 2 Ens003 Gene_2 TGFb 3000 3000 1500
+## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
+## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
+
+- Add an extra two columns containing the length normalized
+expressions for Sample 1 and Sample 2
+
+AnnoSample1And2$Sample1_lne <- AnnoSample1And2$expression.x/AnnoSample1And2$geneLengths
+AnnoSample1And2$Sample2_lne <- AnnoSample1And2$expression.y/AnnoSample1And2$geneLengths
+AnnoSample1And2
+## ensembl geneNames pathway geneLengths expression.x expression.y
+## 2 Ens003 Gene_2 TGFb 3000 3000 1500
## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
+## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
+## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
## Sample1_lne Sample2_lne
-## 1 10.000000 15.000000
## 2 1.000000 0.500000
+## 4 4.166667 8.333333
## 3 50.000000 85.000000
-## 4 4.166667 8.333333
+## 1 10.000000 15.000000
-- Identify the mean length normalised expression across Sample1 and
+
- Identify the mean length normalized expression across Sample1 and
Sample2 for Ens006 genes
-rownames(AnnoSample1And2) <- AnnoSample1And2$ensembl
-mean(c(AnnoSample1And2["Ens006","Sample1_lne"],AnnoSample1And2["Ens006","Sample2_lne"]))
+rownames(AnnoSample1And2) <- AnnoSample1And2$ensembl
+mean(c(AnnoSample1And2["Ens006","Sample1_lne"],AnnoSample1And2["Ens006","Sample2_lne"]))
## [1] 67.5
-- For all genes, identify the log2 fold change in length normalised
+
- For all genes, identify the log2 fold change in length normalized
expression from Sample 1 to Sample 2.
-log2FoldChange <- log2(AnnoSample1And2$Sample2_lne) - log2(AnnoSample1And2$Sample1_lne)
-names(log2FoldChange) <- AnnoSample1And2$geneNames
-log2FoldChange
-## Gene_1 Gene_2 Gene_3 Gene_5
-## 0.5849625 -1.0000000 0.7655347 1.0000000
+log2FoldChange <- log2(AnnoSample1And2$Sample2_lne) - log2(AnnoSample1And2$Sample1_lne)
+names(log2FoldChange) <- AnnoSample1And2$geneNames
+log2FoldChange
+## Gene_2 Gene_5 Gene_3 Gene_1
+## -1.0000000 1.0000000 0.7655347 0.5849625
- Identify the total length of genes in Glycolysis pathway.
-
+
## [1] 1500
diff --git a/r_course/exercises/exercises/DataInputOutput_exercise.html b/r_course/exercises/exercises/DataInputOutput_exercise.html
index 728060b..5b0b1f5 100644
--- a/r_course/exercises/exercises/DataInputOutput_exercise.html
+++ b/r_course/exercises/exercises/DataInputOutput_exercise.html
@@ -226,6 +226,106 @@
+
+
+
@@ -337,22 +437,68 @@ Introduction
+These exercises are about reading and writing data sections of Introduction
to R.
Exercise 1
Have a look at all files in notepad or excel before attempting to
read. All files can be found in the “data” directory.
-Read in the tab delimited file “GeneExpression.txt”.
-Find the mean expression of all samples.
+- Check your current working directory. Set your working directory to
+be in the downloaded course material. Specifically the r_course
+subdirectory. [The exact path will depend on where you have saved your
+download]
+
+
+[1] "/Users/mattpaul"
+
+
+- Read in the tab delimited file “GeneExpression.txt”. Check the data
+type.
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
-## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low Sample_1.low
-## 7.514996 6.774108 6.508127 6.262253 6.177761 6.017462
+## [1] "data.frame" "list" "oldClass" "vector"
+## [5] "list_OR_List" "vector_OR_factor" "vector_OR_Vector"
-- Read in the tab delimited file “GeneExpressionWithMethods.txt”. This
-file contains information on analysis steps used to produce file.
+Coerce the data frame to a matrix
+Find the mean expression of all genes.
+
+## Gene_a Gene_b Gene_c Gene_d Gene_e Gene_f Gene_g Gene_h
+## 4.660569 4.379796 4.259824 5.849420 5.850658 6.732781 10.405203 10.201357
+
+- Coerce the matrix back to a data frame. Add an additional column
+with extra gene info
+“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”.
+Also add the mean expression as a column.
+
+geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
+## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
+## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
+## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
+## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
+## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
+## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
+## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
+## Sample_1.low tissue_localisation mean_expr
+## Gene_a 5.9350948 Kidney 4.660569
+## Gene_b 2.6313925 Adrenal 4.379796
+## Gene_c 5.7149521 Liver 4.259824
+## Gene_d 8.1964109 Adrenal 5.849420
+## Gene_e 0.7332521 Kidney 5.850658
+## Gene_f 3.8519471 Liver 6.732781
+## Gene_g 11.1733928 Liver 10.405203
+## Gene_h 9.9032500 Kidney 10.201357
+
+Read in tab delimited file “GeneExpression.txt”. Order genes by
+decreasing expression in mean expression. Filter out genes with
+expression below 5. Write out a new comma separated file with column
+titles.
+Read in the tab delimited file “GeneExpressionWithMethods.txt”.
+This file contains information on analysis steps used to produce
+file.
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
@@ -394,11 +540,6 @@ Introduction
to R.
+
Exercise 1 - Functions
– Create a function which takes one number and returns the square of
that number
## [1] 9
– Create a function which takes two numbers and returns the mean
## [1] 2
-- Create a function which takes two vectors and returns the mean
+- Create a function which takes two vectors and returns the mean.
+Include a message statement that gives the total length of both
+vectors.
-## [1] 2
+## The total length of my vectors is 8
+## [1] 4.25
- Create a function which takes two numbers and returns the two
numbers as a vector and the mean, summary and multiple as a
@@ -367,6 +371,23 @@
+- Read in the “data/GeneExpression.txt” dataset.
+- Use apply to calculate the Z score for each gene (per row). The
+zscore is (gene_expression - mean)/standard deviation. You should use a
+function to do this calculation.
+- Find which gene has the highest absolute max Zscore. This is a very
+rough proxy for the variability of that gene.
+- Print out the gene name with the highest value
+- Turn this into a script and run the script
+- Think about what modifications you would need to make in order to
+accept a different data set as input.
+
+## [1] "Gene_h"
diff --git a/r_course/exercises/exercises/Lists_exercise.html b/r_course/exercises/exercises/Lists_exercise.html
new file mode 100644
index 0000000..86ec741
--- /dev/null
+++ b/r_course/exercises/exercises/Lists_exercise.html
@@ -0,0 +1,583 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lists
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lists
+Rockefeller University, Bioinformatics Resource
+Centre
+https://rockefelleruniversity.github.io/Intro_To_R_1Day/
+
+
+
+
+
+
+
+These exercises are about the Lists sections of Introduction
+to R.
+Exercise 1
+
+- Create a list containing a character vector, a numeric matrix and a
+data frame with 2 or more datatypes.
+
+firstElement <- c("A","B","C","D","E")
+secondElement <- matrix(1:5,nrow=5,ncol=5)
+thirdElement <- data.frame(Sample=c("Sample1","Sample2","Sample3","Sample4"), Age=c(25,21,24,25),factor=c("Smoker","Smoker","NonSmoker","Smoker"))
+
+my_list <- list(firstElement, secondElement, thirdElement)
+my_list
+## [[1]]
+## [1] "A" "B" "C" "D" "E"
+##
+## [[2]]
+## [,1] [,2] [,3] [,4] [,5]
+## [1,] 1 1 1 1 1
+## [2,] 2 2 2 2 2
+## [3,] 3 3 3 3 3
+## [4,] 4 4 4 4 4
+## [5,] 5 5 5 5 5
+##
+## [[3]]
+## Sample Age factor
+## 1 Sample1 25 Smoker
+## 2 Sample2 21 Smoker
+## 3 Sample3 24 NonSmoker
+## 4 Sample4 25 Smoker
+
+- Rename each entry: “my_vector”, “my_matrix”, “my_df”
+
+
+## $my_vector
+## [1] "A" "B" "C" "D" "E"
+##
+## $my_matrix
+## [,1] [,2] [,3] [,4] [,5]
+## [1,] 1 1 1 1 1
+## [2,] 2 2 2 2 2
+## [3,] 3 3 3 3 3
+## [4,] 4 4 4 4 4
+## [5,] 5 5 5 5 5
+##
+## $my_df
+## Sample Age factor
+## 1 Sample1 25 Smoker
+## 2 Sample2 21 Smoker
+## 3 Sample3 24 NonSmoker
+## 4 Sample4 25 Smoker
+
+- Access the second column of the dataframe. Try to access it in at
+least two different ways.
+
+
+## [1] 25 21 24 25
+
+## [1] 25 21 24 25
+
+- Add a list into the fourth slot of our list. This list should
+contain 2 numeric vectors. The resulting list should be 4 long.
+Check.
+
+
+## [1] 4
+
+- Access the second vector within the sublist in the 4th
+position.
+
+
+## [1] 2 3 4 5 6
+
+- Create a list with three numeric vectors: c(5,2,9), c(13,12,6),
+c(1,3,4)
+
+
+
+- Flatten the list and turn it into a matrix.
+
+flat_list <- unlist(my_list)
+listAsMat <- matrix(flat_list,
+ nrow=length(my_list),
+ ncol=3,
+ byrow=T,
+ dimnames=list(names(my_list)))
+listAsMat
+## [,1] [,2] [,3]
+## First 5 2 9
+## Second 13 12 6
+## Third 1 3 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/r_course/exercises/exercises/Vectors_exercise.html b/r_course/exercises/exercises/Vectors_exercise.html
new file mode 100644
index 0000000..9f0e0a7
--- /dev/null
+++ b/r_course/exercises/exercises/Vectors_exercise.html
@@ -0,0 +1,651 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Vectors
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Vectors
+Rockefeller University, Bioinformatics Resource
+Centre
+https://rockefelleruniversity.github.io/Intro_To_R_1Day/
+
+
+
+
+
+
+
+These exercises are about the vector sections of Introduction
+to R.
+Exercise 1
+
+- Create a vector containing the values 1,2,3,4,5
+
+## [1] 1 2 3 4 5
+
+- Create a vector containing the values 1 to 100. Save it as the
+variable x.
+
+## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
+## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
+## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
+## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
+## [91] 91 92 93 94 95 96 97 98 99 100
+
+- Multiply x by 2
+
+## [1] 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36
+## [19] 38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72
+## [37] 74 76 78 80 82 84 86 88 90 92 94 96 98 100 102 104 106 108
+## [55] 110 112 114 116 118 120 122 124 126 128 130 132 134 136 138 140 142 144
+## [73] 146 148 150 152 154 156 158 160 162 164 166 168 170 172 174 176 178 180
+## [91] 182 184 186 188 190 192 194 196 198 200
+
+- Create a vector containing the values 0,5,10,15,20
+
+## [1] 0 5 10 15 20
+
+- Check the help page for the rep function
+
+
+## Help on topic 'rep' was found in the following packages:
+##
+## Package Library
+## base /usr/local/lib/R/library
+## S4Vectors /usr/local/lib/R/host-site-library
+##
+##
+## Using the first match ...
+
+- Create a vector containing the values 1,1,2,2,3,3 using rep
+
+## [1] 1 1 2 2 3 3
+
+- Create a vector containing the values 1,1,5,7,9,10. Overwrite the
+variable x with this.
+
+## [1] 1 1 5 7 9 10
+Exercise 2
+
+- Create a vector containing the values 1 to 10.
+
+## [1] 1 2 3 4 5 6 7 8 9 10
+
+- Create a new vector with all but the first and last value.
+
+## [1] 2 3 4 5 6 7 8 9
+
+- Create a new vector with all but the second and fifth value.
+
+## [1] 1 3 4 6 7 8 9 10
+
+- Create a new vector of square root of the sixth and seventh
+position.
+
+## [1] 2.449490 2.645751
+
+- Create a new vector of alternating positions in the vector using
+another vector.
+
+## [1] 1 3 5 7 9
+Exercise 3
+
+- Check the help page for the paste() function.
+
+## Help on topic 'paste' was found in the following packages:
+##
+## Package Library
+## base /usr/local/lib/R/library
+## BiocGenerics /usr/local/lib/R/host-site-library
+##
+##
+## Using the first match ...
+
+- Combine the two characters: “A” and “B”
+
+## [1] "A B"
+
+- Change the separator to an underscore
+
+## [1] "A_B"
+Exercise 4
+
+- Create a vector with these gene names: “PKM”, “ADPRH”, “TDG”,
+“ATP4A”, “SLC6A4”, “CAPN3”, “TDG”, “ATP1A2”,“PKM”
+- Subset to just the unique genes
+
+my_genes <- c("PKM", "ADPRH", "TDG", "ATP4A", "SLC6A4", "CAPN3", "TDG", "ATP1A2","PKM")
+my_genes <- unique(my_genes)
+my_genes
+## [1] "PKM" "ADPRH" "TDG" "ATP4A" "SLC6A4" "CAPN3" "ATP1A2"
+
+- Create a second vector with these gene names: “SLC6A4”, “CAPN3”,
+“TDG”, “ATP1A2”, “IMPA1”, “PDXK”.
+- Check which genes from vector 1 are present in vector 2.
+- Subset vector 1 depending on whether the gene is present in vector
+2.
+
+my_genes_of_interest <- c("SLC6A4", "CAPN3", "TDG", "ATP1A2", "IMPA1", "PDXK")
+idx <- my_genes %in% my_genes_of_interest
+idx
+## [1] FALSE FALSE TRUE FALSE TRUE TRUE TRUE
+
+## [1] "TDG" "SLC6A4" "CAPN3" "ATP1A2"
+my_genes_of_interest <- c("SLC6A4", "CAPN3", "TDG", "ATP1A2", "IMPA1", "PDXK")
+idx <- my_genes %in% my_genes_of_interest
+idx
+## [1] FALSE FALSE TRUE FALSE TRUE TRUE TRUE
+
+## [1] "TDG" "SLC6A4" "CAPN3" "ATP1A2"
+
+- Create a vector with these gene names: “SMC1”, “SMC3”, “SCC1”,
+“SCC3”, “RAD21”, “NIPBL”, “SMC2”, “SMC4”,“CAPH”,“CAPD3”
+- Subset to just the genes containing SMC
+
+my_genes <- c("SMC1", "SMC3", "SCC1", "SCC3", "RAD21", "NIPBL", "SMC2", "SMC4","CAPH","CAPD3")
+my_genes[grepl("SMC",my_genes)]
+## [1] "SMC1" "SMC3" "SMC2" "SMC4"
+Exercise 5
+
+- Create a vector of the gene names Gene_1, Gene_2, Gene_3 Gene_4
+- Create a vector of the expression values 1000, 3000, 10000,
+12000
+- Create a vector of the gene lengths 100, 3000, 200, 1000
+
+## Gene_1 Gene_2 Gene_3 Gene_4
+## 1000 3000 10000 12000
+## Gene_1 Gene_2 Gene_3 Gene_4
+## 100 3000 200 1000
+
+- Find the longest gene.
+
+## [1] "Gene_2"
+## [1] "Gene_2"
+
+- Identify genes which have a length greater than 100 and expression
+greater than 10000
+
+## [1] "Gene_4"
+
+
+- Calculate the expression over the gene length for all genes (Length
+normalised expression).
+
+## Gene_1 Gene_2 Gene_3 Gene_4
+## 10 1 50 12
+
+- Identify genes with a length normalised expression greater than the
+average
+
+## [1] "Gene_3"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/r_course/exercises/exercises/conditionsAndLoops_exercise.html b/r_course/exercises/exercises/conditionsAndLoops_exercise.html
index 728e753..eddba83 100644
--- a/r_course/exercises/exercises/conditionsAndLoops_exercise.html
+++ b/r_course/exercises/exercises/conditionsAndLoops_exercise.html
@@ -339,12 +339,34 @@ Introduction
to R.
-
– Calculate the factorial (factorial of 3 = 3 * 2 * 1) of 10 using a
-loop.
-## [1] 3628800
-– Adjusting your answer from before, what is the first number that
-has a factorial greater than 1000.
-## [1] 7
+Exercise 1 - If Else
+
+- Build an if statement that tests if x is a negative number. Write a
+print statement that will confirm if x is less than 0. Test it with
+several values of x.
+
+## [1] "It's a negative number!"
+
+- Modify the previous if statement to include an else. Ensure a print
+statement is returned to say if the number is not negative.
+
+## [1] "It's not a negative number!"
+## [1] "It's a negative number!"
+
+- Finally add an else if statement. We want a response to confirm if x
+is negative, positive or if it is zero.
+
+## [1] "It's a negative number!"
+## [1] "It's zero"
+## [1] "It's a positive number!"
+
+- Build an if/else statement that tests if a variable is odd/even.
+Include x in the printed output.
+
+Hint: The modulus operator may be useful here i.e. x%%2 returns
+the remainder after the value of x is divided by 2.
+## [1] "1 is odd"
+## [1] "2 is even"
– Using an ifelse() expression, create a factor from a vector of 1 to
40 where all numbers less than 10 are “small”,10 to 30 are “mid”,31 to
40 are “big”
@@ -355,8 +377,19 @@
+code {
+white-space: pre;
+}
+.sourceCode {
+overflow: visible;
+}
+
+
+
@@ -337,11 +437,49 @@ Introduction
+These exercises are about the factors and data frames sections of Introduction
to R.
-These exercises cover the factors and data frames sections of Introduction to R.
Exercise 1 - Factors
+- Create a nominal factor called CellType containing:
+“DC1”,“DC1”,“DC1”,“NK”,“NK”,“Mono”,“Mono”,“DC2”,“NK”
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK
+
+- Modify the third position of CellType to “Neu”, by modifying the
+levels of the factor.
+
+## [1] DC1 DC1 Neu NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK Neu
+
+- Create CellType2 with the same entries, but directly specify the
+levels to include: “DC1”, “DC2”, “Mono”, “NK”, “Neu”, “Bcell”,
+“Tcell”.
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
+- Use combine to increase the length of CellType2 to include:
+“Neu”,“Neu”,“Bcell”,“DC1”
+
+## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK Neu Neu Bcell
+## [13] DC1
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
+- Summarize the number of entries for each cell type.
+
+
+## DC1 DC2 Mono NK Neu Bcell Tcell
+## 4 1 2 3 2 1 0
+
+- Reorder the summary to alphabetical order
+
+
+## Bcell DC1 DC2 Mono Neu NK Tcell
+## 1 4 1 2 2 3 0
+
- Create a ordinal factor named “Height” containing – high, low, mid,
low, mid, low, mid, high, mid, high.
@@ -361,20 +499,65 @@
-
@@ -423,13 +412,13 @@
- conditionsAndLoops
+ ConditionsAndLoops
@@ -481,7 +470,7 @@ Course Overview
Course Integrity
-This course is compiled automatically on 2024-12-10
+This course is compiled automatically on 2024-12-11
The course is tested and available on MacOS, Windows and
diff --git a/r_course/presentations/singlepage/introToR_Session1.html b/r_course/presentations/singlepage/introToR_Session1.html
index ba083d8..8c6933c 100644
--- a/r_course/presentations/singlepage/introToR_Session1.html
+++ b/r_course/presentations/singlepage/introToR_Session1.html
@@ -2084,11 +2084,11 @@ Combining logical vectors
-
-Answers to exercise.
-Answers can be found here here
+
+Answers to exercise
+Answers can be found here here
@@ -2395,11 +2395,11 @@ Data types
Time for an exercise!
-Exercise on matrices can be found here
+Exercise on matrices can be found here
-
-Answers to exercise.
-Answers can be found here here
+
+Answers to exercise
+Answers can be found here here
@@ -2453,8 +2453,7 @@ Display order of levels
Nominal factors
In some cases there is no natural order to the categories such that
-one category is greater than the other (nominal data). In this case we
-can see that R is gender neutral.
+one category is greater than the other (nominal data).
factorExample <- factor(vectorExample, levels=c("male","female"))
factorExample[1] < factorExample[2]
## Warning in Ops.factor(factorExample[1], factorExample[2]): '<' not meaningful
@@ -2798,11 +2797,11 @@ Merging data frames
Time for an exercise!
-Exercise on data frames can be found here
+Exercise on data frames can be found here
-
-Answers to exercise.
-Answers can be found here here
+
+Answers to exercise
+Answers can be found here here
@@ -2953,6 +2952,14 @@ Flattening lists to matrices
## Second 2 6 7
## Third 1 4 7
+
+Time for an exercise!
+Exercise on lists can be found here
+
+
+Answers to exercise
+Answers can be found here here
+
Coercing data formats
@@ -3075,7 +3082,7 @@ More complex objects
manage Dates or Times.
-## [1] "2024-12-10 21:07:46 UTC"
+## [1] "2024-12-11 01:00:32 UTC"
More complex objects
@@ -3103,11 +3110,11 @@ More complex objects
More complex objects
We can also use the arithmetic operations with our time objects.
-## [1] "2024-12-10 21:07:46 UTC"
+## [1] "2024-12-11 01:00:32 UTC"
-## [1] "2024-12-10 21:05:46 UTC"
+## [1] "2024-12-11 00:58:32 UTC"
-## Time difference of 0.1043649 secs
+## Time difference of 0.1018391 secs
More complex objects
@@ -3118,18 +3125,18 @@ More complex objects
We can also change the timezone by specifying a tz
parameter
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
Complex to base objects
Most of the time we can convert more complex object back to our basic
object types we are more familar with.
-## [1] "2024-12-10 21:07:46.094567"
+## [1] "2024-12-11 01:00:32.629524"
-## [1] 0.1043649
+## [1] 0.1018391
Complex objects summary
@@ -3238,13 +3245,13 @@ Data from external sources
Gene_a
-3.973947
+3.658047
-3.930245
+5.530241
-2.474149
+3.272248
@@ -3252,13 +3259,13 @@ Data from external sources
Gene_b
-3.461118
+3.778873
-3.985219
+5.652030
-5.081024
+3.694406
@@ -3266,13 +3273,13 @@ Data from external sources
Gene_c
-4.363544
+3.918055
-4.808006
+4.841402
-4.068423
+5.957884
@@ -3280,13 +3287,13 @@ Data from external sources
Gene_d
-5.089677
+3.223094
-1.477786
+3.130542
-4.949975
+4.522371
@@ -3294,13 +3301,13 @@ Data from external sources
Gene_e
-9.478252
+10.369538
-10.525974
+10.172782
-9.489895
+10.039658
@@ -3308,13 +3315,13 @@ Data from external sources
Gene_f
-9.167348
+11.044072
-10.601165
+10.561451
-9.324316
+9.909005
@@ -3322,13 +3329,13 @@ Data from external sources
Gene_g
-9.399451
+8.915426
-11.443669
+11.178636
-9.400897
+10.473599
@@ -3336,13 +3343,13 @@ Data from external sources
Gene_h
-11.166681
+9.649608
-11.069730
+10.231282
-9.805737
+10.386836
@@ -3655,11 +3662,11 @@ Save and read data
remembers the objects original name i.e. Table or myList.
-
+
Time for an exercise!
Exercise on reading and writing data can be found here
-
+
Answers to exercise
Answers can be found here
diff --git a/r_course/presentations/singlepage/introToR_Session2.html b/r_course/presentations/singlepage/introToR_Session2.html
index eb84eb9..65a321d 100644
--- a/r_course/presentations/singlepage/introToR_Session2.html
+++ b/r_course/presentations/singlepage/introToR_Session2.html
@@ -2151,11 +2151,11 @@ sapply() example 3
-## Mean is 0.389048273564465
-## [1] 0.3648576
+## Mean is 0.416867907369881
+## [1] -0.002603881
Debugging functions
@@ -2406,10 +2406,10 @@ Debugging functions
Custom functions and apply
These custom functions can also be utilized with apply.
-## [1] 0.36485764 1.15853052 -0.87955629 0.30247335 -0.27396124 -0.41809173
-## [7] 0.52441334 0.69940853 1.12897715 0.07544800 -1.69037492 1.56788608
-## [13] -1.27228116 1.51188113 -0.05544895 0.88612324 -0.78095665 -1.69200058
-## [19] -0.90606862 -0.25125886
+## [1] -0.002603881 -0.128616732 0.538543594 1.050796185 -0.562454904
+## [6] 1.331683190 -0.208345806 -0.621922028 -2.431404457 -0.149177218
+## [11] -0.905864425 0.441651262 -0.378431902 -0.195380445 -1.361557140
+## [16] 1.155218949 -0.223992938 2.123924129 0.421056362 0.106878204
@@ -2515,7 +2515,7 @@ Sourcing scripts
}
-## [1] "Tue"
+## [1] "Wed"
Rscript
diff --git a/r_course/presentations/slides/introToR_Session1.html b/r_course/presentations/slides/introToR_Session1.html
index 173c79a..c10ab48 100644
--- a/r_course/presentations/slides/introToR_Session1.html
+++ b/r_course/presentations/slides/introToR_Session1.html
@@ -1020,14 +1020,14 @@
## Time for an exercise!
-Exercise on vectors can be found [here](../../exercises/exercises/vector_exercise.html)
+Exercise on vectors can be found [here](../../exercises/exercises/Vectors_exercise.html)
---
-## Answers to exercise.
+## Answers to exercise
-Answers can be found here [here](../../exercises/answers/vector_answers.html)
+Answers can be found here [here](../../exercises/answers/Vectors_answers.html)
---
@@ -1583,14 +1583,14 @@
## Time for an exercise!
-Exercise on matrices can be found [here](../../exercises/exercises/matrices_exercise.html)
+Exercise on matrices can be found [here](../../exercises/exercises/Matrices_exercise.html)
---
-## Answers to exercise.
+## Answers to exercise
-Answers can be found here [here](../../exercises/answers/matrices_answers.html)
+Answers can be found here [here](../../exercises/answers/Matrices_answers.html)
@@ -1690,7 +1690,6 @@
## Nominal factors
In some cases there is no natural order to the categories such that one category is greater than the other (nominal data).
-In this case we can see that R is gender neutral.
``` r
@@ -2264,13 +2263,13 @@
---
## Time for an exercise!
-Exercise on data frames can be found [here](../../exercises/exercises/factorsAndDataframes_exercise.html)
+Exercise on data frames can be found [here](../../exercises/exercises/FactorsAndDataframes_exercise.html)
---
-## Answers to exercise.
+## Answers to exercise
-Answers can be found here [here](../../exercises/answers/factorsAndDataframes_answers.html)
+Answers can be found here [here](../../exercises/answers/FactorsAndDataframes_answers.html)
---
@@ -2495,6 +2494,21 @@
## Third 1 4 7
```
+---
+## Time for an exercise!
+
+
+Exercise on lists can be found [here](../../exercises/exercises/Lists_exercise.html)
+
+
+---
+## Answers to exercise
+
+
+Answers can be found here [here](../../exercises/answers/Lists_answers.html)
+
+
+
---
class: inverse, center, middle
@@ -2708,7 +2722,7 @@
```
```
-## [1] "2024-12-10 21:07:43 UTC"
+## [1] "2024-12-11 01:00:30 UTC"
```
---
@@ -2763,7 +2777,7 @@
```
```
-## [1] "2024-12-10 21:07:43 UTC"
+## [1] "2024-12-11 01:00:30 UTC"
```
``` r
@@ -2771,7 +2785,7 @@
```
```
-## [1] "2024-12-10 21:05:43 UTC"
+## [1] "2024-12-11 00:58:30 UTC"
```
``` r
@@ -2779,7 +2793,7 @@
```
```
-## Time difference of 0.1014812 secs
+## Time difference of 0.09647393 secs
```
---
@@ -2797,7 +2811,7 @@
```
```
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
```
``` r
@@ -2805,7 +2819,7 @@
```
```
-## [1] "21 O'Clock PM Tuesday on December 10th"
+## [1] "01 O'Clock AM Wednesday on December 11th"
```
---
@@ -2819,7 +2833,7 @@
```
```
-## [1] "2024-12-10 21:07:43.989243"
+## [1] "2024-12-11 01:00:30.832828"
```
``` r
@@ -2827,7 +2841,7 @@
```
```
-## [1] 0.1014812
+## [1] 0.09647393
```
---
@@ -2944,51 +2958,51 @@
<tbody>
<tr>
<td style="text-align:left;"> Gene_a </td>
- <td style="text-align:right;"> 3.423364 </td>
- <td style="text-align:right;"> 2.551130 </td>
- <td style="text-align:right;"> 2.575654 </td>
+ <td style="text-align:right;"> 4.429622 </td>
+ <td style="text-align:right;"> 3.363374 </td>
+ <td style="text-align:right;"> 4.520210 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_b </td>
- <td style="text-align:right;"> 4.080952 </td>
- <td style="text-align:right;"> 5.865549 </td>
- <td style="text-align:right;"> 3.958085 </td>
+ <td style="text-align:right;"> 4.114483 </td>
+ <td style="text-align:right;"> 3.992814 </td>
+ <td style="text-align:right;"> 2.877940 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_c </td>
- <td style="text-align:right;"> 4.473275 </td>
- <td style="text-align:right;"> 3.771709 </td>
- <td style="text-align:right;"> 4.611821 </td>
+ <td style="text-align:right;"> 3.521606 </td>
+ <td style="text-align:right;"> 3.215211 </td>
+ <td style="text-align:right;"> 5.199799 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_d </td>
- <td style="text-align:right;"> 3.415446 </td>
- <td style="text-align:right;"> 3.817052 </td>
- <td style="text-align:right;"> 2.907351 </td>
+ <td style="text-align:right;"> 3.840954 </td>
+ <td style="text-align:right;"> 3.860644 </td>
+ <td style="text-align:right;"> 3.688784 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_e </td>
- <td style="text-align:right;"> 11.960879 </td>
- <td style="text-align:right;"> 9.113075 </td>
- <td style="text-align:right;"> 9.927639 </td>
+ <td style="text-align:right;"> 11.896069 </td>
+ <td style="text-align:right;"> 10.056214 </td>
+ <td style="text-align:right;"> 10.062543 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_f </td>
- <td style="text-align:right;"> 9.821113 </td>
- <td style="text-align:right;"> 8.704334 </td>
- <td style="text-align:right;"> 9.940262 </td>
+ <td style="text-align:right;"> 9.947901 </td>
+ <td style="text-align:right;"> 8.884012 </td>
+ <td style="text-align:right;"> 8.773162 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_g </td>
- <td style="text-align:right;"> 10.763196 </td>
- <td style="text-align:right;"> 9.869513 </td>
- <td style="text-align:right;"> 9.393374 </td>
+ <td style="text-align:right;"> 9.640603 </td>
+ <td style="text-align:right;"> 10.470026 </td>
+ <td style="text-align:right;"> 9.591002 </td>
</tr>
<tr>
<td style="text-align:left;"> Gene_h </td>
- <td style="text-align:right;"> 11.401766 </td>
- <td style="text-align:right;"> 9.550342 </td>
- <td style="text-align:right;"> 11.530905 </td>
+ <td style="text-align:right;"> 11.954618 </td>
+ <td style="text-align:right;"> 10.533096 </td>
+ <td style="text-align:right;"> 11.728931 </td>
</tr>
</tbody>
</table>
diff --git a/r_course/presentations/slides/introToR_Session2.html b/r_course/presentations/slides/introToR_Session2.html
index dfdd2ed..cc513c9 100644
--- a/r_course/presentations/slides/introToR_Session2.html
+++ b/r_course/presentations/slides/introToR_Session2.html
@@ -1027,13 +1027,13 @@
## Time for an exercise!
-Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/conditionsAndLoops_exercise.html)
+Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/ConditionsAndLoops_exercise.html)
---
## Answers to exercise
-Answers can be found here [here](../..//exercises/answers/conditionsAndLoops_answers.html)
+Answers can be found here [here](../..//exercises/answers/ConditionsAndLoops_answers.html)
---
@@ -1388,11 +1388,11 @@
```
```
-## Mean is -0.0417158165317701
+## Mean is -0.138706455190791
```
```
-## [1] -0.555588
+## [1] 1.020659
```
---
@@ -1417,10 +1417,10 @@
```
```
-## [1] -0.5555880 -0.1322663 0.1899138 -0.2299799 -1.6904861 0.4030488
-## [7] 0.9826960 -0.1899368 1.5052016 1.1021064 -0.8039631 -0.1358766
-## [13] -0.1283686 0.4864651 0.7693742 -2.5554399 1.2469762 -1.0641361
-## [19] 0.5323066 0.2679527
+## [1] 1.02065900 0.47143522 -0.11262739 0.35643079 -1.19629115 -0.07521402
+## [7] -0.55040037 0.13963265 1.40238593 -1.51405434 -1.32555234 -1.04840617
+## [13] 1.84708611 0.46460992 1.63750080 -0.51232894 -0.95341254 0.89065505
+## [19] -0.51716384 -0.42494437
```
@@ -1569,7 +1569,7 @@
```
```
-## [1] "Tue"
+## [1] "Wed"
```
---