From fbd3a402d72cd3190ac4e09083c784a97484ef2c Mon Sep 17 00:00:00 2001 From: root Date: Wed, 11 Dec 2024 01:00:43 +0000 Subject: [PATCH] Autobuild --- docs/_site.yml | 30 +- docs/data/writeThisMultipleXLSX.xlsx | Bin 6445 -> 6444 bytes docs/data/writeThisXLSX.xlsx | Bin 5815 -> 5814 bytes .../answers/DataInputOutput_answers.html | 98 ++- docs/exercises/answers/Functions_answers.html | 141 ++-- .../exercises/answers/Lists_answers.html | 146 ++-- .../exercises/answers/Vectors_answers.html | 0 .../answers/conditionsAndLoops_answers.html | 211 ++++-- .../answers/factorsAndDataframes_answers.html | 173 ++++- .../exercises/DataInputOutput_exercise.html | 169 ++++- .../exercises/Functions_exercise.html | 25 +- .../exercises/exercises/Lists_exercise.html | 233 ++++-- .../exercises/exercises/Vectors_exercise.html | 0 .../conditionsAndLoops_exercise.html | 49 +- .../factorsAndDataframes_exercise.html | 235 +++++- docs/index.html | 37 +- .../singlepage/introToR_Session1.html | 103 +-- .../singlepage/introToR_Session2.html | 18 +- .../slides/introToR_Session1.html | 98 +-- .../slides/introToR_Session2.html | 18 +- r_course/_site.yml | 30 +- r_course/data/writeThisMultipleXLSX.xlsx | Bin 6445 -> 6444 bytes r_course/data/writeThisXLSX.xlsx | Bin 5815 -> 5814 bytes .../answers/DataInputOutput_answers.html | 98 ++- .../exercises/answers/Functions_answers.html | 141 ++-- r_course/exercises/answers/Lists_answers.html | 583 +++++++++++++++ .../exercises/answers/Vectors_answers.html | 687 ++++++++++++++++++ .../answers/conditionsAndLoops_answers.html | 211 ++++-- .../answers/factorsAndDataframes_answers.html | 173 ++++- .../exercises/DataInputOutput_exercise.html | 169 ++++- .../exercises/Functions_exercise.html | 25 +- .../exercises/exercises/Lists_exercise.html | 583 +++++++++++++++ .../exercises/exercises/Vectors_exercise.html | 651 +++++++++++++++++ .../conditionsAndLoops_exercise.html | 49 +- .../factorsAndDataframes_exercise.html | 235 +++++- r_course/index.html | 37 +- 
.../singlepage/introToR_Session1.html | 103 +-- .../singlepage/introToR_Session2.html | 18 +- .../slides/introToR_Session1.html | 98 +-- .../slides/introToR_Session2.html | 18 +- 40 files changed, 4856 insertions(+), 837 deletions(-) rename r_course/exercises/answers/Dataframes_answers.html => docs/exercises/answers/Lists_answers.html (98%) rename r_course/exercises/answers/vector_answers.html => docs/exercises/answers/Vectors_answers.html (100%) rename r_course/exercises/exercises/Dataframes_exercise.html => docs/exercises/exercises/Lists_exercise.html (98%) rename r_course/exercises/exercises/vector_exercise.html => docs/exercises/exercises/Vectors_exercise.html (100%) create mode 100644 r_course/exercises/answers/Lists_answers.html create mode 100644 r_course/exercises/answers/Vectors_answers.html create mode 100644 r_course/exercises/exercises/Lists_exercise.html create mode 100644 r_course/exercises/exercises/Vectors_exercise.html diff --git a/docs/_site.yml b/docs/_site.yml index e320864..42727c8 100644 --- a/docs/_site.yml +++ b/docs/_site.yml @@ -24,42 +24,36 @@ navbar: href: presentations/r_code/introToR_Session2.R - text: Exercises menu: - - text: vector + - text: Vectors menu: - text: Exercise - href: exercises/exercises/vector_exercise.html + href: exercises/exercises/Vectors_exercise.html - text: Solution - href: exercises/answers/vector_answers.html - - text: matrices + href: exercises/answers/Vectors_answers.html + - text: Matrices menu: - text: Exercise - href: exercises/exercises/matrices_exercise.html + href: exercises/exercises/Matrices_exercise.html - text: Solution - href: exercises/answers/matrices_answers.html - - text: factorsAndDataframes + href: exercises/answers/Matrices_answers.html + - text: FactorsAndDataframes menu: - text: Exercise - href: exercises/exercises/factorsAndDataframes_exercise.html + href: exercises/exercises/FactorsAndDataframes_exercise.html - text: Solution - href: exercises/answers/factorsAndDataframes_answers.html - 
- text: Dataframes - menu: - - text: Exercise - href: exercises/exercises/Dataframes_exercise.html - - text: Solution - href: exercises/answers/Dataframes_answers.html + href: exercises/answers/FactorsAndDataframes_answers.html - text: DataInputOutput menu: - text: Exercise href: exercises/exercises/DataInputOutput_exercise.html - text: Solution href: exercises/answers/DataInputOutput_answers.html - - text: conditionsAndLoops + - text: ConditionsAndLoops menu: - text: Exercise - href: exercises/exercises/conditionsAndLoops_exercise.html + href: exercises/exercises/ConditionsAndLoops_exercise.html - text: Solution - href: exercises/answers/conditionsAndLoops_answers.html + href: exercises/answers/ConditionsAndLoops_answers.html - text: Functions menu: - text: Exercise diff --git a/docs/data/writeThisMultipleXLSX.xlsx b/docs/data/writeThisMultipleXLSX.xlsx index ae4f2172b4a819087ee7702eef73aa481008d9ca..f53e6846f5af336896ef29e9bee469c8c2a818c0 100644 GIT binary patch delta 334 zcmV-U0kQtAGORMNqZ0++mITEllc*Caf5|ST2-|_S2GN3`42-Z+nXW+}%X$8YvJknEytf7#w1t3X?OiP# z6V}6rtHCo1iA+Fbu^| zjn#sN`ZXZCjH9WVUc|w$Seh5 z`?yEtvwD76&QV*E@;;|!PU(_R97YJM%J1FCi~bW(ABNmaJOcK)FF!&14^T@31e4zw g8nXu#qXPxsmITElldl+50qBzi86yUg761SM0F}?50RR91 delta 335 zcmV-V0kHn8GOaSOqZ0+YUf6CWlc*Caf6EDJ5w-(u$*F-tQ(Afx(#8?Bevs5Ue)}p; z;w0cxkE@yec6L#@+^aVE1?Q#F6(U)V642ZTshbMDZ&v98N_=2iu-0g(P!Aq0>${R$ z%8i3pXDqlN!6y-|ASyAUkJ*z+AI=CkO~QQR7*Scs)-ux0$4HGqpeUtk4+e+d>C zJ^)iY6441M_=vPWt{nh^V`xEv4jz+?;EOkaQ+~2xP_CRxhTg)Y^(>C=y_ci0>$0|b-a h7#gz%6{72GN3`42-Z+nXW+}%X$8YvJknEytf7#w1t3X?OiP# z6V}6rtHCo1iA+Fbu^| zjn#sN`ZXZCjH9WVUc|w$Seh5 z`?yEtvwD76&QV*E@;;|!PU(_R7)A)I%J1FCi~bW(ABNmaJOcK)FF!&153{}!b^`_9 YmITEllQ|Yt0lkxs79$2?6aWAK085;lpa1{> delta 327 zcmV-N0l5CQEw?SOco7A=Uf6CWlYJ2?f6EDJ5w-(u$*F-tQ(Afx(#8?Bevs5Ue)}p; z;w0cxkE@yec6L#@+^aVE1?Q#F6(U)V642ZTshbMDZ&v98N_=2iu-0g(P!Aq0>${R$ z%8i3pXDqlN!6y-|ASyAUkJ*z+AI=CkO~QQR7*Scs)-ux0$4HGqpeUtk4+e+d>C zJ^)iY6441M_=vPWt{nh^V`xEv4jz+?;EOkaQ+~2xP_CRxhTg)Y^(>C=y_ci0>$Introduction +

These exercises are about reading and writing data sections of Introduction to R.

Exercise 1

Have a look at all files in notepad or excel before attempting to read. All files can be found in the “data” directory.

    -
  • Read in the tab delimited file “GeneExpression.txt”.
  • +
  • Check your current working directory. Set your working directory to +be in the downloaded course material, specifically the r_course +subdirectory. [The exact path will depend on where you have saved your +download]
-
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+
getwd()
+
[1] "/Users/mattpaul"
+
setwd("~/Downloads/Intro_To_R_1Day-master/r_course")
    -
  • Find the mean expression of all samples.
  • +
  • Read in the tab delimited file “GeneExpression.txt”. Check the data +type.
-
sampleMeans <- c(mean(geneExpression[,1]),mean(geneExpression[,2]),mean(geneExpression[,3]),mean(geneExpression[,4]),mean(geneExpression[,5]),mean(geneExpression[,6]))
-names(sampleMeans) <- colnames(geneExpression)
-sampleMeans
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
-
# Alternatively we could use the colMeans() function.
-colMeans(geneExpression)
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
+
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+is(geneExpression)
+
## [1] "data.frame"       "list"             "oldClass"         "vector"          
+## [5] "list_OR_List"     "vector_OR_factor" "vector_OR_Vector"
+
    +
  • Coerce the data frame to a matrix
  • +
+
geneExpression <- as.matrix(geneExpression)
+
    +
  • Find the mean expression of all genes.
  • +
+
sampleMeans <- c(mean(geneExpression[1,]),mean(geneExpression[2,]),mean(geneExpression[3,]),mean(geneExpression[4,]),mean(geneExpression[5,]),mean(geneExpression[6,]),mean(geneExpression[7,]),mean(geneExpression[8,]))
+names(sampleMeans) <- rownames(geneExpression)
+sampleMeans
+
##    Gene_a    Gene_b    Gene_c    Gene_d    Gene_e    Gene_f    Gene_g    Gene_h 
+##  4.660569  4.379796  4.259824  5.849420  5.850658  6.732781 10.405203 10.201357
+
+# Alternatively we could use the rowMeans() function.
+# rowMeans(geneExpression)
+
    +
  • Coerce the matrix back to a data frame. Add an additional column +with extra gene info +“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”. +Also add the mean expression as a column.
  • +
+
geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
+## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
+## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
+## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
+## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
+## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
+## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
+## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
+##        Sample_1.low tissue_localisation mean_expr
+## Gene_a    5.9350948              Kidney  4.660569
+## Gene_b    2.6313925             Adrenal  4.379796
+## Gene_c    5.7149521               Liver  4.259824
+## Gene_d    8.1964109             Adrenal  5.849420
+## Gene_e    0.7332521              Kidney  5.850658
+## Gene_f    3.8519471               Liver  6.732781
+## Gene_g   11.1733928               Liver 10.405203
+## Gene_h    9.9032500              Kidney 10.201357
+
    +
  • Read in tab delimited file “GeneExpression.txt”. Order genes by +decreasing mean expression. Filter out genes with mean +expression below 5. Write out a new comma separated file with column +titles.
  • +
+
orderedExpression <- geneExpression[order(geneExpression$mean_expr,decreasing=T),]
+filteredExpression <- orderedExpression[orderedExpression$mean_expr>5,]
+expressionDF <- cbind(rownames(filteredExpression),filteredExpression)
+colnames(expressionDF)[1] <- "geneNames"
+write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
  • Read in the tab delimited file “GeneExpressionWithMethods.txt”. This file contains information on analysis steps used to produce file.
-
geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
-geneExpression
+
geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
+geneExpression
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
 ## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
 ## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
@@ -486,8 +540,8 @@ 

geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
-geneExpression
+
geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
+geneExpression
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
 ## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
 ## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
@@ -506,16 +560,6 @@ 

geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
-orderedExpression <- geneExpression[order(geneExpression$Sample_1.hi,decreasing=T),]
-expressionDF <- cbind(rownames(orderedExpression),orderedExpression)
-colnames(expressionDF)[1] <- "geneNames"
-write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
diff --git a/docs/exercises/answers/Functions_answers.html b/docs/exercises/answers/Functions_answers.html index 8026aa8..f85ece8 100644 --- a/docs/exercises/answers/Functions_answers.html +++ b/docs/exercises/answers/Functions_answers.html @@ -439,6 +439,7 @@

Introduction to R.

+

Exercise 1 - Functions

– Create a function which takes one number and returns the square of that number

squareOfNumber <- function(number){
@@ -457,30 +458,35 @@ 

meanOfNumbers(1,3)

## [1] 2
    -
  • Create a function which takes two vectors and returns the mean
  • +
  • Create a function which takes two vectors and returns the mean. +Include a message statement that gives the total length of both +vectors.
meanOfVectors <- function(vector,vector2){
-  meanOfVectorsRes <- mean(c(vector,vector2))
-  return(meanOfVectorsRes)
-}
-
-meanOfVectors(1,3)
-
## [1] 2
+ bigvector <- c(vector,vector2) + message(paste("The total length of my vectors is", length(bigvector))) + meanOfVectorsRes <- mean(bigvector ) + return(meanOfVectorsRes) +} + +meanOfVectors(c(1,3,4),c(3,6,1,7,9))

+
## The total length of my vectors is 8
+
## [1] 4.25
  • Create a function which takes two numbers and returns the two numbers as a vector and the mean, summary and multiple as a data.frame.
-
dfAndVecReturn <- function(number,number2){
-  input <- c(number,number2)
-  df <- data.frame(mean=mean(c(number,number2)),
-                   sum=number+number2,
-                   multiple=number*number2
-                   )
-  return(list(input,df))
-}
-
-dfAndVecReturn(1,3)
+
dfAndVecReturn <- function(number,number2){
+  input <- c(number,number2)
+  df <- data.frame(mean=mean(c(number,number2)),
+                   sum=number+number2,
+                   multiple=number*number2
+                   )
+  return(list(input,df))
+}
+
+dfAndVecReturn(1,3)
## [[1]]
 ## [1] 1 3
 ## 
@@ -489,48 +495,77 @@ 

findSmallestFactorial <- function(x){
-    factorialAnswer <- 0
-    count <- 0
-    while(factorialAnswer <= x){
-      count <- count+1
-      if(count == 1){
-        factorialAnswer <- 1
-      }else{
-        factorialAnswer <- factorialAnswer * count 
-      }
-    }
-    return(count)
-}
-
-findSmallestFactorial(3000)
+
findSmallestFactorial <- function(x){
+    factorialAnswer <- 0
+    count <- 0
+    while(factorialAnswer <= x){
+      count <- count+1
+      if(count == 1){
+        factorialAnswer <- 1
+      }else{
+        factorialAnswer <- factorialAnswer * count 
+      }
+    }
+    return(count)
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
findSmallestFactorial(10^100)
+
findSmallestFactorial(10^100)
## [1] 70

– Add a if and else statement in your function to only calculate factorial code if argument is a numeric.

-
findSmallestFactorial <- function(x){
-  if(!is.numeric(x)){
-    message("Please provide a numeric argument!")
-  }else{
-    factorialAnswer <- 0
-    count <- 0
-    while(factorialAnswer <= x){
-      count <- count+1
-      if(count == 1){
-        factorialAnswer <- 1
-      }else{
-        factorialAnswer <- factorialAnswer * count 
-      }
-    }
-    return(count)
-  }
-}
-
-findSmallestFactorial(3000)
+
findSmallestFactorial <- function(x){
+  if(!is.numeric(x)){
+    message("Please provide a numeric argument!")
+  }else{
+    factorialAnswer <- 0
+    count <- 0
+    while(factorialAnswer <= x){
+      count <- count+1
+      if(count == 1){
+        factorialAnswer <- 1
+      }else{
+        factorialAnswer <- factorialAnswer * count 
+      }
+    }
+    return(count)
+  }
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
findSmallestFactorial("Hello")
+
findSmallestFactorial("Hello")
## Please provide a numeric argument!
+

Exercise 2 - Scripts

+

Let's try to put together as much as we can of what we have learnt thus far. This +will be a multistep challenge. Break it down and use pseudocode to help. +Start by working the code interactively, then turn it into a script.

+
    +
  1. Read in the “data/GeneExpression.txt” dataset.
  2. +
  3. Use apply() to calculate the Z score for each gene (per row). The +Z score is (gene_expression - mean)/standard deviation. You should use a +function to do this calculation.
  4. +
  5. Find which gene has the highest absolute Z score. This is a very +rough proxy for the variability of that gene.
  6. +
  7. Print out the gene name with the highest value
  8. +
  9. Turn this into a script and run the script
  10. +
  11. Think about what modifications you would need to make in order to +accept a different data set as input.
  12. +
+
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+geneExpression <-as.matrix(geneExpression)
+zscores <- function(x){ # Z score of every value in x: (value - mean) / standard deviation
+    my_mean <- mean(x)
+    my_sd <- sd(x)
+    my_z <- (x-my_mean)/my_sd
+    return(my_z)}
+# MARGIN = 1 applies zscores per gene (row); apply() returns each result as a column, so t() puts genes back on rows.
+my_zs <- t(apply(geneExpression,1, zscores))
+# Compare absolute values so a strongly negative Z score counts as well.
+inds <- which(abs(my_zs) == max(abs(my_zs)), arr.ind = T)
+rownames(geneExpression)[inds[,1]]
+
## [1] "Gene_h"
diff --git a/r_course/exercises/answers/Dataframes_answers.html b/docs/exercises/answers/Lists_answers.html similarity index 98% rename from r_course/exercises/answers/Dataframes_answers.html rename to docs/exercises/answers/Lists_answers.html index 1ded227..86ec741 100644 --- a/r_course/exercises/answers/Dataframes_answers.html +++ b/docs/exercises/answers/Lists_answers.html @@ -12,7 +12,7 @@ -Factors and Data frames +Lists @@ -337,22 +437,68 @@

Introduction +

These exercises are about reading and writing data sections of Introduction to R.

Exercise 1

Have a look at all files in notepad or excel before attempting to read. All files can be found in the “data” directory.

    -
  • Read in the tab delimited file “GeneExpression.txt”.

  • -
  • Find the mean expression of all samples.

  • +
  • Check your current working directory. Set your working directory to +be in the downloaded course material, specifically the r_course +subdirectory. [The exact path will depend on where you have saved your +download]
  • +
+
getwd()
+
[1] "/Users/mattpaul"
+
setwd("~/Downloads/Intro_To_R_1Day-master/r_course")
+
    +
  • Read in the tab delimited file “GeneExpression.txt”. Check the data +type.
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
+
## [1] "data.frame"       "list"             "oldClass"         "vector"          
+## [5] "list_OR_List"     "vector_OR_factor" "vector_OR_Vector"
    -
  • Read in the tab delimited file “GeneExpressionWithMethods.txt”. This -file contains information on analysis steps used to produce file.
  • +
  • Coerce the data frame to a matrix

  • +
  • Find the mean expression of all genes.

  • +
+
##    Gene_a    Gene_b    Gene_c    Gene_d    Gene_e    Gene_f    Gene_g    Gene_h 
+##  4.660569  4.379796  4.259824  5.849420  5.850658  6.732781 10.405203 10.201357
+
    +
  • Coerce the matrix back to a data frame. Add an additional column +with extra gene info +“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”. +Also add the mean expression as a column.
  • +
+
geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
+## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
+## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
+## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
+## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
+## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
+## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
+## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
+##        Sample_1.low tissue_localisation mean_expr
+## Gene_a    5.9350948              Kidney  4.660569
+## Gene_b    2.6313925             Adrenal  4.379796
+## Gene_c    5.7149521               Liver  4.259824
+## Gene_d    8.1964109             Adrenal  5.849420
+## Gene_e    0.7332521              Kidney  5.850658
+## Gene_f    3.8519471               Liver  6.732781
+## Gene_g   11.1733928               Liver 10.405203
+## Gene_h    9.9032500              Kidney 10.201357
+
    +
  • Read in tab delimited file “GeneExpression.txt”. Order genes by +decreasing mean expression. Filter out genes with mean +expression below 5. Write out a new comma separated file with column +titles.

  • +
  • Read in the tab delimited file “GeneExpressionWithMethods.txt”. +This file contains information on analysis steps used to produce +file.

##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
 ## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
@@ -394,11 +540,6 @@ 

Introduction to R.

+

Exercise 1 - Functions

– Create a function which takes one number and returns the square of that number

## [1] 9

– Create a function which takes two numbers and returns the mean

## [1] 2
    -
  • Create a function which takes two vectors and returns the mean
  • +
  • Create a function which takes two vectors and returns the mean. +Include a message statement that gives the total length of both +vectors.
-
## [1] 2
+
## The total length of my vectors is 8
+
## [1] 4.25
@@ -481,7 +470,7 @@

Course Overview


Course Integrity

-

This course is compiled automatically on 2024-12-10 +

This course is compiled automatically on 2024-12-11

The course is tested and available on MacOS, Windows and diff --git a/docs/presentations/singlepage/introToR_Session1.html b/docs/presentations/singlepage/introToR_Session1.html index ba083d8..8c6933c 100644 --- a/docs/presentations/singlepage/introToR_Session1.html +++ b/docs/presentations/singlepage/introToR_Session1.html @@ -2084,11 +2084,11 @@

Combining logical vectors

Time for an exercise!

-

Exercise on vectors can be found here

+

Exercise on vectors can be found here

-
-

Answers to exercise.

-

Answers can be found here here

+
+

Answers to exercise

+

Answers can be found here

@@ -2395,11 +2395,11 @@

Data types

Time for an exercise!

-

Exercise on matrices can be found here

+

Exercise on matrices can be found here

-
-

Answers to exercise.

-

Answers can be found here here

+
+

Answers to exercise

+

Answers can be found here

@@ -2453,8 +2453,7 @@

Display order of levels

Nominal factors

In some cases there is no natural order to the categories such that -one category is greater than the other (nominal data). In this case we -can see that R is gender neutral.

+one category is greater than the other (nominal data).

factorExample <- factor(vectorExample, levels=c("male","female"))
 factorExample[1] < factorExample[2]
## Warning in Ops.factor(factorExample[1], factorExample[2]): '<' not meaningful
@@ -2798,11 +2797,11 @@ 

Merging data frames

Time for an exercise!

-

Exercise on data frames can be found here

+

Exercise on data frames can be found here

-
-

Answers to exercise.

-

Answers can be found here here

+
+

Answers to exercise

+

Answers can be found here

@@ -2953,6 +2952,14 @@

Flattening lists to matrices

## Second 2 6 7 ## Third 1 4 7
+
+

Time for an exercise!

+

Exercise on lists can be found here

+
+
+

Answers to exercise

+

Answers can be found here

+

Coercing data formats

@@ -3075,7 +3082,7 @@

More complex objects

manage Dates or Times.

Time <- Sys.time()
 Time
-
## [1] "2024-12-10 21:07:46 UTC"
+
## [1] "2024-12-11 01:00:32 UTC"

More complex objects

@@ -3103,11 +3110,11 @@

More complex objects

More complex objects

We can also use the arithmetic operations with our time objects.

Time
-
## [1] "2024-12-10 21:07:46 UTC"
+
## [1] "2024-12-11 01:00:32 UTC"
Time - 120
-
## [1] "2024-12-10 21:05:46 UTC"
+
## [1] "2024-12-11 00:58:32 UTC"
TimeNow - Time
-
## Time difference of 0.1043649 secs
+
## Time difference of 0.1018391 secs

More complex objects

@@ -3118,18 +3125,18 @@

More complex objects

We can also change the timezone by specifying a tz parameter

format(Time,format="%H O'Clock %p %A on %B %dth")
-
## [1] "21 O'Clock PM Tuesday on December 10th"
+
## [1] "01 O'Clock AM Wednesday on December 11th"
format(Time,format="%H O'Clock %p %A on %B %dth",tz = "GMT")
-
## [1] "21 O'Clock PM Tuesday on December 10th"
+
## [1] "01 O'Clock AM Wednesday on December 11th"

Complex to base objects

Most of the time we can convert more complex object back to our basic object types we are more familar with.

as.character(Time)
-
## [1] "2024-12-10 21:07:46.094567"
+
## [1] "2024-12-11 01:00:32.629524"
as.numeric(TimeNow-Time)
-
## [1] 0.1043649
+
## [1] 0.1018391

Complex objects summary

@@ -3238,13 +3245,13 @@

Data from external sources

Gene_a -3.973947 +3.658047 -3.930245 +5.530241 -2.474149 +3.272248 @@ -3252,13 +3259,13 @@

Data from external sources

Gene_b -3.461118 +3.778873 -3.985219 +5.652030 -5.081024 +3.694406 @@ -3266,13 +3273,13 @@

Data from external sources

Gene_c -4.363544 +3.918055 -4.808006 +4.841402 -4.068423 +5.957884 @@ -3280,13 +3287,13 @@

Data from external sources

Gene_d -5.089677 +3.223094 -1.477786 +3.130542 -4.949975 +4.522371 @@ -3294,13 +3301,13 @@

Data from external sources

Gene_e -9.478252 +10.369538 -10.525974 +10.172782 -9.489895 +10.039658 @@ -3308,13 +3315,13 @@

Data from external sources

Gene_f -9.167348 +11.044072 -10.601165 +10.561451 -9.324316 +9.909005 @@ -3322,13 +3329,13 @@

Data from external sources

Gene_g -9.399451 +8.915426 -11.443669 +11.178636 -9.400897 +10.473599 @@ -3336,13 +3343,13 @@

Data from external sources

Gene_h -11.166681 +9.649608 -11.069730 +10.231282 -9.805737 +10.386836 @@ -3655,11 +3662,11 @@

Save and read data

remembers the objects original name i.e. Table or myList.

load("my_list.RData")
-
+

Time for an exercise!

Exercise on reading and writing data can be found here

-
+

Answers to exercise

Answers can be found here

diff --git a/docs/presentations/singlepage/introToR_Session2.html b/docs/presentations/singlepage/introToR_Session2.html index eb84eb9..65a321d 100644 --- a/docs/presentations/singlepage/introToR_Session2.html +++ b/docs/presentations/singlepage/introToR_Session2.html @@ -2151,11 +2151,11 @@

sapply() example 3

Time for an exercise!

-

Exercise on loops and conditional branching can be found here

+

Exercise on loops and conditional branching can be found here

Answers to exercise

-

Answers can be found here here

+

Answers can be found here

@@ -2390,8 +2390,8 @@

Custom function example

A <- rnorm(20) my_zscore(my_number=A[1], my_vector=A)

-
## Mean is 0.389048273564465
-
## [1] 0.3648576
+
## Mean is 0.416867907369881
+
## [1] -0.002603881

Debugging functions

@@ -2406,10 +2406,10 @@

Debugging functions

Custom functions and apply

These custom functions can also be utilized with apply.

sapply(A, my_zscore, my_vector=A)
-
##  [1]  0.36485764  1.15853052 -0.87955629  0.30247335 -0.27396124 -0.41809173
-##  [7]  0.52441334  0.69940853  1.12897715  0.07544800 -1.69037492  1.56788608
-## [13] -1.27228116  1.51188113 -0.05544895  0.88612324 -0.78095665 -1.69200058
-## [19] -0.90606862 -0.25125886
+
##  [1] -0.002603881 -0.128616732  0.538543594  1.050796185 -0.562454904
+##  [6]  1.331683190 -0.208345806 -0.621922028 -2.431404457 -0.149177218
+## [11] -0.905864425  0.441651262 -0.378431902 -0.195380445 -1.361557140
+## [16]  1.155218949 -0.223992938  2.123924129  0.421056362  0.106878204
@@ -2515,7 +2515,7 @@

Sourcing scripts

}

source("scripts/dayOfWeek.r")
 dayOfWeek()
-
## [1] "Tue"
+
## [1] "Wed"

Rscript

diff --git a/docs/presentations/slides/introToR_Session1.html b/docs/presentations/slides/introToR_Session1.html index 173c79a..c10ab48 100644 --- a/docs/presentations/slides/introToR_Session1.html +++ b/docs/presentations/slides/introToR_Session1.html @@ -1020,14 +1020,14 @@ ## Time for an exercise! -Exercise on vectors can be found [here](../../exercises/exercises/vector_exercise.html) +Exercise on vectors can be found [here](../../exercises/exercises/Vectors_exercise.html) --- -## Answers to exercise. +## Answers to exercise -Answers can be found here [here](../../exercises/answers/vector_answers.html) +Answers can be found here [here](../../exercises/answers/Vectors_answers.html) --- @@ -1583,14 +1583,14 @@ ## Time for an exercise! -Exercise on matrices can be found [here](../../exercises/exercises/matrices_exercise.html) +Exercise on matrices can be found [here](../../exercises/exercises/Matrices_exercise.html) --- -## Answers to exercise. +## Answers to exercise -Answers can be found here [here](../../exercises/answers/matrices_answers.html) +Answers can be found here [here](../../exercises/answers/Matrices_answers.html) @@ -1690,7 +1690,6 @@ ## Nominal factors In some cases there is no natural order to the categories such that one category is greater than the other (nominal data). -In this case we can see that R is gender neutral. ``` r @@ -2264,13 +2263,13 @@ --- ## Time for an exercise! -Exercise on data frames can be found [here](../../exercises/exercises/factorsAndDataframes_exercise.html) +Exercise on data frames can be found [here](../../exercises/exercises/FactorsAndDataframes_exercise.html) --- -## Answers to exercise. +## Answers to exercise -Answers can be found here [here](../../exercises/answers/factorsAndDataframes_answers.html) +Answers can be found here [here](../../exercises/answers/FactorsAndDataframes_answers.html) --- @@ -2495,6 +2494,21 @@ ## Third 1 4 7 ``` +--- +## Time for an exercise! 
+ + +Exercise on matrices can be found [here](../../exercises/exercises/Lists_exercise.html) + + +--- +## Answers to exercise + + +Answers can be found here [here](../../exercises/answers/Lists_answers.html) + + + --- class: inverse, center, middle @@ -2708,7 +2722,7 @@ ``` ``` -## [1] "2024-12-10 21:07:43 UTC" +## [1] "2024-12-11 01:00:30 UTC" ``` --- @@ -2763,7 +2777,7 @@ ``` ``` -## [1] "2024-12-10 21:07:43 UTC" +## [1] "2024-12-11 01:00:30 UTC" ``` ``` r @@ -2771,7 +2785,7 @@ ``` ``` -## [1] "2024-12-10 21:05:43 UTC" +## [1] "2024-12-11 00:58:30 UTC" ``` ``` r @@ -2779,7 +2793,7 @@ ``` ``` -## Time difference of 0.1014812 secs +## Time difference of 0.09647393 secs ``` --- @@ -2797,7 +2811,7 @@ ``` ``` -## [1] "21 O'Clock PM Tuesday on December 10th" +## [1] "01 O'Clock AM Wednesday on December 11th" ``` ``` r @@ -2805,7 +2819,7 @@ ``` ``` -## [1] "21 O'Clock PM Tuesday on December 10th" +## [1] "01 O'Clock AM Wednesday on December 11th" ``` --- @@ -2819,7 +2833,7 @@ ``` ``` -## [1] "2024-12-10 21:07:43.989243" +## [1] "2024-12-11 01:00:30.832828" ``` ``` r @@ -2827,7 +2841,7 @@ ``` ``` -## [1] 0.1014812 +## [1] 0.09647393 ``` --- @@ -2944,51 +2958,51 @@ <tbody> <tr> <td style="text-align:left;"> Gene_a </td> - <td style="text-align:right;"> 3.423364 </td> - <td style="text-align:right;"> 2.551130 </td> - <td style="text-align:right;"> 2.575654 </td> + <td style="text-align:right;"> 4.429622 </td> + <td style="text-align:right;"> 3.363374 </td> + <td style="text-align:right;"> 4.520210 </td> </tr> <tr> <td style="text-align:left;"> Gene_b </td> - <td style="text-align:right;"> 4.080952 </td> - <td style="text-align:right;"> 5.865549 </td> - <td style="text-align:right;"> 3.958085 </td> + <td style="text-align:right;"> 4.114483 </td> + <td style="text-align:right;"> 3.992814 </td> + <td style="text-align:right;"> 2.877940 </td> </tr> <tr> <td style="text-align:left;"> Gene_c </td> - <td style="text-align:right;"> 4.473275 </td> - <td style="text-align:right;"> 
3.771709 </td> - <td style="text-align:right;"> 4.611821 </td> + <td style="text-align:right;"> 3.521606 </td> + <td style="text-align:right;"> 3.215211 </td> + <td style="text-align:right;"> 5.199799 </td> </tr> <tr> <td style="text-align:left;"> Gene_d </td> - <td style="text-align:right;"> 3.415446 </td> - <td style="text-align:right;"> 3.817052 </td> - <td style="text-align:right;"> 2.907351 </td> + <td style="text-align:right;"> 3.840954 </td> + <td style="text-align:right;"> 3.860644 </td> + <td style="text-align:right;"> 3.688784 </td> </tr> <tr> <td style="text-align:left;"> Gene_e </td> - <td style="text-align:right;"> 11.960879 </td> - <td style="text-align:right;"> 9.113075 </td> - <td style="text-align:right;"> 9.927639 </td> + <td style="text-align:right;"> 11.896069 </td> + <td style="text-align:right;"> 10.056214 </td> + <td style="text-align:right;"> 10.062543 </td> </tr> <tr> <td style="text-align:left;"> Gene_f </td> - <td style="text-align:right;"> 9.821113 </td> - <td style="text-align:right;"> 8.704334 </td> - <td style="text-align:right;"> 9.940262 </td> + <td style="text-align:right;"> 9.947901 </td> + <td style="text-align:right;"> 8.884012 </td> + <td style="text-align:right;"> 8.773162 </td> </tr> <tr> <td style="text-align:left;"> Gene_g </td> - <td style="text-align:right;"> 10.763196 </td> - <td style="text-align:right;"> 9.869513 </td> - <td style="text-align:right;"> 9.393374 </td> + <td style="text-align:right;"> 9.640603 </td> + <td style="text-align:right;"> 10.470026 </td> + <td style="text-align:right;"> 9.591002 </td> </tr> <tr> <td style="text-align:left;"> Gene_h </td> - <td style="text-align:right;"> 11.401766 </td> - <td style="text-align:right;"> 9.550342 </td> - <td style="text-align:right;"> 11.530905 </td> + <td style="text-align:right;"> 11.954618 </td> + <td style="text-align:right;"> 10.533096 </td> + <td style="text-align:right;"> 11.728931 </td> </tr> </tbody> </table> diff --git 
a/docs/presentations/slides/introToR_Session2.html b/docs/presentations/slides/introToR_Session2.html index dfdd2ed..cc513c9 100644 --- a/docs/presentations/slides/introToR_Session2.html +++ b/docs/presentations/slides/introToR_Session2.html @@ -1027,13 +1027,13 @@ ## Time for an exercise! -Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/conditionsAndLoops_exercise.html) +Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/ConditionsAndLoops_exercise.html) --- ## Answers to exercise -Answers can be found here [here](../..//exercises/answers/conditionsAndLoops_answers.html) +Answers can be found here [here](../..//exercises/answers/ConditionsAndLoops_answers.html) --- @@ -1388,11 +1388,11 @@ ``` ``` -## Mean is -0.0417158165317701 +## Mean is -0.138706455190791 ``` ``` -## [1] -0.555588 +## [1] 1.020659 ``` --- @@ -1417,10 +1417,10 @@ ``` ``` -## [1] -0.5555880 -0.1322663 0.1899138 -0.2299799 -1.6904861 0.4030488 -## [7] 0.9826960 -0.1899368 1.5052016 1.1021064 -0.8039631 -0.1358766 -## [13] -0.1283686 0.4864651 0.7693742 -2.5554399 1.2469762 -1.0641361 -## [19] 0.5323066 0.2679527 +## [1] 1.02065900 0.47143522 -0.11262739 0.35643079 -1.19629115 -0.07521402 +## [7] -0.55040037 0.13963265 1.40238593 -1.51405434 -1.32555234 -1.04840617 +## [13] 1.84708611 0.46460992 1.63750080 -0.51232894 -0.95341254 0.89065505 +## [19] -0.51716384 -0.42494437 ``` @@ -1569,7 +1569,7 @@ ``` ``` -## [1] "Tue" +## [1] "Wed" ``` --- diff --git a/r_course/_site.yml b/r_course/_site.yml index e320864..42727c8 100644 --- a/r_course/_site.yml +++ b/r_course/_site.yml @@ -24,42 +24,36 @@ navbar: href: presentations/r_code/introToR_Session2.R - text: Exercises menu: - - text: vector + - text: Vectors menu: - text: Exercise - href: exercises/exercises/vector_exercise.html + href: exercises/exercises/Vectors_exercise.html - text: Solution - href: exercises/answers/vector_answers.html - - text: matrices + href: 
exercises/answers/Vectors_answers.html + - text: Matrices menu: - text: Exercise - href: exercises/exercises/matrices_exercise.html + href: exercises/exercises/Matrices_exercise.html - text: Solution - href: exercises/answers/matrices_answers.html - - text: factorsAndDataframes + href: exercises/answers/Matrices_answers.html + - text: FactorsAndDataframes menu: - text: Exercise - href: exercises/exercises/factorsAndDataframes_exercise.html + href: exercises/exercises/FactorsAndDataframes_exercise.html - text: Solution - href: exercises/answers/factorsAndDataframes_answers.html - - text: Dataframes - menu: - - text: Exercise - href: exercises/exercises/Dataframes_exercise.html - - text: Solution - href: exercises/answers/Dataframes_answers.html + href: exercises/answers/FactorsAndDataframes_answers.html - text: DataInputOutput menu: - text: Exercise href: exercises/exercises/DataInputOutput_exercise.html - text: Solution href: exercises/answers/DataInputOutput_answers.html - - text: conditionsAndLoops + - text: ConditionsAndLoops menu: - text: Exercise - href: exercises/exercises/conditionsAndLoops_exercise.html + href: exercises/exercises/ConditionsAndLoops_exercise.html - text: Solution - href: exercises/answers/conditionsAndLoops_answers.html + href: exercises/answers/ConditionsAndLoops_answers.html - text: Functions menu: - text: Exercise diff --git a/r_course/data/writeThisMultipleXLSX.xlsx b/r_course/data/writeThisMultipleXLSX.xlsx index ae4f2172b4a819087ee7702eef73aa481008d9ca..f53e6846f5af336896ef29e9bee469c8c2a818c0 100644 GIT binary patch delta 334 zcmV-U0kQtAGORMNqZ0++mITEllc*Caf5|ST2-|_S2GN3`42-Z+nXW+}%X$8YvJknEytf7#w1t3X?OiP# z6V}6rtHCo1iA+Fbu^| zjn#sN`ZXZCjH9WVUc|w$Seh5 z`?yEtvwD76&QV*E@;;|!PU(_R97YJM%J1FCi~bW(ABNmaJOcK)FF!&14^T@31e4zw g8nXu#qXPxsmITElldl+50qBzi86yUg761SM0F}?50RR91 delta 335 zcmV-V0kHn8GOaSOqZ0+YUf6CWlc*Caf6EDJ5w-(u$*F-tQ(Afx(#8?Bevs5Ue)}p; z;w0cxkE@yec6L#@+^aVE1?Q#F6(U)V642ZTshbMDZ&v98N_=2iu-0g(P!Aq0>${R$ 
z%8i3pXDqlN!6y-|ASyAUkJ*z+AI=CkO~QQR7*Scs)-ux0$4HGqpeUtk4+e+d>C zJ^)iY6441M_=vPWt{nh^V`xEv4jz+?;EOkaQ+~2xP_CRxhTg)Y^(>C=y_ci0>$0|b-a h7#gz%6{72GN3`42-Z+nXW+}%X$8YvJknEytf7#w1t3X?OiP# z6V}6rtHCo1iA+Fbu^| zjn#sN`ZXZCjH9WVUc|w$Seh5 z`?yEtvwD76&QV*E@;;|!PU(_R7)A)I%J1FCi~bW(ABNmaJOcK)FF!&153{}!b^`_9 YmITEllQ|Yt0lkxs79$2?6aWAK085;lpa1{> delta 327 zcmV-N0l5CQEw?SOco7A=Uf6CWlYJ2?f6EDJ5w-(u$*F-tQ(Afx(#8?Bevs5Ue)}p; z;w0cxkE@yec6L#@+^aVE1?Q#F6(U)V642ZTshbMDZ&v98N_=2iu-0g(P!Aq0>${R$ z%8i3pXDqlN!6y-|ASyAUkJ*z+AI=CkO~QQR7*Scs)-ux0$4HGqpeUtk4+e+d>C zJ^)iY6441M_=vPWt{nh^V`xEv4jz+?;EOkaQ+~2xP_CRxhTg)Y^(>C=y_ci0>$Introduction +

These exercises are about reading and writing data sections of Introduction to R.

Exercise 1

Have a look at all files in notepad or excel before attempting to read. All files can be found in the “data” directory.

    -
  • Read in the tab delimited file “GeneExpression.txt”.
  • +
  • Check your current working directory. Set your working directory to +be in the downloaded course material. Specifically the r_course +subdirectory. [The exact path will depend on where you have saved your +download]
-
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+
getwd()
+
[1] "/Users/mattpaul"
+
setwd("~/Downloads/Intro_To_R_1Day-master/r_course")
    -
  • Find the mean expression of all samples.
  • +
  • Read in the tab delimited file “GeneExpression.txt”. Check the data +type.
-
sampleMeans <- c(mean(geneExpression[,1]),mean(geneExpression[,2]),mean(geneExpression[,3]),mean(geneExpression[,4]),mean(geneExpression[,5]),mean(geneExpression[,6]))
-names(sampleMeans) <- colnames(geneExpression)
-sampleMeans
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
-
# Alternatively we could use the colMeans() function.
-colMeans(geneExpression)
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
+
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+is(geneExpression)
+
## [1] "data.frame"       "list"             "oldClass"         "vector"          
+## [5] "list_OR_List"     "vector_OR_factor" "vector_OR_Vector"
+
    +
  • Coerce the data frame to a matrix
  • +
+
geneExpression <- as.matrix(geneExpression)
+
    +
  • Find the mean expression of all genes.
  • +
+
sampleMeans <- c(mean(geneExpression[1,]),mean(geneExpression[2,]),mean(geneExpression[3,]),mean(geneExpression[4,]),mean(geneExpression[5,]),mean(geneExpression[6,]),mean(geneExpression[7,]),mean(geneExpression[8,]))
+names(sampleMeans) <- rownames(geneExpression)
+sampleMeans
+
##    Gene_a    Gene_b    Gene_c    Gene_d    Gene_e    Gene_f    Gene_g    Gene_h 
+##  4.660569  4.379796  4.259824  5.849420  5.850658  6.732781 10.405203 10.201357
+
# Alternatively we could use the rowMeans() function.
+# rowMeans(geneExpression)
+
    +
  • Coerce the matrix back to a data frame. Add an additional column +with extra gene info +“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”. +Also add the mean expression as a column.
  • +
+
geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
+## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
+## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
+## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
+## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
+## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
+## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
+## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
+##        Sample_1.low tissue_localisation mean_expr
+## Gene_a    5.9350948              Kidney  4.660569
+## Gene_b    2.6313925             Adrenal  4.379796
+## Gene_c    5.7149521               Liver  4.259824
+## Gene_d    8.1964109             Adrenal  5.849420
+## Gene_e    0.7332521              Kidney  5.850658
+## Gene_f    3.8519471               Liver  6.732781
+## Gene_g   11.1733928               Liver 10.405203
+## Gene_h    9.9032500              Kidney 10.201357
+
    +
  • Read in tab delimited file “GeneExpression.txt”. Order genes by +decreasing mean expression. Filter out genes with mean +expression below 5. Write out a new comma separated file with column +titles.
  • +
+
orderedExpression <- geneExpression[order(geneExpression$mean_expr,decreasing=T),]
+filteredExpression <- orderedExpression[orderedExpression$mean_expr>5,]
+expressionDF <- cbind(rownames(filteredExpression),filteredExpression)
+colnames(expressionDF)[1] <- "geneNames"
+write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
  • Read in the tab delimited file “GeneExpressionWithMethods.txt”. This file contains information on analysis steps used to produce file.
-
geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
-geneExpression
+
geneExpression <- read.table("data/GeneExpressionWithMethods.txt",h=T,sep="\t",row.names=1,skip=3)
+geneExpression
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
 ## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
 ## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
@@ -486,8 +540,8 @@ 

geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
-geneExpression

+
geneExpression <- read.table("data/GeneExpressionWithNotes.txt",h=T,sep="\t",row.names=1,comment.char = ">")
+geneExpression
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
 ## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
 ## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
@@ -506,16 +560,6 @@ 

geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
-orderedExpression <- geneExpression[order(geneExpression$Sample_1.hi,decreasing=T),]
-expressionDF <- cbind(rownames(orderedExpression),orderedExpression)
-colnames(expressionDF)[1] <- "geneNames"
-write.table(expressionDF,"orderedExpression.txt",sep=",",col.names = T,row.names=F)
diff --git a/r_course/exercises/answers/Functions_answers.html b/r_course/exercises/answers/Functions_answers.html index 8026aa8..f85ece8 100644 --- a/r_course/exercises/answers/Functions_answers.html +++ b/r_course/exercises/answers/Functions_answers.html @@ -439,6 +439,7 @@

Introduction to R.

+

Exercise 1 - Functions

– Create a function which takes one number and returns the square of that number

squareOfNumber <- function(number){
@@ -457,30 +458,35 @@ 

meanOfNumbers(1,3)

## [1] 2
    -
  • Create a function which takes two vectors and returns the mean
  • +
  • Create a function which takes two vectors and returns the mean. +Include a message statement that gives the total length of both +vectors.
meanOfVectors <- function(vector,vector2){
-  meanOfVectorsRes <- mean(c(vector,vector2))
-  return(meanOfVectorsRes)
-}
-
-meanOfVectors(1,3)
-
## [1] 2
+ bigvector <- c(vector,vector2) + message(paste("The total length of my vectors is", length(bigvector))) + meanOfVectorsRes <- mean(bigvector ) + return(meanOfVectorsRes) +} + +meanOfVectors(c(1,3,4),c(3,6,1,7,9))

+
## The total length of my vectors is 8
+
## [1] 4.25
  • Create a function which takes two numbers and returns the two numbers as a vector and the mean, summary and multiple as a data.frame.
-
dfAndVecReturn <- function(number,number2){
-  input <- c(number,number2)
-  df <- data.frame(mean=mean(c(number,number2)),
-                   sum=number+number2,
-                   multiple=number*number2
-                   )
-  return(list(input,df))
-}
-
-dfAndVecReturn(1,3)
+
dfAndVecReturn <- function(number,number2){
+  input <- c(number,number2)
+  df <- data.frame(mean=mean(c(number,number2)),
+                   sum=number+number2,
+                   multiple=number*number2
+                   )
+  return(list(input,df))
+}
+
+dfAndVecReturn(1,3)
## [[1]]
 ## [1] 1 3
 ## 
@@ -489,48 +495,77 @@ 

findSmallestFactorial <- function(x){
-    factorialAnswer <- 0
-    count <- 0
-    while(factorialAnswer <= x){
-      count <- count+1
-      if(count == 1){
-        factorialAnswer <- 1
-      }else{
-        factorialAnswer <- factorialAnswer * count 
-      }
-    }
-    return(count)
-}
-
-findSmallestFactorial(3000)
+
findSmallestFactorial <- function(x){
+    factorialAnswer <- 0
+    count <- 0
+    while(factorialAnswer <= x){
+      count <- count+1
+      if(count == 1){
+        factorialAnswer <- 1
+      }else{
+        factorialAnswer <- factorialAnswer * count 
+      }
+    }
+    return(count)
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
findSmallestFactorial(10^100)
+
findSmallestFactorial(10^100)
## [1] 70

– Add a if and else statement in your function to only calculate factorial code if argument is a numeric.

-
findSmallestFactorial <- function(x){
-  if(!is.numeric(x)){
-    message("Please provide a numeric argument!")
-  }else{
-    factorialAnswer <- 0
-    count <- 0
-    while(factorialAnswer <= x){
-      count <- count+1
-      if(count == 1){
-        factorialAnswer <- 1
-      }else{
-        factorialAnswer <- factorialAnswer * count 
-      }
-    }
-    return(count)
-  }
-}
-
-findSmallestFactorial(3000)
+
findSmallestFactorial <- function(x){
+  if(!is.numeric(x)){
+    message("Please provide a numeric argument!")
+  }else{
+    factorialAnswer <- 0
+    count <- 0
+    while(factorialAnswer <= x){
+      count <- count+1
+      if(count == 1){
+        factorialAnswer <- 1
+      }else{
+        factorialAnswer <- factorialAnswer * count 
+      }
+    }
+    return(count)
+  }
+}
+
+findSmallestFactorial(3000)
## [1] 7
-
findSmallestFactorial("Hello")
+
findSmallestFactorial("Hello")
## Please provide a numeric argument!
+

Exercise 2 - Scripts

+

Let's try to put together as much as we have learnt thus far. This +will be a multistep challenge. Break it down and use pseudocode to help. +Start by working the code interactively, then turn it into a script.

+
    +
  1. Read in the “data/GeneExpression.txt” dataset.
  2. +
  3. Use apply to calculate the Z score for each gene (per row). The +Z score is (gene_expression - mean)/standard deviation. You should use a +function to do this calculation.
  4. +
  5. Find which gene has the highest absolute max Zscore. This is a very +rough proxy for the variability of that gene.
  6. +
  7. Print out the gene name with the highest value
  8. +
  9. Turn this into a script and run the script
  10. +
  11. Think about what modifications you would need to make in order to +accept a different data set as input.
  12. +
+
geneExpression <- read.table("data/GeneExpression.txt",h=T,sep="\t",row.names=1)
+geneExpression <-as.matrix(geneExpression)
+zscores <- function(x){
+    my_mean <- mean(x)
+    my_sd <- sd(x)
+    my_z <- (x-my_mean)/my_sd
+    return(my_z)}
+
+my_zs <- apply(geneExpression,2, zscores)
+
+inds <- which(my_zs == max(my_zs), arr.ind = T)       
+rownames(geneExpression)[inds[,1]]
+
## [1] "Gene_h"
diff --git a/r_course/exercises/answers/Lists_answers.html b/r_course/exercises/answers/Lists_answers.html new file mode 100644 index 0000000..86ec741 --- /dev/null +++ b/r_course/exercises/answers/Lists_answers.html @@ -0,0 +1,583 @@ + + + + + + + + + + + + + + +Lists + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

+  +

+

These exercises are about the Lists sections of Introduction +to R.

+

Exercise 1

+
    +
  • Create a list containing a character vector, a numeric matrix and a +data frame with 2 or more datatypes.
  • +
+
firstElement <- c("A","B","C","D","E")
+secondElement <- matrix(1:5,nrow=5,ncol=5)
+thirdElement <- data.frame(Sample=c("Sample1","Sample2","Sample3","Sample4"), Age=c(25,21,24,25),factor=c("Smoker","Smoker","NonSmoker","Smoker"))
+
+my_list <- list(firstElement, secondElement, thirdElement)
+my_list
+
## [[1]]
+## [1] "A" "B" "C" "D" "E"
+## 
+## [[2]]
+##      [,1] [,2] [,3] [,4] [,5]
+## [1,]    1    1    1    1    1
+## [2,]    2    2    2    2    2
+## [3,]    3    3    3    3    3
+## [4,]    4    4    4    4    4
+## [5,]    5    5    5    5    5
+## 
+## [[3]]
+##    Sample Age    factor
+## 1 Sample1  25    Smoker
+## 2 Sample2  21    Smoker
+## 3 Sample3  24 NonSmoker
+## 4 Sample4  25    Smoker
+
    +
  • Rename each entry: “my_vector”, “my_matrix”, “my_df”
  • +
+
names(my_list) <- c("my_vector", "my_matrix", "my_df")
+my_list
+
## $my_vector
+## [1] "A" "B" "C" "D" "E"
+## 
+## $my_matrix
+##      [,1] [,2] [,3] [,4] [,5]
+## [1,]    1    1    1    1    1
+## [2,]    2    2    2    2    2
+## [3,]    3    3    3    3    3
+## [4,]    4    4    4    4    4
+## [5,]    5    5    5    5    5
+## 
+## $my_df
+##    Sample Age    factor
+## 1 Sample1  25    Smoker
+## 2 Sample2  21    Smoker
+## 3 Sample3  24 NonSmoker
+## 4 Sample4  25    Smoker
+
    +
  • Access the second column of the dataframe. Try to access it in at +least two different ways.
  • +
+
my_list$my_df$Age
+
## [1] 25 21 24 25
+
my_list[[3]][,2]
+
## [1] 25 21 24 25
+
    +
  • Add a list into the fourth slot of our list. This list should +contain 2 numeric vectors. The resulting list should be 4 long. +Check.
  • +
+
fourthElement <- list(1:5,
+                      2:6)
+
+my_list <- c(my_list,list(fourthElement ))
+length(my_list)
+
## [1] 4
+
    +
  • Access the second vector within the sublist in the 4th +position.
  • +
+
my_list[[4]][[2]]
+
## [1] 2 3 4 5 6
+
    +
  • Create a list with three numeric vectors: c(5,2,9), c(13,12,6), +c(1,3,4)
  • +
+
my_list <- list(First=c(5,2,9),Second=c(13,12,6),Third=c(1,3,4))
+
    +
  • Flatten the list and turn it into a matrix.
  • +
+
flat_list <- unlist(my_list)
+listAsMat <- matrix(flat_list,
+                    nrow=length(my_list),
+                    ncol=3,
+                    byrow=T,
+                    dimnames=list(names(my_list)))
+listAsMat
+
##        [,1] [,2] [,3]
+## First     5    2    9
+## Second   13   12    6
+## Third     1    3    4
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/r_course/exercises/answers/Vectors_answers.html b/r_course/exercises/answers/Vectors_answers.html new file mode 100644 index 0000000..8fdec69 --- /dev/null +++ b/r_course/exercises/answers/Vectors_answers.html @@ -0,0 +1,687 @@ + + + + + + + + + + + + + + +Vectors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

+  +

+

These exercises are about the vector sections of Introduction +to R.

+

Exercise 1

+
    +
  • Create a vector containing the values 1,2,3,4,5
  • +
+
c(1,2,3,4,5)
+
## [1] 1 2 3 4 5
+
    +
  • Create a vector containing the values 1 to 100. Save it as the +variable x.
  • +
+
x <- 1:100
+x
+
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
+##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
+##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
+##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
+##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
+##  [91]  91  92  93  94  95  96  97  98  99 100
+
    +
  • Multiply x by 2
  • +
+
x*2
+
##   [1]   2   4   6   8  10  12  14  16  18  20  22  24  26  28  30  32  34  36
+##  [19]  38  40  42  44  46  48  50  52  54  56  58  60  62  64  66  68  70  72
+##  [37]  74  76  78  80  82  84  86  88  90  92  94  96  98 100 102 104 106 108
+##  [55] 110 112 114 116 118 120 122 124 126 128 130 132 134 136 138 140 142 144
+##  [73] 146 148 150 152 154 156 158 160 162 164 166 168 170 172 174 176 178 180
+##  [91] 182 184 186 188 190 192 194 196 198 200
+
    +
  • Create a vector containing the values 0,5,10,15,20
  • +
+
x <- seq(0,20,5)
+x
+
## [1]  0  5 10 15 20
+
    +
  • Check the help page for the rep function
  • +
+
?rep
+
## Help on topic 'rep' was found in the following packages:
+## 
+##   Package               Library
+##   base                  /usr/local/lib/R/library
+##   S4Vectors             /usr/local/lib/R/host-site-library
+## 
+## 
+## Using the first match ...
+
    +
  • Create a vector containing the values 1,1,2,2,3,3 using rep
  • +
+
x <- c(rep(1,2),rep(2,2),rep(3,2))
+x
+
## [1] 1 1 2 2 3 3
+
    +
  • Create a vector containing the values 1,1,5,7,9,10. Overwrite the +variable x with this.
  • +
+
x <- c(rep(1,2),seq(5,9,2),10)
+x
+
## [1]  1  1  5  7  9 10
+

Exercise 2

+
    +
  • Create a vector containing the values 1 to 10.
  • +
+
x <- 1:10
+x
+
##  [1]  1  2  3  4  5  6  7  8  9 10
+
    +
  • Create a new vector with all but the first and last value.
  • +
+
y <- x[-c(1,length(x))]
+y
+
## [1] 2 3 4 5 6 7 8 9
+
    +
  • Create a new vector with all but the second and fifth value.
  • +
+
y <- x[-c(2,5)]
+y
+
## [1]  1  3  4  6  7  8  9 10
+
    +
  • Create a new vector of square root of the sixth and seventh +position.
  • +
+
y <- sqrt(x[6:7])
+y
+
## [1] 2.449490 2.645751
+
    +
  • Create a new vector of alternating positions in the vector using +another vector.
  • +
+
y <- x[seq(1,10,2)]
+y
+
## [1] 1 3 5 7 9
+

Exercise 3

+
    +
  • Check the help page for the paste() function.
  • +
+
?paste
+
## Help on topic 'paste' was found in the following packages:
+## 
+##   Package               Library
+##   base                  /usr/local/lib/R/library
+##   BiocGenerics          /usr/local/lib/R/host-site-library
+## 
+## 
+## Using the first match ...
+
    +
  • Combine the two characters: “A” and “B”
  • +
+
paste("A","B")
+
## [1] "A B"
+
    +
  • Change the separator to an underscore
  • +
+
paste("A","B", sep="_")
+
## [1] "A_B"
+

Exercise 4

+
    +
  • Create a vector with these gene names: “PKM”, “ADPRH”, “TDG”, +“ATP4A”, “SLC6A4”, “CAPN3”, “TDG”, “ATP1A2”,“PKM”
  • +
  • Subset to just the unique genes
  • +
+
my_genes <- c("PKM", "ADPRH", "TDG", "ATP4A", "SLC6A4", "CAPN3", "TDG", "ATP1A2","PKM")
+my_genes <- unique(my_genes)
+my_genes 
+
## [1] "PKM"    "ADPRH"  "TDG"    "ATP4A"  "SLC6A4" "CAPN3"  "ATP1A2"
+
    +
  • Create a second vector with these gene names: “SLC6A4”, “CAPN3”, +“TDG”, “ATP1A2”, “IMPA1”, “PDXK”.
  • +
  • Check which genes from vector 1 are present in vector 2.
  • +
  • Subset vector 1 depending on whether the gene is present in vector +2.
  • +
+
my_genes_of_interest <- c("SLC6A4", "CAPN3", "TDG", "ATP1A2", "IMPA1", "PDXK")
+idx <- my_genes %in% my_genes_of_interest
+idx
+
## [1] FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
+
my_genes[idx]
+
## [1] "TDG"    "SLC6A4" "CAPN3"  "ATP1A2"
+
my_genes_of_interest <- c("SLC6A4", "CAPN3", "TDG", "ATP1A2", "IMPA1", "PDXK")
+idx <- my_genes %in% my_genes_of_interest
+idx
+
## [1] FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
+
my_genes[idx]
+
## [1] "TDG"    "SLC6A4" "CAPN3"  "ATP1A2"
+
    +
  • Create a vector with these gene names: “SMC1”, “SMC3”, “SCC1”, +“SCC3”, “RAD21”, “NIPBL”, “SMC2”, “SMC4”,“CAPH”,“CAPD3”
  • +
  • Subset to just the genes containing SMC
  • +
+
my_genes <- c("SMC1", "SMC3", "SCC1", "SCC3", "RAD21", "NIPBL", "SMC2", "SMC4","CAPH","CAPD3")
+my_genes[grepl("SMC",my_genes)]
+
## [1] "SMC1" "SMC3" "SMC2" "SMC4"
+

Exercise 5

+
    +
  • Create a vector of the gene names Gene_1, Gene_2, Gene_3, Gene_4
  • +
  • Create a vector of the expression values 1000, 3000, 10000, +12000
  • +
  • Create a vector of the gene lengths 100, 3000, 200, 1000
  • +
+
geneNames <- c("Gene_1", "Gene_2", "Gene_3","Gene_4")
+expression <- c(1000, 3000, 10000, 12000)
+geneLengths <- c(100, 3000, 200, 1000)
+names(expression) <- geneNames
+names(geneLengths) <- geneNames
+expression
+
## Gene_1 Gene_2 Gene_3 Gene_4 
+##   1000   3000  10000  12000
+
geneLengths
+
## Gene_1 Gene_2 Gene_3 Gene_4 
+##    100   3000    200   1000
+
    +
  • Find the longest gene.
  • +
+
names(geneLengths[geneLengths == max(geneLengths)])
+
## [1] "Gene_2"
+
names(geneLengths[which.max(geneLengths)])
+
## [1] "Gene_2"
+
    +
  • Identify genes which have a length greater than 100 and expression +greater than 10000
  • +
+
geneNames[geneLengths > 100 & expression > 10000]
+
## [1] "Gene_4"
+

Bonus Questions

+
    +
  • Calculate the expression over the gene length for all genes (Length +normalised expression).
  • +
+
lne <- expression/geneLengths
+lne
+
## Gene_1 Gene_2 Gene_3 Gene_4 
+##     10      1     50     12
+
    +
  • Identify genes with a length normalised expression greater than the +average
  • +
+
geneNames[lne > mean(lne)]
+
## [1] "Gene_3"
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/r_course/exercises/answers/conditionsAndLoops_answers.html b/r_course/exercises/answers/conditionsAndLoops_answers.html index 2b37ffd..cc7526c 100644 --- a/r_course/exercises/answers/conditionsAndLoops_answers.html +++ b/r_course/exercises/answers/conditionsAndLoops_answers.html @@ -439,65 +439,166 @@

Introduction to R.

-

– Calculate the factorial (factorial of 3 = 3 * 2 * 1) of 10 using a -loop.

-
for(x in 1:10){
-  if(x == 1){
-    factorialAnswer <- 1
-  }else{
-    factorialAnswer <- factorialAnswer * x 
-  }
-}
-factorialAnswer
-
## [1] 3628800
-

– Adjusting your answer from before, what is the first number that -has a factorial greater than 1000.

-
factorialAnswer <- 0
-count <- 0
-
-while(factorialAnswer <= 1000){
-  count <- count+1
-  if(count == 1){
-    factorialAnswer <- 1
-  }else{
-    factorialAnswer <- factorialAnswer * count 
-  }
-}
-count
-
## [1] 7
+

Exercise 1 - If Else

+
    +
  • Build an if statement that tests whether x is a negative number. Write a +print statement that will confirm if x is less than 0. Test it with +several values of x.
  • +
+
x<- 42
+
+if (x<0){
+  print("It's a negative number!")
+}
+
+x <- -42
+
+if (x<0){
+  print("It's a negative number!")
+}
+
## [1] "It's a negative number!"
+
    +
  • Modify the previous if statement to include an else. Ensure a print +statement is returned to say if the number is not negative.
  • +
+
x <- 0
+
+if (x<0){
+  print("It's a negative number!")
+}else{
+  print("It's not a negative number!")
+}
+
## [1] "It's not a negative number!"
+
x <- -1
+
+if (x<0){
+  print("It's a negative number!")
+}else{
+  print("It's not a negative number!")
+}
+
## [1] "It's a negative number!"
+
    +
  • Finally, add an else if statement. We want a response to confirm whether x +is negative, positive or zero.
  • +
+
x <- -1
+
+if (x<0){
+  print("It's a negative number!")
+}else if (x==0){
+  print("It's zero")
+}else{
+  print("It's a positive number!")
+}
+
## [1] "It's a negative number!"
+
x <- 0
+
+if (x<0){
+  print("It's a negative number!")
+}else if (x==0){
+  print("It's zero")
+}else{
+  print("It's a positive number!")
+}
+
## [1] "It's zero"
+
x <- 1
+
+if (x<0){
+  print("It's a negative number!")
+}else if (x==0){
+  print("It's zero")
+}else{
+  print("It's a positive number!")
+}
+
## [1] "It's a positive number!"
+
    +
  • Build an if/else statement that tests whether a variable is odd or even. +Include x in the printed output.
  • +
+

Hint: The modulus operator may be useful here i.e. x%%2 returns +the remainder after the value of x is divided by 2.

+
x <- 1
+
+if (x%%2==0){
+  paste(x,"is even")
+}else{
+  paste(x,"is odd")
+}
+
## [1] "1 is odd"
+
x <- 2
+
+if (x%%2==0){
+  paste(x,"is even")
+}else{
+  paste(x,"is odd")
+}
+
## [1] "2 is even"

– Using an ifelse() expression, create a factor from a vector of 1 to 40 where all numbers less than 10 are “small”,10 to 30 are “mid”,31 to 40 are “big”

-
condExercise <- 1:40
-condExercise
+
condExercise <- 1:40
+condExercise
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 ## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
-
vectorResult <- ifelse(condExercise<10,"small",ifelse(condExercise < 31,"mid","big"))
-temp <- factor(vectorResult,levels=c("small","mid","big"),order=T)
-temp
+
vectorResult <- ifelse(condExercise<10,"small",ifelse(condExercise < 31,"mid","big"))
+temp <- factor(vectorResult,levels=c("small","mid","big"),order=T)
+temp
##  [1] small small small small small small small small small mid   mid   mid  
 ## [13] mid   mid   mid   mid   mid   mid   mid   mid   mid   mid   mid   mid  
 ## [25] mid   mid   mid   mid   mid   mid   big   big   big   big   big   big  
 ## [37] big   big   big   big  
 ## Levels: small < mid < big
-

– Read in all files from expression directory with .txt extension and -create a table of gene expression results.

-
filesToRead <- dir("ExpressionResults/",pattern = "*\\.txt",full.names=T)
-fileRead <- vector("list",length=length(filesToRead))
-for(i in 1:length(filesToRead)){
-  fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
-  colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
-}
-mergedTable <- NULL
-for(i in fileRead){
-  if(is.null(mergedTable)){
-    mergedTable <- i
-  }else{
-    mergedTable <- merge(mergedTable,i,by=1,all=T)
-  }
-  
-  print(nrow(mergedTable))
-}
+

– Calculate the factorial (factorial of 3 = 3 * 2 * 1) of 10 using a +loop.

+
for(x in 1:10){
+  if(x == 1){
+    factorialAnswer <- 1
+  }else{
+    factorialAnswer <- factorialAnswer * x 
+  }
+}
+factorialAnswer
+
## [1] 3628800
+

– Adjusting your answer from before, what is the first number that +has a factorial greater than 1000.

+
factorialAnswer <- 0
+count <- 0
+
+while(factorialAnswer <= 1000){
+  count <- count+1
+  if(count == 1){
+    factorialAnswer <- 1
+  }else{
+    factorialAnswer <- factorialAnswer * count 
+  }
+}
+count
+
## [1] 7
+
    +
  • Set your working directory to be in the downloaded course material. +Specifically the r_course subdirectory. [The exact path will +depend on where you have saved your download]. Read in all files from +the expression directory (“ExpressionResults/”) with .txt extension and +create a table of gene expression results.
  • +
+
setwd("~/Downloads/Intro_To_R_1Day-master/r_course")
+
filesToRead <- dir("ExpressionResults/",pattern = "*\\.txt",full.names=T)
+fileRead <- vector("list",length=length(filesToRead))
+
+for(i in 1:length(filesToRead)){
+  fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
+  colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
+}
+mergedTable <- NULL
+for(i in fileRead){
+  if(is.null(mergedTable)){
+    mergedTable <- i
+  }else{
+    mergedTable <- merge(mergedTable,i,by=1,all=T)
+  }
+  
+  print(nrow(mergedTable))
+}
## [1] 5001
 ## [1] 5001
 ## [1] 5001
@@ -509,7 +610,7 @@ 

mergedTable[1:3,] 
+
mergedTable[1:3,] 
##   GeneNames Annotation.txt          NA ExpressionResults_Sample1.txt
 ## 1  GeneName        Ensembl     Pathway                            NA
 ## 2    Gene_1       Ens_1001 DNA_Binding                      3.448466
@@ -536,9 +637,9 @@ 

Annotation <- read.table("ExpressionResults/Annotation.txt",sep="\t",h=T)
-annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
-annotatedExpression[1:2,]
+
Annotation <- read.table("ExpressionResults/Annotation.txt",sep="\t",h=T)
+annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
+annotatedExpression[1:2,]
##   GeneName  Ensembl     Pathway Annotation.txt          NA
 ## 1 GeneName     <NA>        <NA>        Ensembl     Pathway
 ## 2   Gene_1 Ens_1001 DNA_Binding       Ens_1001 DNA_Binding
@@ -557,10 +658,10 @@ 

summary(annotatedExpression$Pathway)
+
summary(annotatedExpression$Pathway)
##    Length     Class      Mode 
 ##      5001 character character
-
summary(Annotation$Pathway)
+
summary(Annotation$Pathway)
##    Length     Class      Mode 
 ##      5000 character character
diff --git a/r_course/exercises/answers/factorsAndDataframes_answers.html b/r_course/exercises/answers/factorsAndDataframes_answers.html index fd2f156..3e8db17 100644 --- a/r_course/exercises/answers/factorsAndDataframes_answers.html +++ b/r_course/exercises/answers/factorsAndDataframes_answers.html @@ -437,33 +437,80 @@

Introduction +

These exercises are about the factors and data frames sections of Introduction to R.

-

These exercises cover the factors and data frames sections of Introduction to R.

Exercise 1 - Factors

    +
  • Create a nominal factor called CellType containing: +“DC1”,“DC1”,“DC1”,“NK”,“NK”,“Mono”,“Mono”,“DC2”,“NK”
  • +
+
CellType <- factor(c("DC1","DC1","DC1","NK","NK","Mono","Mono","DC2","NK"))
+CellType
+
## [1] DC1  DC1  DC1  NK   NK   Mono Mono DC2  NK  
+## Levels: DC1 DC2 Mono NK
+
    +
  • Modify the third position of CellType to “Neu”, by modifying the +levels of the factor.
  • +
+
levels(CellType) <- c(levels(CellType),"Neu")
+CellType[3] <- "Neu"
+CellType
+
## [1] DC1  DC1  Neu  NK   NK   Mono Mono DC2  NK  
+## Levels: DC1 DC2 Mono NK Neu
+
    +
  • Create CellType2 with the same entries, but directly specify the +levels to include: “DC1”, “DC2”, “Mono”, “NK”, “Neu”, “Bcell”, +“Tcell”.
  • +
+
CellType2 <- factor(c("DC1","DC1","DC1","NK","NK","Mono","Mono","DC2","NK"), levels = c("DC1", "DC2", "Mono", "NK", "Neu", "Bcell", "Tcell"))
+CellType2
+
## [1] DC1  DC1  DC1  NK   NK   Mono Mono DC2  NK  
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
    +
  • Use combine to increase the length of CellType2 to include: +“Neu”,“Neu”,“Bcell”,“DC1”
  • +
+
CellType2 <- c(CellType2, factor(c("Neu","Neu","Bcell","DC1"), levels = c("DC1", "DC2", "Mono", "NK", "Neu", "Bcell", "Tcell")))
+CellType2
+
##  [1] DC1   DC1   DC1   NK    NK    Mono  Mono  DC2   NK    Neu   Neu   Bcell
+## [13] DC1  
+## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
+
    +
  • Summarize the number of entries for each cell type.
  • +
+
summary(CellType2)
+
##   DC1   DC2  Mono    NK   Neu Bcell Tcell 
+##     4     1     2     3     2     1     0
+
    +
  • Reorder the summary to alphabetical order
  • +
+
CellType2 <- factor(CellType2, levels = c("Bcell","DC1", "DC2", "Mono", "Neu","NK","Tcell"))
+summary(CellType2)
+
## Bcell   DC1   DC2  Mono   Neu    NK Tcell 
+##     1     4     1     2     2     3     0
+
  • Create an ordinal factor named “Height” containing – high, low, mid, low, mid, low, mid, high, mid, high.
-
ordinalFactor <- factor(c("high", "low", "mid", "low", "mid", "low", "mid", "high", "mid", "high"),ordered=T,levels=c("low", "mid", "high"))
-ordinalFactor
+
Height <- factor(c("high", "low", "mid", "low", "mid", "low", "mid", "high", "mid", "high"),ordered=T,levels=c("low", "mid", "high"))
+Height
##  [1] high low  mid  low  mid  low  mid  high mid  high
 ## Levels: low < mid < high
  • Using a logical index, create a new factor of only those from “Height” greater than low.
-
filteredFactor <- ordinalFactor[ordinalFactor > "low"]
-filteredFactor
+
filteredHeight <- Height[Height > "low"]
+filteredHeight
## [1] high mid  mid  mid  high mid  high
 ## Levels: low < mid < high
  • Replace the last index in “Height” with veryHigh and create a new factor with those greater than mid.
-
newFactor <- factor(ordinalFactor,ordered=T,levels=c("low", "mid", "high","veryHigh"))
-newFactor[length(newFactor)] <- "veryHigh"
-newFactor[newFactor > "mid"]
+
newFactor <- factor(Height,ordered=T,levels=c("low", "mid", "high","veryHigh"))
+newFactor[length(newFactor)] <- "veryHigh"
+newFactor[newFactor > "mid"]
## [1] high     high     veryHigh
 ## Levels: low < mid < high < veryHigh

Exercise 2 - Data frames

@@ -474,69 +521,129 @@

Annotation <- data.frame(geneNames=c("Gene_1", "Gene_2", "Gene_3","Gene_4","Gene_5"), ensembl=c("Ens001", "Ens003", "Ens006", "Ens007", "Ens010"),pathway=c("Glycolysis", "TGFb", "Glycolysis", "TGFb", "Glycolysis"),geneLengths=c(100, 3000, 200, 1000,1200))
+
Annotation <- data.frame(geneNames=c("Gene_1", "Gene_2", "Gene_3","Gene_4","Gene_5"), ensembl=c("Ens001", "Ens003", "Ens006", "Ens007", "Ens010"),pathway=c("Glycolysis", "TGFb", "Glycolysis", "TGFb", "Glycolysis"),geneLengths=c(100, 3000, 200, 1000,1200))
+Annotation
+
##   geneNames ensembl    pathway geneLengths
+## 1    Gene_1  Ens001 Glycolysis         100
+## 2    Gene_2  Ens003       TGFb        3000
+## 3    Gene_3  Ens006 Glycolysis         200
+## 4    Gene_4  Ens007       TGFb        1000
+## 5    Gene_5  Ens010 Glycolysis        1200
+
    +
  • Filter Annotation to geneLengths that are greater than 500 and less +than 2000. Use the dollar sign to extract column information.
  • +
+
Annotation[Annotation$geneLengths>500 & Annotation$geneLengths<2000,]
+
##   geneNames ensembl    pathway geneLengths
+## 4    Gene_4  Ens007       TGFb        1000
+## 5    Gene_5  Ens010 Glycolysis        1200
+
    +
  • Check the data types of each column. Update the pathway column to be +a factor.
  • +
+
class(Annotation[,1])
+
## [1] "character"
+
class(Annotation[,2])
+
## [1] "character"
+
class(Annotation[,3])
+
## [1] "character"
+
class(Annotation[,4])
+
## [1] "numeric"
+
Annotation[,3] <- factor(Annotation[,3])
+Annotation
+
##   geneNames ensembl    pathway geneLengths
+## 1    Gene_1  Ens001 Glycolysis         100
+## 2    Gene_2  Ens003       TGFb        3000
+## 3    Gene_3  Ens006 Glycolysis         200
+## 4    Gene_4  Ens007       TGFb        1000
+## 5    Gene_5  Ens010 Glycolysis        1200
+
class(Annotation[,3])
+
## [1] "factor"
  • Create data frame called Sample1 with ensembl gene names (“Ens001”, “Ens003”, “Ens006”, “Ens010”) and expression (1000, 3000, 10000,5000)
-
Sample1 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens010"),expression=c(1000, 3000, 10000,5000))
+
Sample1 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens010"),expression=c(1000, 3000, 10000,5000))
+Sample1
+
##   ensembl expression
+## 1  Ens001       1000
+## 2  Ens003       3000
+## 3  Ens006      10000
+## 4  Ens010       5000
  • Create data frame called Sample2 with ensembl gene names (“Ens001”, “Ens003”, “Ens006”, “Ens007”,“Ens010”) and expression (1500, 1500, 17000,500,10000)
-
Sample2 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens007","Ens010"),expression=c(1500, 1500, 17000,500,10000))
+
Sample2 <- data.frame(ensembl=c("Ens001", "Ens003", "Ens006","Ens007","Ens010"),expression=c(1500, 1500, 17000,500,10000))
+Sample2
+
##   ensembl expression
+## 1  Ens001       1500
+## 2  Ens003       1500
+## 3  Ens006      17000
+## 4  Ens007        500
+## 5  Ens010      10000
  • Create a data frame containing only those gene names common to all data frames with all information from Annotation and the expression from Sample 1 and Sample 2.
-
AnnoSample1 <- merge(Annotation,Sample1,by.x=2,by.y=1,all=F) 
-AnnoSample1And2 <- merge(AnnoSample1,Sample2,by=1,all=F) 
-AnnoSample1And2
+
AnnoSample1 <- merge(Annotation,Sample1,by.x=2,by.y=1,all=F) 
+AnnoSample1And2 <- merge(AnnoSample1,Sample2,by=1,all=F) 
+AnnoSample1And2
##   ensembl geneNames    pathway geneLengths expression.x expression.y
 ## 1  Ens001    Gene_1 Glycolysis         100         1000         1500
 ## 2  Ens003    Gene_2       TGFb        3000         3000         1500
 ## 3  Ens006    Gene_3 Glycolysis         200        10000        17000
 ## 4  Ens010    Gene_5 Glycolysis        1200         5000        10000
    -
  • Add an extra two columns containing the length normalised -expressions for Sample 1 and Sample 2
  • +
  • Order our new dataframe by geneLengths - biggest to smallest.
-
AnnoSample1And2$Sample1_lne <- AnnoSample1And2$expression.x/AnnoSample1And2$geneLengths
-AnnoSample1And2$Sample2_lne <- AnnoSample1And2$expression.y/AnnoSample1And2$geneLengths
-AnnoSample1And2
+
AnnoSample1And2 <- AnnoSample1And2[order(AnnoSample1And2$geneLengths, decreasing = T),]
+AnnoSample1And2
##   ensembl geneNames    pathway geneLengths expression.x expression.y
-## 1  Ens001    Gene_1 Glycolysis         100         1000         1500
 ## 2  Ens003    Gene_2       TGFb        3000         3000         1500
+## 4  Ens010    Gene_5 Glycolysis        1200         5000        10000
 ## 3  Ens006    Gene_3 Glycolysis         200        10000        17000
+## 1  Ens001    Gene_1 Glycolysis         100         1000         1500
+
    +
  • Add an extra two columns containing the length normalized +expressions for Sample 1 and Sample 2
  • +
+
AnnoSample1And2$Sample1_lne <- AnnoSample1And2$expression.x/AnnoSample1And2$geneLengths
+AnnoSample1And2$Sample2_lne <- AnnoSample1And2$expression.y/AnnoSample1And2$geneLengths
+AnnoSample1And2
+
##   ensembl geneNames    pathway geneLengths expression.x expression.y
+## 2  Ens003    Gene_2       TGFb        3000         3000         1500
 ## 4  Ens010    Gene_5 Glycolysis        1200         5000        10000
+## 3  Ens006    Gene_3 Glycolysis         200        10000        17000
+## 1  Ens001    Gene_1 Glycolysis         100         1000         1500
 ##   Sample1_lne Sample2_lne
-## 1   10.000000   15.000000
 ## 2    1.000000    0.500000
+## 4    4.166667    8.333333
 ## 3   50.000000   85.000000
-## 4    4.166667    8.333333
+## 1   10.000000   15.000000

    -
  • Identify the mean length normalised expression across Sample1 and +
  • Identify the mean length normalized expression across Sample1 and Sample2 for Ens006 genes
-
rownames(AnnoSample1And2) <- AnnoSample1And2$ensembl
-mean(c(AnnoSample1And2["Ens006","Sample1_lne"],AnnoSample1And2["Ens006","Sample2_lne"]))
+
rownames(AnnoSample1And2) <- AnnoSample1And2$ensembl
+mean(c(AnnoSample1And2["Ens006","Sample1_lne"],AnnoSample1And2["Ens006","Sample2_lne"]))
## [1] 67.5
    -
  • For all genes, identify the log2 fold change in length normalised +
  • For all genes, identify the log2 fold change in length normalized expression from Sample 1 to Sample 2.
-
log2FoldChange <- log2(AnnoSample1And2$Sample2_lne) - log2(AnnoSample1And2$Sample1_lne)
-names(log2FoldChange) <- AnnoSample1And2$geneNames
-log2FoldChange
-
##     Gene_1     Gene_2     Gene_3     Gene_5 
-##  0.5849625 -1.0000000  0.7655347  1.0000000
+
log2FoldChange <- log2(AnnoSample1And2$Sample2_lne) - log2(AnnoSample1And2$Sample1_lne)
+names(log2FoldChange) <- AnnoSample1And2$geneNames
+log2FoldChange
+
##     Gene_2     Gene_5     Gene_3     Gene_1 
+## -1.0000000  1.0000000  0.7655347  0.5849625
  • Identify the total length of genes in Glycolysis pathway.
-
sum(AnnoSample1And2[AnnoSample1And2$pathway == "Glycolysis","geneLengths"])
+
sum(AnnoSample1And2[AnnoSample1And2$pathway == "Glycolysis","geneLengths"])
## [1] 1500
diff --git a/r_course/exercises/exercises/DataInputOutput_exercise.html b/r_course/exercises/exercises/DataInputOutput_exercise.html index 728060b..5b0b1f5 100644 --- a/r_course/exercises/exercises/DataInputOutput_exercise.html +++ b/r_course/exercises/exercises/DataInputOutput_exercise.html @@ -226,6 +226,106 @@ + + + @@ -337,22 +437,68 @@

Introduction +

These exercises are about reading and writing data sections of Introduction to R.

Exercise 1

Have a look at all files in notepad or excel before attempting to read. All files can be found in the “data” directory.

    -
  • Read in the tab delimited file “GeneExpression.txt”.

  • -
  • Find the mean expression of all samples.

  • +
  • Check your current working directory. Set your working directory to +be in the downloaded course material. Specifically the r_course +subdirectory. [The exact path will depend on where you have saved your +download]
  • +
+
getwd()
+
[1] "/Users/mattpaul"
+
setwd("~/Downloads/Intro_To_R_1Day-master/r_course")
+
    +
  • Read in the tab delimited file “GeneExpression.txt”. Check the data +type.
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
-
##  Sample_1.hi  Sample_2.hi  Sample_3.hi Sample_4.low Sample_5.low Sample_1.low 
-##     7.514996     6.774108     6.508127     6.262253     6.177761     6.017462
+
## [1] "data.frame"       "list"             "oldClass"         "vector"          
+## [5] "list_OR_List"     "vector_OR_factor" "vector_OR_Vector"
    -
  • Read in the tab delimited file “GeneExpressionWithMethods.txt”. This -file contains information on analysis steps used to produce file.
  • +
  • Coerce the data frame to a matrix

  • +
  • Find the mean expression of all genes.

  • +
+
##    Gene_a    Gene_b    Gene_c    Gene_d    Gene_e    Gene_f    Gene_g    Gene_h 
+##  4.660569  4.379796  4.259824  5.849420  5.850658  6.732781 10.405203 10.201357
+
    +
  • Coerce the matrix back to a data frame. Add an additional column +with extra gene info +“Kidney”,“Adrenal”,“Liver”,“Adrenal”,“Kidney”,“Liver”,“Liver”,“Kidney”. +Also add the mean expression as a column.
  • +
+
geneExpression <- as.data.frame(geneExpression)
+geneExpression$tissue_localisation <- factor(c("Kidney","Adrenal","Liver","Adrenal","Kidney","Liver","Liver","Kidney"))
+geneExpression$mean_expr <- sampleMeans
+geneExpression
+
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
+## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
+## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
+## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
+## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
+## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
+## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
+## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
+## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
+##        Sample_1.low tissue_localisation mean_expr
+## Gene_a    5.9350948              Kidney  4.660569
+## Gene_b    2.6313925             Adrenal  4.379796
+## Gene_c    5.7149521               Liver  4.259824
+## Gene_d    8.1964109             Adrenal  5.849420
+## Gene_e    0.7332521              Kidney  5.850658
+## Gene_f    3.8519471               Liver  6.732781
+## Gene_g   11.1733928               Liver 10.405203
+## Gene_h    9.9032500              Kidney 10.201357
+
    +
  • Read in the tab delimited file “GeneExpression.txt”. Order genes by +decreasing mean expression. Filter out genes with +expression below 5. Write out a new comma separated file with column +titles.

  • +
  • Read in the tab delimited file “GeneExpressionWithMethods.txt”. +This file contains information on analysis steps used to produce +file.

##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
 ## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
@@ -394,11 +540,6 @@ 

Introduction to R.

+

Exercise 1 - Functions

– Create a function which takes one number and returns the square of that number

## [1] 9

– Create a function which takes two numbers and returns the mean

## [1] 2
    -
  • Create a function which takes two vectors and returns the mean
  • +
  • Create a function which takes two vectors and returns the mean. +Include a message statement that gives the total length of both +vectors.
-
## [1] 2
+
## The total length of my vectors is 8
+
## [1] 4.25
@@ -481,7 +470,7 @@

Course Overview


Course Integrity

-

This course is compiled automatically on 2024-12-10 +

This course is compiled automatically on 2024-12-11

The course is tested and available on MacOS, Windows and diff --git a/r_course/presentations/singlepage/introToR_Session1.html b/r_course/presentations/singlepage/introToR_Session1.html index ba083d8..8c6933c 100644 --- a/r_course/presentations/singlepage/introToR_Session1.html +++ b/r_course/presentations/singlepage/introToR_Session1.html @@ -2084,11 +2084,11 @@

Combining logical vectors

Time for an exercise!

-

Exercise on vectors can be found here

+

Exercise on vectors can be found here

-
-

Answers to exercise.

-

Answers can be found here here

+
+

Answers to exercise

+

Answers can be found here

@@ -2395,11 +2395,11 @@

Data types

Time for an exercise!

-

Exercise on matrices can be found here

+

Exercise on matrices can be found here

-
-

Answers to exercise.

-

Answers can be found here here

+
+

Answers to exercise

+

Answers can be found here

@@ -2453,8 +2453,7 @@

Display order of levels

Nominal factors

In some cases there is no natural order to the categories such that -one category is greater than the other (nominal data). In this case we -can see that R is gender neutral.

+one category is greater than the other (nominal data).

factorExample <- factor(vectorExample, levels=c("male","female"))
 factorExample[1] < factorExample[2]
## Warning in Ops.factor(factorExample[1], factorExample[2]): '<' not meaningful
@@ -2798,11 +2797,11 @@ 

Merging data frames

Time for an exercise!

-

Exercise on data frames can be found here

+

Exercise on data frames can be found here

-
-

Answers to exercise.

-

Answers can be found here here

+
+

Answers to exercise

+

Answers can be found here

@@ -2953,6 +2952,14 @@

Flattening lists to matrices

## Second 2 6 7 ## Third 1 4 7
+
+

Time for an exercise!

+

Exercise on lists can be found here

+
+
+

Answers to exercise

+

Answers can be found here

+

Coercing data formats

@@ -3075,7 +3082,7 @@

More complex objects

manage Dates or Times.

Time <- Sys.time()
 Time
-
## [1] "2024-12-10 21:07:46 UTC"
+
## [1] "2024-12-11 01:00:32 UTC"

More complex objects

@@ -3103,11 +3110,11 @@

More complex objects

More complex objects

We can also use the arithmetic operations with our time objects.

Time
-
## [1] "2024-12-10 21:07:46 UTC"
+
## [1] "2024-12-11 01:00:32 UTC"
Time - 120
-
## [1] "2024-12-10 21:05:46 UTC"
+
## [1] "2024-12-11 00:58:32 UTC"
TimeNow - Time
-
## Time difference of 0.1043649 secs
+
## Time difference of 0.1018391 secs

More complex objects

@@ -3118,18 +3125,18 @@

More complex objects

We can also change the timezone by specifying a tz parameter

format(Time,format="%H O'Clock %p %A on %B %dth")
-
## [1] "21 O'Clock PM Tuesday on December 10th"
+
## [1] "01 O'Clock AM Wednesday on December 11th"
format(Time,format="%H O'Clock %p %A on %B %dth",tz = "GMT")
-
## [1] "21 O'Clock PM Tuesday on December 10th"
+
## [1] "01 O'Clock AM Wednesday on December 11th"

Complex to base objects

Most of the time we can convert more complex objects back to the basic object types we are more familiar with.

as.character(Time)
-
## [1] "2024-12-10 21:07:46.094567"
+
## [1] "2024-12-11 01:00:32.629524"
as.numeric(TimeNow-Time)
-
## [1] 0.1043649
+
## [1] 0.1018391

Complex objects summary

@@ -3238,13 +3245,13 @@

Data from external sources

Gene_a -3.973947 +3.658047 -3.930245 +5.530241 -2.474149 +3.272248 @@ -3252,13 +3259,13 @@

Data from external sources

Gene_b -3.461118 +3.778873 -3.985219 +5.652030 -5.081024 +3.694406 @@ -3266,13 +3273,13 @@

Data from external sources

Gene_c -4.363544 +3.918055 -4.808006 +4.841402 -4.068423 +5.957884 @@ -3280,13 +3287,13 @@

Data from external sources

Gene_d -5.089677 +3.223094 -1.477786 +3.130542 -4.949975 +4.522371 @@ -3294,13 +3301,13 @@

Data from external sources

Gene_e -9.478252 +10.369538 -10.525974 +10.172782 -9.489895 +10.039658 @@ -3308,13 +3315,13 @@

Data from external sources

Gene_f -9.167348 +11.044072 -10.601165 +10.561451 -9.324316 +9.909005 @@ -3322,13 +3329,13 @@

Data from external sources

Gene_g -9.399451 +8.915426 -11.443669 +11.178636 -9.400897 +10.473599 @@ -3336,13 +3343,13 @@

Data from external sources

Gene_h -11.166681 +9.649608 -11.069730 +10.231282 -9.805737 +10.386836 @@ -3655,11 +3662,11 @@

Save and read data

remembers the objects original name i.e. Table or myList.

load("my_list.RData")
-
+

Time for an exercise!

Exercise on reading and writing data can be found here

-
+

Answers to exercise

Answers can be found here

diff --git a/r_course/presentations/singlepage/introToR_Session2.html b/r_course/presentations/singlepage/introToR_Session2.html index eb84eb9..65a321d 100644 --- a/r_course/presentations/singlepage/introToR_Session2.html +++ b/r_course/presentations/singlepage/introToR_Session2.html @@ -2151,11 +2151,11 @@

sapply() example 3

Time for an exercise!

-

Exercise on loops and conditional branching can be found here

+

Exercise on loops and conditional branching can be found here

Answers to exercise

-

Answers can be found here here

+

Answers can be found here

@@ -2390,8 +2390,8 @@

Custom function example

A <- rnorm(20) my_zscore(my_number=A[1], my_vector=A)

-
## Mean is 0.389048273564465
-
## [1] 0.3648576
+
## Mean is 0.416867907369881
+
## [1] -0.002603881

Debugging functions

@@ -2406,10 +2406,10 @@

Debugging functions

Custom functions and apply

These custom functions can also be utilized with apply.

sapply(A, my_zscore, my_vector=A)
-
##  [1]  0.36485764  1.15853052 -0.87955629  0.30247335 -0.27396124 -0.41809173
-##  [7]  0.52441334  0.69940853  1.12897715  0.07544800 -1.69037492  1.56788608
-## [13] -1.27228116  1.51188113 -0.05544895  0.88612324 -0.78095665 -1.69200058
-## [19] -0.90606862 -0.25125886
+
##  [1] -0.002603881 -0.128616732  0.538543594  1.050796185 -0.562454904
+##  [6]  1.331683190 -0.208345806 -0.621922028 -2.431404457 -0.149177218
+## [11] -0.905864425  0.441651262 -0.378431902 -0.195380445 -1.361557140
+## [16]  1.155218949 -0.223992938  2.123924129  0.421056362  0.106878204
@@ -2515,7 +2515,7 @@

Sourcing scripts

}

source("scripts/dayOfWeek.r")
 dayOfWeek()
-
## [1] "Tue"
+
## [1] "Wed"

Rscript

diff --git a/r_course/presentations/slides/introToR_Session1.html b/r_course/presentations/slides/introToR_Session1.html index 173c79a..c10ab48 100644 --- a/r_course/presentations/slides/introToR_Session1.html +++ b/r_course/presentations/slides/introToR_Session1.html @@ -1020,14 +1020,14 @@ ## Time for an exercise! -Exercise on vectors can be found [here](../../exercises/exercises/vector_exercise.html) +Exercise on vectors can be found [here](../../exercises/exercises/Vectors_exercise.html) --- -## Answers to exercise. +## Answers to exercise -Answers can be found here [here](../../exercises/answers/vector_answers.html) +Answers can be found here [here](../../exercises/answers/Vectors_answers.html) --- @@ -1583,14 +1583,14 @@ ## Time for an exercise! -Exercise on matrices can be found [here](../../exercises/exercises/matrices_exercise.html) +Exercise on matrices can be found [here](../../exercises/exercises/Matrices_exercise.html) --- -## Answers to exercise. +## Answers to exercise -Answers can be found here [here](../../exercises/answers/matrices_answers.html) +Answers can be found here [here](../../exercises/answers/Matrices_answers.html) @@ -1690,7 +1690,6 @@ ## Nominal factors In some cases there is no natural order to the categories such that one category is greater than the other (nominal data). -In this case we can see that R is gender neutral. ``` r @@ -2264,13 +2263,13 @@ --- ## Time for an exercise! -Exercise on data frames can be found [here](../../exercises/exercises/factorsAndDataframes_exercise.html) +Exercise on data frames can be found [here](../../exercises/exercises/FactorsAndDataframes_exercise.html) --- -## Answers to exercise. +## Answers to exercise -Answers can be found here [here](../../exercises/answers/factorsAndDataframes_answers.html) +Answers can be found here [here](../../exercises/answers/FactorsAndDataframes_answers.html) --- @@ -2495,6 +2494,21 @@ ## Third 1 4 7 ``` +--- +## Time for an exercise! 
+ + +Exercise on lists can be found [here](../../exercises/exercises/Lists_exercise.html) + + +--- +## Answers to exercise + + +Answers can be found [here](../../exercises/answers/Lists_answers.html) + + + --- class: inverse, center, middle @@ -2708,7 +2722,7 @@ ``` ``` -## [1] "2024-12-10 21:07:43 UTC" +## [1] "2024-12-11 01:00:30 UTC" ``` --- @@ -2763,7 +2777,7 @@ ``` ``` -## [1] "2024-12-10 21:07:43 UTC" +## [1] "2024-12-11 01:00:30 UTC" ``` ``` r @@ -2771,7 +2785,7 @@ ``` ``` -## [1] "2024-12-10 21:05:43 UTC" +## [1] "2024-12-11 00:58:30 UTC" ``` ``` r @@ -2779,7 +2793,7 @@ ``` ``` -## Time difference of 0.1014812 secs +## Time difference of 0.09647393 secs ``` --- @@ -2797,7 +2811,7 @@ ``` ``` -## [1] "21 O'Clock PM Tuesday on December 10th" +## [1] "01 O'Clock AM Wednesday on December 11th" ``` ``` r @@ -2805,7 +2819,7 @@ ``` ``` -## [1] "21 O'Clock PM Tuesday on December 10th" +## [1] "01 O'Clock AM Wednesday on December 11th" ``` --- @@ -2819,7 +2833,7 @@ ``` ``` -## [1] "2024-12-10 21:07:43.989243" +## [1] "2024-12-11 01:00:30.832828" ``` ``` r @@ -2827,7 +2841,7 @@ ``` ``` -## [1] 0.1014812 +## [1] 0.09647393 ``` --- @@ -2944,51 +2958,51 @@ <tbody> <tr> <td style="text-align:left;"> Gene_a </td> - <td style="text-align:right;"> 3.423364 </td> - <td style="text-align:right;"> 2.551130 </td> - <td style="text-align:right;"> 2.575654 </td> + <td style="text-align:right;"> 4.429622 </td> + <td style="text-align:right;"> 3.363374 </td> + <td style="text-align:right;"> 4.520210 </td> </tr> <tr> <td style="text-align:left;"> Gene_b </td> - <td style="text-align:right;"> 4.080952 </td> - <td style="text-align:right;"> 5.865549 </td> - <td style="text-align:right;"> 3.958085 </td> + <td style="text-align:right;"> 4.114483 </td> + <td style="text-align:right;"> 3.992814 </td> + <td style="text-align:right;"> 2.877940 </td> </tr> <tr> <td style="text-align:left;"> Gene_c </td> - <td style="text-align:right;"> 4.473275 </td> - <td style="text-align:right;"> 
3.771709 </td> - <td style="text-align:right;"> 4.611821 </td> + <td style="text-align:right;"> 3.521606 </td> + <td style="text-align:right;"> 3.215211 </td> + <td style="text-align:right;"> 5.199799 </td> </tr> <tr> <td style="text-align:left;"> Gene_d </td> - <td style="text-align:right;"> 3.415446 </td> - <td style="text-align:right;"> 3.817052 </td> - <td style="text-align:right;"> 2.907351 </td> + <td style="text-align:right;"> 3.840954 </td> + <td style="text-align:right;"> 3.860644 </td> + <td style="text-align:right;"> 3.688784 </td> </tr> <tr> <td style="text-align:left;"> Gene_e </td> - <td style="text-align:right;"> 11.960879 </td> - <td style="text-align:right;"> 9.113075 </td> - <td style="text-align:right;"> 9.927639 </td> + <td style="text-align:right;"> 11.896069 </td> + <td style="text-align:right;"> 10.056214 </td> + <td style="text-align:right;"> 10.062543 </td> </tr> <tr> <td style="text-align:left;"> Gene_f </td> - <td style="text-align:right;"> 9.821113 </td> - <td style="text-align:right;"> 8.704334 </td> - <td style="text-align:right;"> 9.940262 </td> + <td style="text-align:right;"> 9.947901 </td> + <td style="text-align:right;"> 8.884012 </td> + <td style="text-align:right;"> 8.773162 </td> </tr> <tr> <td style="text-align:left;"> Gene_g </td> - <td style="text-align:right;"> 10.763196 </td> - <td style="text-align:right;"> 9.869513 </td> - <td style="text-align:right;"> 9.393374 </td> + <td style="text-align:right;"> 9.640603 </td> + <td style="text-align:right;"> 10.470026 </td> + <td style="text-align:right;"> 9.591002 </td> </tr> <tr> <td style="text-align:left;"> Gene_h </td> - <td style="text-align:right;"> 11.401766 </td> - <td style="text-align:right;"> 9.550342 </td> - <td style="text-align:right;"> 11.530905 </td> + <td style="text-align:right;"> 11.954618 </td> + <td style="text-align:right;"> 10.533096 </td> + <td style="text-align:right;"> 11.728931 </td> </tr> </tbody> </table> diff --git 
a/r_course/presentations/slides/introToR_Session2.html b/r_course/presentations/slides/introToR_Session2.html index dfdd2ed..cc513c9 100644 --- a/r_course/presentations/slides/introToR_Session2.html +++ b/r_course/presentations/slides/introToR_Session2.html @@ -1027,13 +1027,13 @@ ## Time for an exercise! -Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/conditionsAndLoops_exercise.html) +Exercise on loops and conditional branching can be found [here](../..//exercises/exercises/ConditionsAndLoops_exercise.html) --- ## Answers to exercise -Answers can be found here [here](../..//exercises/answers/conditionsAndLoops_answers.html) +Answers can be found here [here](../..//exercises/answers/ConditionsAndLoops_answers.html) --- @@ -1388,11 +1388,11 @@ ``` ``` -## Mean is -0.0417158165317701 +## Mean is -0.138706455190791 ``` ``` -## [1] -0.555588 +## [1] 1.020659 ``` --- @@ -1417,10 +1417,10 @@ ``` ``` -## [1] -0.5555880 -0.1322663 0.1899138 -0.2299799 -1.6904861 0.4030488 -## [7] 0.9826960 -0.1899368 1.5052016 1.1021064 -0.8039631 -0.1358766 -## [13] -0.1283686 0.4864651 0.7693742 -2.5554399 1.2469762 -1.0641361 -## [19] 0.5323066 0.2679527 +## [1] 1.02065900 0.47143522 -0.11262739 0.35643079 -1.19629115 -0.07521402 +## [7] -0.55040037 0.13963265 1.40238593 -1.51405434 -1.32555234 -1.04840617 +## [13] 1.84708611 0.46460992 1.63750080 -0.51232894 -0.95341254 0.89065505 +## [19] -0.51716384 -0.42494437 ``` @@ -1569,7 +1569,7 @@ ``` ``` -## [1] "Tue" +## [1] "Wed" ``` ---