Functional Programming.Rmd

---
title: 'Lecture #26: Functional Programming'
author: "Nicholas J. Gotelli"
date: "April 21st, 2020"
output:
  html_document:
    highlight: tango
    theme: united
  pdf_document: default
---
```{r setup, include=FALSE}
# Global chunk options: echo the code, hide messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
```

### Different function types in R
```{r}
# the flavours of function call available in R
z <- seq_len(10)


# ordinary "prefix" call of a built-in: name first, arguments in parentheses
mean(x = z)

# an "in-fix" operator is a function too; backticks let us call it prefix-style
`+`(z, 100)

# user-defined functions
# --------------------------------------
# FUNCTION my_fun
# description: largest value of sin(x) + x across the elements of x
# inputs: x = numeric vector (default: 5 random uniform values)
# outputs: 1-element numeric vector
########################################
my_fun <- function(x=runif(5)) {

  shifted <- sin(x) + x
  max(shifted)

} # end of my_fun
# --------------------------------------
# call with the default argument (5 random uniform values)
my_fun()
# call with the vector z defined earlier
my_fun(z)

# anonymous functions
# unnamed, used for simple calculations, usually with a single input, by convention called x

function(x) x + 3 # anonymous function: defined (and printed), but never called
# the next line does NOT call the function: `3 (10)` is parsed as part of the
# function body, so this only defines (and prints) another function
function(x) x + 3 (10) # try to provide input
(function(x) x + 3) (10) # wrapping the definition in parentheses lets us call it

# 5-row matrix filled row by row with the integers 1 to 20
m <- matrix(data = seq_len(20), nrow = 5, byrow = TRUE)
print(m)

# pre-allocate a list with one empty slot per row
output <- vector(mode = "list", length = nrow(m))
str(output)
print(output)

# run my_fun on each row in turn and store the result
for (row_id in seq_len(nrow(m))) {
  output[[row_id]] <- my_fun(m[row_id, ])
}
print(output)

# using apply to do the same thing
# apply(X, MARGIN, FUN, ...)
# X is an array or matrix
# MARGIN is 1 (apply FUN to each row), 2 (each column), or c(1, 2) (each element)
# FUN is the function applied to each row, column, or element
# ... additional inputs to FUN


# row-wise: one value per row
row_out <- apply(m, 1, my_fun)
print(row_out)

# column-wise: one value per column
apply(X = m, MARGIN = 2, FUN = my_fun)

# both margins at once: my_fun is applied to every single element
apply(X = m, MARGIN = c(1, 2), FUN = my_fun)

# the same row and column summaries written with an anonymous function
apply(X = m, MARGIN = 1, FUN = function(x) max(sin(x) + x))
apply(X = m, MARGIN = 2, FUN = function(x) max(sin(x) + x))

# shuffle each row; apply returns the shuffled rows as columns
apply(X = m, MARGIN = 1, FUN = sample)

# transpose to get the shuffled rows back in row orientation
t(apply(X = m, MARGIN = 1, FUN = sample))

# draw a random-sized subsample from each row
apply(
  X = m,
  MARGIN = 1,
  FUN = function(x) {
    keep <- sample(seq_along(x), size = 1)
    x[sample(seq_along(x), size = keep)]
  }
)


# three columns of 20 random uniform values each
df <- data.frame(x = runif(20), y = runif(20), z = runif(20))

# empty list with one slot per column
output <- vector(mode = "list", length = ncol(df))
print(output)

# coefficient of variation (sd / mean) of each column
for (col_id in seq_len(ncol(df))) {
  output[[col_id]] <- sd(df[[col_id]]) / mean(df[[col_id]])
}
print(output)

# using lapply to do the same thing
# lapply(X, FUN, ...)
# X is a vector (atomic or list); a data frame is a list of columns
# FUN is a function applied to each element of X
# ... additional inputs to FUN
# output is always a list with one element per element of X

 # sd / mean of every column of df, returned as a named list
summary_out <- lapply(df, function(x) sd(x) / mean(x))
print(summary_out)

# group labels: first half of the rows "control", second half "treatment"
treatment <- rep(c("control", "treatment"), each = nrow(df) / 2)
print(treatment)

# add the labels to the data as a new column
df2 <- cbind(df, treatment)
head(df2)

# loop version: only numeric columns are summarised, others keep their NULL slot
output2 <- vector(mode = "list", length = ncol(df2))
for (col_id in seq_len(ncol(df2))) {
  if (is.numeric(df2[, col_id])) {
    output2[col_id] <- sd(df2[, col_id]) / mean(df2[, col_id])
  }
}

print(output2)

# lapply version: the anonymous function returns NULL for non-numeric columns
lapply(X = df2, FUN = function(x) if (is.numeric(x)) sd(x) / mean(x))

# unlist() drops the NULL entry and leaves a named numeric vector
z <- unlist(
  lapply(X = df2, FUN = function(x) if (is.numeric(x)) sd(x) / mean(x))
)
print(z)

print(df2)

# one list slot per treatment group, named after the group
g <- unique(df2[["treatment"]])
print(g)
out_g <- setNames(vector(mode = "list", length = length(g)), g)
print(out_g)

# subset the rows of each group, then compute sd / mean of x
for (grp in seq_along(g)) {
  df_sub <- df2[df2[["treatment"]] == g[grp], ]
  out_g[grp] <- sd(df_sub[["x"]]) / mean(df_sub[["x"]])
}

print(out_g)

# tapply does the same split / apply / combine in a single call
z <- tapply(df2[["x"]], df2[["treatment"]], function(x) sd(x) / mean(x))
print(z)

# --------------------------------------
# FUNCTION pop_gen
# description: generate a stochastic population track of varying length
# inputs: z = number of time steps (default: one random integer from 1 to 10)
# outputs: numeric vector of z random uniform values
########################################
pop_gen <- function(z=sample.int(n=10,size=1)) {

  n <- runif(z)

  return(n) # note returns a numeric vector of stochastic length

} # end of pop_gen
# --------------------------------------
pop_gen()

# repeat the stochastic process n_reps times, one track per list slot
n_reps <- 20
list_out <- vector("list", n_reps)
for (i in seq_len(n_reps)) {
  # store the vector itself: wrapping it in list() while assigning with [[ ]]
  # would bury each track inside an extra list level
  list_out[[i]] <- pop_gen()
}
head(list_out)
list_out[[1]]

# replicate() achieves the same result without an explicit loop

# replicate(n, expr)
# n: how many times expr is evaluated
# expr: a function call (base or user-defined) or any bare expression,
#       i.e. the body of an anonymous function without the function(x) header

z_out <- replicate(n = 5, expr = pop_gen())
print(z_out)


# parameter sweep for the species-area function S = c * A^z,
# which takes the parameters c, z, and A as inputs

# build a data frame holding every combination of the parameters
a_pars <- seq_len(10)
c_pars <- c(100, 150, 125)
z_pars <- c(0.10, 0.16, 0.26, 0.30)
df <- expand.grid(a = a_pars, c = c_pars, z = z_pars)
head(df)

# loop version: add an empty s column and fill it one row at a time
df_out <- cbind(df, s = NA)

for (row_id in seq_len(nrow(df))) {
  df_out$s[row_id] <- df$c[row_id] * df$a[row_id]^df$z[row_id]
}
head(df_out)

# mapply version: the function receives one element of each vector per call
df_out$s <- mapply(
  function(a, c, z) c * a^z,
  df$a, df$c, df$z
)

head(df_out)

# vectorized version: arithmetic already works on whole columns
df_out$s <- with(df_out, c * a^z)
head(df_out)


# first create some short user-defined functions
my_sum <- function(a,b) a + b
my_dif <- function(a,b) a - b
my_mult <- function(a,b) a*b

# we already know that built-in functions can be called directly from within a function

# built-in function in R
funct_1 <- function(a=3,b=2) sum(a,b)
funct_1()
# our function
funct_2 <- function(a=3,b=2) my_sum(a,b)
funct_2()

funct_3 <- function(a=3,b=2) my_mult(a,b)
funct_3()
# each time we want to use a different one of the "my" functions, we have to create a new function to call it. 

# now pass data AND another function into a function as parameters:

# algebra applies whichever two-argument function x is supplied to a and b
algebra <- function(x=my_sum,a=3,b=2) x(a,b)
algebra(x=my_sum)
algebra(x=my_dif)
algebra(x=my_mult)

# built-in functions can be passed in the same way
algebra(x=sum)
# careful: mean(3, 2) treats 2 as the `trim` argument, so this returns 3, not 2.5
algebra(x=mean)

# clumsy_function(fun_name="my_sum") {
#   if (fun_name =="my_sum") my_sum() else
#     }

# the original call was to an undefined name; pass the function to algebra instead
print(algebra(x=my_mult))
print(algebra())

      
# same column-wise sd / mean loop, but storing results in an atomic vector
# pre-filled with NA, so non-numeric columns simply stay NA
output2 <- rep(NA, times = ncol(df2))
for (col_id in seq_len(ncol(df2))) {
  if (is.numeric(df2[, col_id])) {
    output2[col_id] <- sd(df2[, col_id]) / mean(df2[, col_id])
  }
}
print(output2)


```
### Functions that call functions

```{r}
# three small two-argument helpers
my_sum <- function(a, b) {
  a + b
}
my_dif <- function(a, b) {
  a - b
}
my_mult <- function(a, b) {
  a * b
}

# a function body may call a built-in function directly ...

funct_1 <- function(a = 3, b = 2) {
  sum(a, b)
}
funct_1()

# ... or one of our own functions
funct_2 <- function(a = 3, b = 2) {
  my_sum(a, b)
}
funct_2()

funct_3 <- function(a = 3, b = 2) {
  my_mult(a, b)
}
funct_3()
# drawback: every different "my" function needs its own wrapper function

# alternative: hand the function itself in as a parameter, alongside the data

algebra <- function(x = my_sum, a = 3, b = 2) {
  x(a, b)
}
# sd / mean of each numeric column of df2; non-numeric columns keep their NA
output2 <- rep(NA, times = ncol(df2))
for (col_id in seq_len(ncol(df2))) {
  if (is.numeric(df2[, col_id])) {
    output2[col_id] <- sd(df2[, col_id]) / mean(df2[, col_id])
  }
}
print(output2)
```

#### `lapply` solution
```{r}
# numeric columns yield sd / mean; any other column yields NULL
lapply(X = df2, FUN = function(x) if (is.numeric(x)) sd(x) / mean(x))

# to get a vector rather than a list, unlist the result
z <- unlist(
  lapply(X = df2, FUN = function(x) if (is.numeric(x)) sd(x) / mean(x))
)

# NULL entries vanish in unlist(), so z is shorter than the list above
print(z)
```

### Third Task: split/apply/combine for groups in a data frame

#### `for loop` solution
```{r}
# split df2 by treatment group and summarise x within each group
print(df2)
g <- unique(df2[["treatment"]])
print(g)

# named vector of NA placeholders, one per group
out_g <- setNames(rep(NA, times = length(g)), g)
print(out_g)
for (grp in seq_along(g)) {
  df_sub <- df2[df2[["treatment"]] == g[grp], ]
  out_g[grp] <- sd(df_sub[["x"]]) / mean(df_sub[["x"]])
}
print(out_g)
```

#### `tapply` solution
```{r}
# tapply ("tagged apply") does the split / apply / combine in one step
# tapply(X, INDEX, FUN, ...)
# X: vector (atomic or list) to be subset
# INDEX: factor(s) or character vector(s) defining one or more groups
# FUN: function applied to each group's subset of X
# ...: additional inputs to FUN

z <- tapply(df2[["x"]], df2[["treatment"]], function(x) sd(x) / mean(x))
print(z)
```

### Fourth Task: Replicate a stochastic process

```{r}
# --------------------------------------
# FUNCTION pop_gen
# description: simulate a random population track whose length varies
# inputs: z = number of time steps (default: one random integer from 1 to 10)
# outputs: numeric vector of z random uniform values
########################################
pop_gen <- function(z=sample.int(n=10,size=1)) {

  runif(n = z) # length is stochastic whenever z is left at its default

} # end of pop_gen
# --------------------------------------
pop_gen()
```

#### `for loop` solution
```{r}
# collect n_reps independent tracks in a pre-allocated list
n_reps <- 20
list_out <- vector(mode = "list", length = n_reps)
for (rep_id in seq_len(n_reps)) {
  list_out[[rep_id]] <- pop_gen()
}
head(list_out)
list_out[[1]]
```

#### `replicate` solution

```{r}
# the same repetition, written with replicate() instead of a loop

# replicate(n, expr)
# n: number of repetitions
# expr: a function call (base or user-defined) or a bare expression; think of
#       it as an anonymous function body without the function(x) header

z_out <- replicate(n = 5, expr = pop_gen())
print(z_out)
```

### Fifth Task: Sweep a function with all parameter combinations

```{r}
# parameter sweep for the species-area function S = c * A^z,
# which takes the parameters c, z, and A as inputs

# start with a data frame containing every parameter combination
a_pars <- seq_len(10)
c_pars <- c(100, 150, 125)
z_pars <- c(0.10, 0.16, 0.26, 0.30)
df <- expand.grid(a = a_pars, c = c_pars, z = z_pars)
head(df)
```

#### `for loop` solution
```{r}
# add an empty s column, then fill it one parameter combination at a time
df_out <- cbind(df, s = NA)

for (row_id in seq_len(nrow(df))) {
  df_out$s[row_id] <- df$c[row_id] * df$a[row_id]^df$z[row_id]
}
head(df_out)
```
#### `mapply` solution
```{r}
# mapply ("multiple apply") walks several vectors in parallel
# mapply(FUN, ..., MoreArgs)
# FUN: the function to call (note that it comes first!)
# ...: the vectors or lists to vectorize over
# MoreArgs: list of extra arguments held constant across all calls
df_out$s <- mapply(
  function(a, c, z) c * a^z,
  df$a, df$c, df$z
)

head(df_out)
```

#### the correct solution
```{r}
# neither a loop nor mapply is needed for a function this simple:
# arithmetic operators already work on whole columns at once

df_out$s <- with(df_out, c * a^z)
head(df_out)