---
title: "Agent-based model simulator"
date: '2018-08-11'
output:
  html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(warning = FALSE) # suppress warning messages
knitr::opts_chunk$set(message = FALSE) # suppress messages
knitr::opts_chunk$set(echo = TRUE)     # show the R code in the output
```
## Theoretical model, version 01
In this initial model, we have three phases: idea generation, idea pooling, and analysis of the results.
# Phase 01: Idea generation
We assign a random number to each agent
```{r phase01.1}
target <- 1e9 # we set the target at 1'000'000'000
totAgents <- 1e3 # We use 1'000 agents
clusters <- 10 # We assume 10 domains for ideas
set.seed(1) #Setting a seed to allow comparable results
ideas <- runif(totAgents, min = 0, max = target) # Generating random numbers from a uniform distribution between 0 and the target
```
We cluster the results
```{r phase01.2}
ideaClusters <- 1+as.integer(ideas/(target/clusters)) # Assigning each idea to a cluster by dividing its value by the cluster width (target/clusters)
agentsId <- 1:totAgents # Adding a UID
agentsReward_01 <- rep(0,totAgents) # A variable associated to phase 01
agentsReward_02 <- rep(0,totAgents) # A variable associated to phase 02
agents <- data.frame(agentsId, ideas, ideaClusters,agentsReward_01,agentsReward_02) #Creating a dataframe with ID, value and cluster
# hist(agents$ideas)
# hist(agents$ideaClusters)
```
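As a quick check (in the same spirit as the commented-out histograms above), the 1'000 uniform draws should spread roughly evenly across the 10 clusters:
```{r}
table(agents$ideaClusters) # expect roughly 100 ideas per cluster
```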
We assign prizes
```{r phase01.3}
idClusters <- 1:clusters
idWinners <- rep(NA, clusters)
valueWinners <- rep(NA, clusters)
listWinners <- data.frame(idClusters, idWinners, valueWinners) # Creating a dataframe with cluster ID, winner ID and value
for (i in 1:totAgents) {
  j <- agents[i, 3] # Check the cluster of the agent
  if (is.na(listWinners[j, 2])) { # If the cluster of the agent does not have a winner yet (its value is NA) ...
    listWinners[j, 2] <- agents[i, 1] # ... use the agent's ID
    listWinners[j, 3] <- agents[i, 2] # ... use the agent's idea value
    agents[i, 4] <- 1 # ... and reward the selected agent
  }
}
listWinners
```
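With 1'000 agents spread over 10 clusters, every cluster should end up with a winner; a minimal check using the objects above:
```{r}
sum(!is.na(listWinners$idWinners)) == clusters # TRUE if every cluster has a winner
```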
# Phase 02: Idea pooling
We use [lpSolve](https://www.kdnuggets.com/2018/05/optimization-using-r.html) to define the objective function
```{r phase02.1}
library(lpSolve)
objective.in <- listWinners[,3] # The objective coefficients are the values of the retained (winning) ideas
```
We define the constraints of the problem
```{r phase02.2}
mat <- matrix(listWinners[,3], nrow=1, byrow=TRUE) # The sum of the pooled ideas ...
dir <- "<=" # ... should be below ...
rhs <- target # ... the target
```
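In effect, with `all.bin = TRUE` the pooling step is a 0/1 knapsack over the cluster winners: we choose $x_j \in \{0,1\}$ for each winning idea value $v_j$ so as to

$$\max \sum_{j=1}^{\text{clusters}} v_j\, x_j \quad \text{subject to} \quad \sum_{j=1}^{\text{clusters}} v_j\, x_j \le \text{target}.$$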
We solve the linear programming problem and reward the owners of the pooled ideas
```{r phase02.3}
optimum <- lp(direction = "max", objective.in, mat, dir, rhs, all.bin = TRUE) # Which is the best combination of pooled ideas?
optimum$solution # The selected ideas
for (i in 1:clusters) {
  if (optimum$solution[i] > 0) { # if an idea is pooled ...
    j <- listWinners[i, 2] # ... select the owner of the pooled idea
    agents[j, 5] <- 1 # ... and reward the agent another time
  }
}
```
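As a quick sanity check, the value of the pooled combination should not exceed the target:
```{r}
sum(listWinners$valueWinners * optimum$solution) <= target # should be TRUE
```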
# Phase 03: Analyze results
[Filter](http://r4ds.had.co.nz/transform.html#filter-rows-with-filter) the winners
```{r phase03.1}
library(dplyr) # filter() comes from dplyr
totWinners <- filter(agents, agentsReward_01 > 0) # This could reduce computational effort
```
[Gather](http://r4ds.had.co.nz/tidy-data.html#spreading-and-gathering) the last two reward columns of agents into one
```{r phase03.2}
library(tidyverse)
agentsRewards <- agents %>%
  gather('agentsReward_01', 'agentsReward_02', key = "Phase", value = "Rewards")
```
[Summarize](http://r4ds.had.co.nz/transform.html#grouped-summaries-with-summarise) by agent
```{r phase03.3}
summarise(group_by(agentsRewards, agentsId), Rewards = sum(Rewards, na.rm = TRUE))
```
Check the remainder for another idea challenge
```{r phase03.4}
remainder <- target - sum(agents$ideas*agents$agentsReward_02) #Comparing the initial target with the sum of the pooled ideas
```
## Theoretical model, version 02
Setting up a function to generate ideas
```{r}
phase00 <- function(target, totAgents, totIdeas, clusters){
  set.seed(1) # Setting the seed to a fixed number to allow comparable results
  agentsId <- as.integer(rnorm(totIdeas, totAgents/2, totAgents/10)) # Assigning an owner to each idea (this allows multiple ideas per owner)
  set.seed(1) # Setting the seed again
  idea <- as.integer(runif(totIdeas, min = 0, max = target)) # Generating random numbers from a uniform distribution
  ideaClusters <- 1 + as.integer(idea/(target/clusters)) # Assigning each idea to a cluster by dividing its value by the cluster width (target/clusters)
  agentsReward_01 <- rep(0, totIdeas) # A variable associated with phase 01
  agentsReward_02 <- rep(0, totIdeas) # A variable associated with phase 02
  ideas <- data.frame(agentsId, idea, ideaClusters, agentsReward_01, agentsReward_02) # Creating a dataframe with owner ID, value, cluster and rewards
  return(ideas)
}
```
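A minimal usage sketch (the name `ideasDemo` and the parameter values below are illustrative):
```{r}
ideasDemo <- phase00(target = 1e9, totAgents = 1e3, totIdeas = 1e4, clusters = 10)
head(ideasDemo) # one row per idea: owner ID, idea value, cluster, and the two reward columns
```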
Setting up a function to select ideas
```{r}
phase01 <- function(clusters, ideas){
  # Creating a dataframe with cluster ID, winner ID and value
  idClusters <- 1:clusters
  idWinners <- rep(NA, clusters)
  valueWinners <- rep(NA, clusters)
  listWinners <- data.frame(idClusters, idWinners, valueWinners)
  # Reward the owners of the selected ideas
  lenIdeas <- dim(ideas)[1]
  for (i in 1:lenIdeas) {
    j <- ideas[i, 3] # Check the cluster of the idea
    if (is.na(listWinners[j, 2])) {
      # If the cluster does not have a winner yet (its value is NA) ...
      listWinners[j, 2] <- ideas[i, 1] # ... use the agent's ID
      listWinners[j, 3] <- ideas[i, 2] # ... use the idea's value
      ideas[i, 4] <- 1 + ideas[i, 4] # ... and reward the selected agent by adding 1 (taking into account multiple challenges)
    }
  }
  return(ideas)
}
```
Setting up a function to pool ideas
```{r}
phase02 <- function (selectedIdeas, target) {
  library(lpSolve)
  library(dplyr) # filter() and left_join() come from dplyr
  ideas <- selectedIdeas
  listWinners <- filter(ideas, agentsReward_01 > 0) # Filtering the list of winners before performing a left join with the pooled ideas
  # Linear programming
  objective.in <- listWinners[, 2] # The objective coefficients are the values of the retained ideas
  mat <- matrix(listWinners[, 2], nrow = 1, byrow = TRUE) # The sum of the pooled ideas ...
  dir <- "<=" # ... should be below ...
  rhs <- target # ... the target
  optimum <- lp(direction = "max", objective.in, mat, dir, rhs, all.bin = TRUE) # Which is the best combination of pooled ideas?
  listWinners[, 5] <- optimum$solution # Assign prizes to the pooled ideas
  pooledIdeas <- ideas %>%
    left_join(listWinners, by = c("agentsId", "idea", "ideaClusters", "agentsReward_01")) # Left join
  agentsReward_02 <- pooledIdeas$agentsReward_02.x + pooledIdeas$agentsReward_02.y # Taking into account previous idea challenges
  pooledIdeas <- data.frame(pooledIdeas, agentsReward_02) # Add the new column
  pooledIdeas[5] <- NULL # Removing the redundant column agentsReward_02.x
  pooledIdeas[5] <- NULL # Removing the redundant column agentsReward_02.y
  return(pooledIdeas)
}
```
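Chaining the three functions defined so far (a minimal sketch; `ideasDemo`, `selectedDemo` and `pooledDemo` are illustrative names):
```{r}
ideasDemo <- phase00(1e9, 1e3, 1e4, 10)  # generate 10'000 ideas
selectedDemo <- phase01(10, ideasDemo)   # pick the first idea encountered in each of the 10 clusters
pooledDemo <- phase02(selectedDemo, 1e9) # pool the cluster winners with the knapsack
filter(pooledDemo, agentsReward_02 > 0)  # ideas rewarded again in phase 02
```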
Setting up a function to summarize the rewards
```{r}
phase03 <- function(ideas, target){
  library(tidyverse)
  agentsRewards <- ideas %>%
    gather('agentsReward_01', 'agentsReward_02', key = "Phase", value = "Rewards")
  winners <- summarise(group_by(agentsRewards, agentsId, idea), Rewards = sum(Rewards, na.rm = TRUE))
  winners <- arrange(winners, idea) # Sorting ideas
  problemSolved <- sum(filter(winners, Rewards > 1)[, 2]) # Sum of the values of the ideas rewarded in both phases
  # Print results for comparison
  print("filter(winners, Rewards > 0)")
  print(filter(winners, Rewards > 0)) # Test: selected ideas
  print("mean(winners$Rewards)")
  print(mean(winners$Rewards))
  print("Problem solved")
  print(problemSolved)
  print("Remainder")
  print(target - problemSolved)
  return(agentsRewards)
}
```
Setting up a function to run an idea challenge
```{r}
ideaChallenge <- function(target, totAgents, totIdeas, clusters, sortedIdea) {
  generatedIdeas <- phase00(target, totAgents, totIdeas, clusters) # Generating ideas
  if (sortedIdea) generatedIdeas <- arrange(generatedIdeas, desc(idea)) # Sorting ideas by value (descending), mimicking expert screening
  selectedIdeas <- phase01(clusters, generatedIdeas) # Selecting ideas
  pooledIdeas <- phase02(selectedIdeas, target) # Pooling ideas
  agentsRewards <- phase03(pooledIdeas, target) # Summarizing the rewards
  return(agentsRewards)
}
```
# Run the simulation
## Standard idea challenge benchmark
```{r}
target <- 1e9 # we set the target at 1'000'000'000
totAgents <- 1e3
totIdeas <- 1e4
clusters <- 1 # a single cluster, so only one winner
generatedIdeas0 <- phase00(target, totAgents, totIdeas, clusters) # Generating ideas
generatedIdeas0 <- arrange(generatedIdeas0, desc(idea)) # Sorting ideas by value, as experts would
selectedIdeas0 <- phase01(clusters, generatedIdeas0) # Selecting ideas
agentsRewards0 <- selectedIdeas0 %>%
  gather('agentsReward_01', 'agentsReward_02', key = "Phase", value = "Rewards")
winners <- dim(filter(agentsRewards0, Rewards > 0))[1]
quality <- filter(agentsRewards0, Rewards == 1)[, 2]
prize <- mean(agentsRewards0$Rewards) * totAgents
challengeID <- "Single Winner"
performance <- data.frame(challengeID, winners, quality, prize)
performance$winners # Winners
performance$quality # Sum of the pooled ideas
performance$prize # Cost of prizes
```
## First idea challenge
Model A: Selecting the first idea for each cluster
```{r}
target <- 1e9 # we set the target at 1'000'000'000
totAgents <- 1e3
totIdeas <- 1e4
clusters <- 10
agentsRewards1_1 <- ideaChallenge(target, totAgents, totIdeas, clusters, FALSE)
```
Model B: Selecting the idea for each cluster with the highest score
```{r}
agentsRewards2 <- ideaChallenge(target, totAgents, totIdeas, clusters, TRUE) #using sorted ideas
```
## New idea challenge starting from the remainder of the previous idea challenge
```{r}
# Setting up the remainder as new target
winners1_1 <- summarise(group_by(agentsRewards1_1, agentsId, idea), Rewards = sum(Rewards, na.rm = TRUE))
problemSolved1_1 <- sum(filter(winners1_1, Rewards > 1)[, 2])
remainder1_1 <- target - problemSolved1_1
agentsRewards1_2 <- ideaChallenge(remainder1_1, totAgents, totIdeas, clusters, FALSE)
```
## New idea challenge: Cheatstorming with the remainder
```{r}
# agentsRewards1_3 <- ideaChallenge_Cheatstorming(remainder1_1, totAgents, totIdeas, clusters, agentsRewards1_1) # Using old ideas
target <- remainder1_1 # New target
generatedIdeas <- agentsRewards1_1 %>%
  spread(key = Phase, value = Rewards) # Putting the table back in shape
generatedIdeas <- filter(generatedIdeas, idea < target) # Removing the ideas that are above the target
generatedIdeas$ideaClusters <- 1 + as.integer(generatedIdeas$idea/(target/clusters)) # New clusters
generatedIdeas$agentsReward_01 <- 1/dim(generatedIdeas)[1] + generatedIdeas$agentsReward_01 # Reward all contributions in phase 01
generatedIdeas$agentsReward_02[is.na(generatedIdeas$agentsReward_02)] <- 0 # Replace NAs in the phase 02 reward column with 0
selectedIdeas <- generatedIdeas
pooledIdeas <- phase02(selectedIdeas, target) # Pooling ideas
agentsRewards <- phase03(pooledIdeas, target)
filter(agentsRewards, Rewards>=1)
```
## Third idea challenge: more clusters
```{r}
clusters <- 50 # Increased number of clusters (target is still remainder1_1 from the previous chunk)
agentsRewards3 <- ideaChallenge(target, totAgents, totIdeas, clusters, FALSE)
```