---
title: "Agent-based model simulator"
date: '2018-08-11'
output:
  html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(warning = FALSE) # suppress warning messages
knitr::opts_chunk$set(message = FALSE) # suppress messages
knitr::opts_chunk$set(echo = TRUE)     # show the R code in the output
```
## Theoretical model, version 01
In this initial model, we have three phases: idea generation, idea pooling, and analysis of the results.
# Phase 01: Idea generation
We assign a random number to each agent
```{r phase01.1}
target <- 1e9 # we set the target at 1'000'000'000
totAgents <- 1e3 # We use 1'000 agents
clusters <- 10 # We assume 10 domains for ideas
set.seed(1) #Setting a seed to allow comparable results
ideas <- runif(totAgents, min = 0, max = target) # Generating random numbers from a uniform distribution between 0 and the target
```
We cluster the results
```{r phase01.2}
ideaClusters <- 1+as.integer(ideas/(target/clusters)) # Assigning each idea to a cluster by dividing its value by the cluster width (target/clusters)
agentsId <- 1:totAgents # Adding a UID
agentsReward_01 <- rep(0,totAgents) # A variable associated to phase 01
agentsReward_02 <- rep(0,totAgents) # A variable associated to phase 02
agents <- data.frame(agentsId, ideas, ideaClusters,agentsReward_01,agentsReward_02) #Creating a dataframe with ID, value and cluster
# hist(agents$ideas)
# hist(agents$ideaClusters)
```
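As a quick check (in the same spirit as the commented-out histograms above), the 1'000 uniform draws should spread roughly evenly across the 10 clusters:
```{r}
table(agents$ideaClusters) # expect roughly 100 ideas per cluster
```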
We assign prizes
```{r phase01.3}
idClusters <- 1:clusters
idWinners <- rep(NA, clusters)
valueWinners <- rep(NA, clusters)
listWinners <- data.frame(idClusters, idWinners, valueWinners) # Creating a dataframe with cluster ID, winner ID and value
for (i in 1:totAgents) {
  j <- agents[i, 3] # Check the cluster of the agent
  if (is.na(listWinners[j, 2])) { # If the cluster of the agent does not have a winner yet (its value is NA) ...
    listWinners[j, 2] <- agents[i, 1] # ... use the agent's ID
    listWinners[j, 3] <- agents[i, 2] # ... use the agent's idea value
    agents[i, 4] <- 1 # ... and reward the selected agent
  }
}
listWinners
```
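With 1'000 agents spread over 10 clusters, every cluster should end up with a winner; a minimal check using the objects above:
```{r}
sum(!is.na(listWinners$idWinners)) == clusters # TRUE if every cluster has a winner
```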
# Phase 02: Idea pooling
We use [lpSolve](https://www.kdnuggets.com/2018/05/optimization-using-r.html) to define the objective function
```{r phase02.1}
library(lpSolve)
objective.in <- listWinners[,3] # The objective coefficients are the values of the retained (winning) ideas
```
We define the constraints of the problem
```{r phase02.2}
mat <- matrix(listWinners[,3], nrow=1, byrow=TRUE) # The sum of the pooled ideas ...
dir <- "<=" # ... should be below ...
rhs <- target # ... the target
```
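In effect, with `all.bin = TRUE` the pooling step is a 0/1 knapsack over the cluster winners: we choose $x_j \in \{0,1\}$ for each winning idea value $v_j$ so as to

$$\max \sum_{j=1}^{\text{clusters}} v_j\, x_j \quad \text{subject to} \quad \sum_{j=1}^{\text{clusters}} v_j\, x_j \le \text{target}.$$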
We solve the linear programming problem and reward the owners of the pooled ideas
```{r phase02.3}
optimum <- lp(direction = "max", objective.in, mat, dir, rhs, all.bin = TRUE) # Which is the best combination of pooled ideas?
optimum$solution # The selected ideas
for (i in 1:clusters) {
  if (optimum$solution[i] > 0) { # if an idea is pooled ...
    j <- listWinners[i, 2] # ... select the owner of the pooled idea
    agents[j, 5] <- 1 # ... and reward the agent another time
  }
}
```
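As a quick sanity check, the value of the pooled combination should not exceed the target:
```{r}
sum(listWinners$valueWinners * optimum$solution) <= target # should be TRUE
```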
# Phase 03: Analyze results
[Filter](http://r4ds.had.co.nz/transform.html#filter-rows-with-filter) the winners
```{r phase03.1}
library(dplyr) # filter() comes from dplyr
totWinners <- filter(agents, agentsReward_01 > 0) # This could reduce computational effort
```
[Gather](http://r4ds.had.co.nz/tidy-data.html#spreading-and-gathering) the last two reward columns of agents into one
```{r phase03.2}
library(tidyverse)
agentsRewards <- agents %>%
  gather('agentsReward_01', 'agentsReward_02', key = "Phase", value = "Rewards")
```
[Summarize](http://r4ds.had.co.nz/transform.html#grouped-summaries-with-summarise) by agent
```{r phase03.3}
summarise(group_by(agentsRewards, agentsId), Rewards = sum(Rewards, na.rm = TRUE))
```
Check the remainder for another idea challenge
```{r phase03.4}
remainder <- target - sum(agents$ideas*agents$agentsReward_02) #Comparing the initial target with the sum of the pooled ideas
```
## Theoretical model, version 02
Setting up a function to generate ideas
```{r}
phase00 <- function(target, totAgents, totIdeas, clusters){
  set.seed(1) # Setting the seed to a fixed number to allow comparable results
  agentsId <- as.integer(rnorm(totIdeas, totAgents/2, totAgents/10)) # Assigning an owner to each idea (this allows multiple ideas per owner)
  set.seed(1) # Setting the seed again
  idea <- as.integer(runif(totIdeas, min = 0, max = target)) # Generating random numbers from a uniform distribution
  ideaClusters <- 1 + as.integer(idea/(target/clusters)) # Assigning each idea to a cluster by dividing its value by the cluster width (target/clusters)
  agentsReward_01 <- rep(0, totIdeas) # A variable associated with phase 01
  agentsReward_02 <- rep(0, totIdeas) # A variable associated with phase 02
  ideas <- data.frame(agentsId, idea, ideaClusters, agentsReward_01, agentsReward_02) # Creating a dataframe with owner ID, value, cluster and rewards
  return(ideas)
}
```
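A minimal usage sketch (the name `ideasDemo` and the parameter values below are illustrative):
```{r}
ideasDemo <- phase00(target = 1e9, totAgents = 1e3, totIdeas = 1e4, clusters = 10)
head(ideasDemo) # one row per idea: owner ID, idea value, cluster, and the two reward columns
```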
Setting up a function to select ideas
```{r}
phase01 <- function(clusters, ideas){
  # Creating a dataframe with cluster ID, winner ID and value
  idClusters <- 1:clusters
  idWinners <- rep(NA, clusters)
  valueWinners <- rep(NA, clusters)
  listWinners <- data.frame(idClusters, idWinners, valueWinners)
  # Reward the owners of the selected ideas
  lenIdeas <- dim(ideas)[1]
  for (i in 1:lenIdeas) {
    j <- ideas[i, 3] # Check the cluster of the idea
    if (is.na(listWinners[j, 2])) {
      # If the cluster does not have a winner yet (its value is NA) ...
      listWinners[j, 2] <- ideas[i, 1] # ... use the agent's ID
      listWinners[j, 3] <- ideas[i, 2] # ... use the idea's value
      ideas[i, 4] <- 1 + ideas[i, 4] # ... and reward the selected agent by adding 1 (taking into account multiple challenges)
    }
  }
  return(ideas)
}
```
Setting up a function to pool ideas
```{r}
phase02 <- function (selectedIdeas, target) {
  library(lpSolve)
  library(dplyr) # filter() and left_join() come from dplyr
  ideas <- selectedIdeas
  listWinners <- filter(ideas, agentsReward_01 > 0) # Filtering the list of winners before performing a left join with the pooled ideas
  # Linear programming
  objective.in <- listWinners[, 2] # The objective coefficients are the values of the retained ideas
  mat <- matrix(listWinners[, 2], nrow = 1, byrow = TRUE) # The sum of the pooled ideas ...
  dir <- "<=" # ... should be below ...
  rhs <- target # ... the target
  optimum <- lp(direction = "max", objective.in, mat, dir, rhs, all.bin = TRUE) # Which is the best combination of pooled ideas?
  listWinners[, 5] <- optimum$solution # Assign prizes to the pooled ideas
  pooledIdeas <- ideas %>%
    left_join(listWinners, by = c("agentsId", "idea", "ideaClusters", "agentsReward_01")) # Left join
  agentsReward_02 <- pooledIdeas$agentsReward_02.x + pooledIdeas$agentsReward_02.y # Taking into account previous idea challenges
  pooledIdeas <- data.frame(pooledIdeas, agentsReward_02) # Add the new column
  pooledIdeas[5] <- NULL # Removing the redundant column agentsReward_02.x
  pooledIdeas[5] <- NULL # Removing the redundant column agentsReward_02.y
  return(pooledIdeas)
}
```
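Chaining the three functions defined so far (a minimal sketch; `ideasDemo`, `selectedDemo` and `pooledDemo` are illustrative names):
```{r}
ideasDemo <- phase00(1e9, 1e3, 1e4, 10)  # generate 10'000 ideas
selectedDemo <- phase01(10, ideasDemo)   # pick the first idea encountered in each of the 10 clusters
pooledDemo <- phase02(selectedDemo, 1e9) # pool the cluster winners with the knapsack
filter(pooledDemo, agentsReward_02 > 0)  # ideas rewarded again in phase 02
```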
Setting up a function to summarize the rewards
```{r}
phase03 <- function(ideas, target){
  library(tidyverse)
  agentsRewards <- ideas %>%
    gather('agentsReward_01', 'agentsReward_02', key = "Phase", value = "Rewards")
  winners <- summarise(group_by(agentsRewards, agentsId, idea), Rewards = sum(Rewards, na.rm = TRUE))
  winners <- arrange(winners, idea) # Sorting ideas
  problemSolved <- sum(filter(winners, Rewards > 1)[, 2]) # Sum of the values of the ideas rewarded in both phases
  # Print results for comparison
  print("filter(winners, Rewards > 0)")
  print(filter(winners, Rewards > 0)) # Test: selected ideas
  print("mean(winners$Rewards)")
  print(mean(winners$Rewards))
  print("Problem solved")
  print(problemSolved)
  print("Remainder")
  print(target - problemSolved)
  return(agentsRewards)
}
```
Setting up a function to run an idea challenge
```{r}
ideaChallenge <- function(target, totAgents, totIdeas, clusters, sortedIdea) {
  generatedIdeas <- phase00(target, totAgents, totIdeas, clusters) # Generating ideas
  if (sortedIdea) generatedIdeas <- arrange(generatedIdeas, desc(idea)) # Sorting ideas by value (descending), mimicking expert screening
  selectedIdeas <- phase01(clusters, generatedIdeas) # Selecting ideas
  pooledIdeas <- phase02(selectedIdeas, target) # Pooling ideas
  agentsRewards <- phase03(pooledIdeas, target) # Summarizing the rewards
  return(agentsRewards)
}
```
# Run the simulation
## Standard idea challenge benchmark
```{r}
target <- 1e9 # we set the target at 1'000'000'000
totAgents <- 1e3
totIdeas <- 1e4
clusters <- 1 # a single cluster, so only one winner
generatedIdeas0 <- phase00(target, totAgents, totIdeas, clusters) # Generating ideas
generatedIdeas0 <- arrange(generatedIdeas0, desc(idea)) # Sorting ideas by value, as experts would
selectedIdeas0 <- phase01(clusters, generatedIdeas0) # Selecting ideas
agentsRewards0 <- selectedIdeas0 %>%
  gather('agentsReward_01', 'agentsReward_02', key = "Phase", value = "Rewards")
winners <- dim(filter(agentsRewards0, Rewards > 0))[1]
quality <- filter(agentsRewards0, Rewards == 1)[, 2]
prize <- mean(agentsRewards0$Rewards) * totAgents
challengeID <- "Single Winner"
performance <- data.frame(challengeID, winners, quality, prize)
performance$winners # Winners
performance$quality # Sum of the pooled ideas
performance$prize # Cost of prizes
```
## First idea challenge
Model A: Selecting the first idea for each cluster
```{r}
target <- 1e9 # we set the target at 1'000'000'000
totAgents <- 1e3
totIdeas <- 1e4
clusters <- 10
agentsRewards1_1 <- ideaChallenge(target, totAgents, totIdeas, clusters, FALSE)
```
Model B: Selecting the idea for each cluster with the highest score
```{r}
agentsRewards2 <- ideaChallenge(target, totAgents, totIdeas, clusters, TRUE) #using sorted ideas
```
## New idea challenge starting from the remainder of the previous idea challenge
```{r}
# Setting up the remainder as new target
winners1_1 <- summarise(group_by(agentsRewards1_1, agentsId, idea), Rewards = sum(Rewards, na.rm = TRUE))
problemSolved1_1 <- sum(filter(winners1_1, Rewards > 1)[, 2])
remainder1_1 <- target - problemSolved1_1
agentsRewards1_2 <- ideaChallenge(remainder1_1, totAgents, totIdeas, clusters, FALSE)
```
## New idea challenge: Cheatstorming with the remainder
```{r}
# agentsRewards1_3 <- ideaChallenge_Cheatstorming(remainder1_1, totAgents, totIdeas, clusters, agentsRewards1_1) # Using old ideas
target <- remainder1_1 # New target
generatedIdeas <- agentsRewards1_1 %>%
  spread(key = Phase, value = Rewards) # Putting the table back in shape
generatedIdeas <- filter(generatedIdeas, idea < target) # Removing the ideas that are above the target
generatedIdeas$ideaClusters <- 1 + as.integer(generatedIdeas$idea/(target/clusters)) # New clusters
generatedIdeas$agentsReward_01 <- 1/dim(generatedIdeas)[1] + generatedIdeas$agentsReward_01 # Reward all contributions in phase 01
generatedIdeas$agentsReward_02[is.na(generatedIdeas$agentsReward_02)] <- 0 # Replace NAs in the phase 02 reward column with 0
selectedIdeas <- generatedIdeas
pooledIdeas <- phase02(selectedIdeas, target) # Pooling ideas
agentsRewards <- phase03(pooledIdeas, target)
filter(agentsRewards, Rewards>=1)
```
## Third idea challenge: more clusters
```{r}
clusters <- 50 # Increased number of clusters (target is still remainder1_1 from the previous chunk)
agentsRewards3 <- ideaChallenge(target, totAgents, totIdeas, clusters, FALSE)
```