-
Notifications
You must be signed in to change notification settings - Fork 0
/
IMDbUtils.R
106 lines (89 loc) · 4.04 KB
/
IMDbUtils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
###############################################################################
### VSP implementation for Knowledge Discovery in Graphs Through Vertex ###
### Separation ###
### ###
### Copyright (C) 2017 Marc Sarfati, Marc Queudot, ###
### Catherine Mancel, Marie-Jean Meurs ###
### ###
### Permission is hereby granted, free of charge, to any person obtaining a ###
### copy of this software and associated documentation files ###
### (the "Software"), to deal in the Software without restriction, ###
### including without limitation the rights to use, copy, modify, merge, ###
### publish, distribute, sublicense, and/or sell copies of the Software, ###
### and to permit persons to whom the Software is furnished to do so, ###
### subject to the following conditions: ###
### ###
### The above copyright notice and this permission notice shall be included ###
### in all copies or substantial portions of the Software. ###
### ###
### THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ###
### OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ###
### MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ###
### IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ###
### CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ###
### TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ###
### SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ###
###############################################################################
#' Look up the full name recorded for an artist id.
#'
#' Reads the `artists` table from the enclosing scope; it is not an argument.
#'
#' @param artistid Value compared (with `==`) against `artists$artistid`.
#' @return Character vector with `artists$fullname` for each matching row.
getActorName <- function(artistid) {
  isMatch <- artists$artistid == artistid
  matchedNames <- artists$fullname[isMatch]
  as.character(matchedNames)
}
#' Look up the IMDb title recorded for a movie id.
#'
#' Reads the `movies` table from the enclosing scope; it is not an argument.
#'
#' @param movieid Value compared (with `==`) against `movies$movieid`.
#' @return Character vector with `movies$imdbtitle` for each matching row.
getMovieName <- function(movieid) {
  isMatch <- movies$movieid == movieid
  matchedTitles <- movies$imdbtitle[isMatch]
  as.character(matchedTitles)
}
#' List the movie ids an artist has a role in.
#'
#' Reads the `roles` table from the enclosing scope; it is not an argument.
#'
#' @param artistid Value compared (with `==`) against `roles$artistid`.
#' @return The entries of `roles$movieid` on the matching rows, in table order.
getActorsMovies <- function(artistid) {
  playedBy <- roles$artistid == artistid
  roles$movieid[playedBy]
}
#' Print the titles of every movie an artist has a role in.
#'
#' Movie ids come from `getActorsMovies()` and each one is turned into a title
#' with `getMovieName()`.
#'
#' @param artistid Artist identifier forwarded to `getActorsMovies()`.
#' @return The value of `print()`, i.e. the printed titles, invisibly.
printActorsMovies <- function(artistid) {
  movieIds <- getActorsMovies(artistid)
  titles <- sapply(movieIds, getMovieName)
  print(titles)
}
#' Names of the actors grouped under a graph node.
#'
#' @param node Index (or name) used with `[[` to pick one entry of
#'   `actorsInNode`.
#' @param actorsInNode List-like mapping from node to artist ids. When left
#'   `NULL`, the `actorsInNode` object visible from this function's enclosing
#'   scope is used.
#' @return Result of applying `getActorName()` to every artist id of the node
#'   (shape as produced by `sapply()`).
getNodeActorNames <- function(node, actorsInNode = NULL) {
  # The previous default `actorsInNode = actorsInNode` referred to itself and
  # failed with "promise already under evaluation" whenever the argument was
  # omitted. Look the shared object up explicitly, skipping the local frame.
  if (is.null(actorsInNode)) {
    actorsInNode <- get("actorsInNode", envir = parent.env(environment()))
  }
  actors <- actorsInNode[[node]]
  # sapply() is kept on purpose: getActorName() may return zero or several
  # names for an id, which a fixed-shape vapply() would reject.
  sapply(actors, getActorName)
}
#' Movie ids associated with a graph node.
#'
#' Only the first artist stored for the node is consulted.
#' NOTE(review): presumably all artists of a node share the same movies;
#' confirm against the code that builds `actorsInNode`.
#'
#' @param node Index (or name) used with `[[` to pick one entry of
#'   `actorsInNode`.
#' @param actorsInNode List-like mapping from node to artist ids. When left
#'   `NULL`, the `actorsInNode` object visible from this function's enclosing
#'   scope is used.
#' @return Whatever `getActorsMovies()` returns for the node's first artist.
getNodeMovies <- function(node, actorsInNode = NULL) {
  # The previous default `actorsInNode = actorsInNode` referred to itself and
  # failed with "promise already under evaluation" whenever the argument was
  # omitted. Look the shared object up explicitly, skipping the local frame.
  if (is.null(actorsInNode)) {
    actorsInNode <- get("actorsInNode", envir = parent.env(environment()))
  }
  firstActor <- actorsInNode[[node]][1]
  getActorsMovies(firstActor)
}
#' Titles of the movies associated with a graph node.
#'
#' Despite its name this function does not call `print()`; it returns the
#' titles. Only the first artist stored for the node is consulted.
#'
#' @param node Index (or name) used with `[[` to pick one entry of
#'   `actorsInNode`.
#' @param actorsInNode List-like mapping from node to artist ids. When left
#'   `NULL`, the `actorsInNode` object visible from this function's enclosing
#'   scope is used.
#' @return Result of applying `getMovieName()` to each movie id returned by
#'   `getActorsMovies()` for the node's first artist.
printNodeMovies <- function(node, actorsInNode = NULL) {
  # The previous default `actorsInNode = actorsInNode` referred to itself and
  # failed with "promise already under evaluation" whenever the argument was
  # omitted. Look the shared object up explicitly, skipping the local frame.
  if (is.null(actorsInNode)) {
    actorsInNode <- get("actorsInNode", envir = parent.env(environment()))
  }
  firstActor <- actorsInNode[[node]][1]
  sapply(getActorsMovies(firstActor), getMovieName)
}
#' Restrict the roles data to the movies with the smallest ids.
#'
#' Reads `rolesData` from the enclosing scope; it is not an argument.
#'
#' @param nMovies Number of distinct movie ids to keep, counted from the
#'   smallest id upwards. Values larger than the number of distinct ids keep
#'   every movie.
#' @return The rows of `rolesData` whose `movieid` is among the `nMovies`
#'   smallest distinct ids.
reduceInputSize <- function(nMovies = 20) {
  movieIds <- sort(unique(rolesData$movieid))
  # Select by membership rather than `movieid <= movieIds[nMovies]`: indexing
  # past the end gave an NA threshold and a result made only of NA rows.
  keptIds <- head(movieIds, nMovies)
  rolesData[rolesData$movieid %in% keptIds, ]
}
#' Keep only the largest connected component of a graph.
#'
#' @param g An igraph graph.
#' @return The subgraph of `g` induced by the vertices of its largest
#'   connected component. When several components share the largest size, the
#'   one with the lowest component id is kept.
keepBiggestConnectedComponent <- function(g) {
  # components() replaces the deprecated clusters(); same result structure.
  comp <- components(g)
  # which.max() always yields a single id. The former
  # which(csize == max(csize)) returned several ids on ties, and comparing
  # membership against them recycled silently and picked the wrong vertices.
  biggest <- which.max(comp$csize)
  induced_subgraph(g, which(comp$membership == biggest))
}
#' Read a solver solution file into a logical indicator vector.
#'
#' The first two lines of the file are skipped. For every remaining line, the
#' text before the first space is taken as a variable name, its first
#' character is dropped and the rest is parsed as a numeric index.
#' NOTE(review): presumably the skipped lines are the SCIP status/objective
#' header and names look like `x12`; confirm against the solver output.
#'
#' @param filename Path of the solution file.
#' @param n The result has `2 * n` entries.
#' @return One-dimensional logical array of length `2 * n`, `TRUE` at every
#'   index listed in the file and `FALSE` elsewhere.
readSolutionFromScip <- function(filename, n) {
  # readLines() on a path opens and closes the file itself, so no connection
  # is leaked if reading fails.
  lines <- readLines(filename)
  # Negative indexing is safe for short files; `3:length(lines)` counted
  # backwards when fewer than three lines were present.
  entries <- lines[-(1:2)]
  varnames <- sub(" .*$", "", entries)
  ones <- as.numeric(substr(varnames, 2, nchar(varnames)))
  # Lines that do not yield an index (e.g. blank lines) are ignored.
  ones <- ones[!is.na(ones)]
  xy <- array(FALSE, 2 * n)
  xy[ones] <- TRUE
  xy
}
#' Assign a cluster id to every vertex by comparing movie lists.
#'
#' Repeatedly takes the lowest-indexed vertex not yet visited, gives it a new
#' cluster id, and gives the same id to each of its neighbours whose movie set
#' (from `getNodeMovies()`) equals its own. A neighbour labelled earlier is
#' relabelled if it matches. Reads `actorsInNode` from the enclosing scope.
#'
#' @param g An igraph graph; vertices are addressed by position in `V(g)`.
#' @return One-dimensional numeric array with one cluster id per vertex.
regroupActors <- function(g) {
  nVertices <- length(V(g))
  seen <- array(FALSE, nVertices)
  clusterOf <- array(0, nVertices)
  clusterId <- 1
  while (!all(seen)) {
    # which() reports indices in increasing order, so the first one is the
    # lowest unvisited vertex.
    seed <- which(!seen)[1]
    seen[seed] <- TRUE
    clusterOf[seed] <- clusterId
    seedMovies <- getNodeMovies(seed, actorsInNode)
    for (nb in neighbors(g, seed)) {
      sameMovies <- setequal(seedMovies, getNodeMovies(nb, actorsInNode))
      if (sameMovies) {
        seen[nb] <- TRUE
        clusterOf[nb] <- clusterId
      }
    }
    clusterId <- clusterId + 1
  }
  clusterOf
}