# actors.R
# Forked from JKetelaar/IMDB-5000-Movies.
# Build an "Actor" object from a single movie record.
#
# row: one movie record that can be indexed by the names 'movie_title' and
#      'actor_1_name'.
#
# Returns an "Actor" whose name is the record's 'actor_1_name' and whose
# movies slot holds the cleaned movie title.
#
# Only 'actor_1_name' is used; 'actor_2_name' and 'actor_3_name' are ignored.
# NOTE(review): confirm whether callers expect one Actor per billed actor.
actorsItsMovies <- function(row) {
  # Strip the \302\240 byte sequence (UTF-8 non-breaking space) from the title.
  cleaned_title <- gsub('\302\240', '', as.character(row['movie_title']))
  lead_actor <- row[['actor_1_name']]
  new("Actor", name = lead_actor, movies = c(cleaned_title))
}
# Return the actors whose movie count is within `offset` of the highest count.
#
# actors: a list of objects that have a `movies` slot (e.g. "Actor" instances).
# offset: how many movies below the maximum an actor may have and still be
#         included; 0 keeps only the actor(s) tied for the most movies.
#
# Returns a list of the qualifying actors in their original order, or NULL
# when `actors` is empty or no actor qualifies.
findMostMakingActors <- function(actors, offset) {
  if (length(actors) == 0) {
    return(NULL)
  }
  # Count every actor's movies once, rather than rescanning the whole list
  # each time a larger actor is found.
  movie_counts <- vapply(
    actors,
    function(actor) length(actor@movies),
    integer(1)
  )
  qualifies <- movie_counts >= max(movie_counts) - offset
  if (!any(qualifies)) {
    return(NULL)
  }
  # Subset in one step instead of growing the result with c() in a loop.
  unname(as.list(actors[qualifies]))
}
# Register the "Actor" S4 class together with its two generics.
#
# The class has a character `name` slot and a `movies` vector slot.
#   addMovies(theObject, movies): returns the actor with `movies` appended to
#     its movies slot (validated before being returned).
#   movies(theObject): returns the number of entries in the movies slot.
createActorClass <- function() {
  setClass("Actor", representation(name = "character", movies = "vector"))

  # addMovies: append titles to an actor.
  setGeneric("addMovies", function(theObject, movies) {
    standardGeneric("addMovies")
  })
  setMethod("addMovies", "Actor", function(theObject, movies) {
    extended <- theObject
    extended@movies <- c(extended@movies, movies)
    validObject(extended)
    extended
  })

  # movies: number of titles held by an actor.
  setGeneric("movies", function(theObject) {
    standardGeneric("movies")
  })
  setMethod("movies", "Actor", function(theObject) {
    length(theObject@movies)
  })
}
# Summarise IMDB scores per lead actor for one genre's movies.
#
# genre_set:  data frame with `actor_1_name` and `imdb_score` columns.
# min_movies: minimum number of non-missing scores an actor needs in order to
#             be kept (default 5).
#
# Returns a data frame with one row per retained actor and the columns
#   actor_1_name  factor whose levels are sorted by mean score,
#   M             mean score,
#   SE            standard error of the mean,
#   N             number of non-missing scores.
createMainActorRatingForGenre <- function(genre_set, min_movies = 5) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  main_actor_rating <- ddply(
    genre_set,
    c("actor_1_name"),
    summarise,
    M = mean(imdb_score, na.rm = TRUE),
    SE = sd(imdb_score, na.rm = TRUE) / sqrt(length(na.omit(imdb_score))),
    N = length(na.omit(imdb_score))
  )
  main_actor_ratings <-
    main_actor_rating[which(main_actor_rating$N >= min_movies), ]
  # Turn the actor column into a factor whose levels follow the mean rating.
  main_actor_ratings$actor_1_name <-
    factor(main_actor_ratings$actor_1_name)
  main_actor_ratings$actor_1_name <-
    reorder(main_actor_ratings$actor_1_name, main_actor_ratings$M)
  main_actor_ratings
}
# Per-lead-actor IMDB score summary over the whole `movies_set`:
# M = mean score, SE = standard error of the mean, N = non-missing scores.
# TRUE rather than T: T is an ordinary variable that can be reassigned.
main_actor_rating <- ddply(
  movies_set,
  c("actor_1_name"),
  summarise,
  M = mean(imdb_score, na.rm = TRUE),
  SE = sd(imdb_score, na.rm = TRUE) / sqrt(length(na.omit(imdb_score))),
  N = length(na.omit(imdb_score))
)
# Keep only lead actors with at least 15 scored movies.
main_actor_ratings <-
  main_actor_rating[which(main_actor_rating$N >= 15), ]
# Turn the actor column into a factor whose levels follow the mean rating.
main_actor_ratings$actor_1_name <-
  factor(main_actor_ratings$actor_1_name)
main_actor_ratings$actor_1_name <-
  reorder(main_actor_ratings$actor_1_name, main_actor_ratings$M)