From 5392002579d81a715e0bf00d9869ed2fc4be766b Mon Sep 17 00:00:00 2001 From: Michael Field Date: Fri, 4 Feb 2022 20:35:57 -0800 Subject: [PATCH 1/2] PCC13 kaijuking --- 13/kaijuking/directors.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 13/kaijuking/directors.py diff --git a/13/kaijuking/directors.py b/13/kaijuking/directors.py new file mode 100644 index 000000000..e0369f70f --- /dev/null +++ b/13/kaijuking/directors.py @@ -0,0 +1 @@ +print('test') \ No newline at end of file From 4bd3a257329408e92f1caa49747aec6466b21a39 Mon Sep 17 00:00:00 2001 From: Michael Field Date: Sat, 5 Feb 2022 21:54:05 -0800 Subject: [PATCH 2/2] PCC13 kaijuking --- 13/kaijuking/directors.py | 104 +++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/13/kaijuking/directors.py b/13/kaijuking/directors.py index e0369f70f..09413cb5e 100644 --- a/13/kaijuking/directors.py +++ b/13/kaijuking/directors.py @@ -1 +1,103 @@ -print('test') \ No newline at end of file +import csv +from collections import defaultdict, namedtuple +from operator import attrgetter + + +MOVIE_DATA = 'movie_metadata.csv' +NUM_TOP_DIRECTORS = 20 +MIN_MOVIES = 4 +MIN_YEAR = 1960 + + +Movie = namedtuple('Movie', 'title year score') +Director = namedtuple('Director', 'director average_score') +movie_dd = defaultdict(list) + + +def get_movies_by_director(): + '''Extracts all movies from csv and stores them in a dictionary + where keys are directors, and values is a list of movies (named tuples)''' + + with open(MOVIE_DATA, newline='', encoding="utf8") as csvfile: + csv_reader = csv.DictReader(csvfile, delimiter=',') + + for row in csv_reader: + + # Filter out any row whose 'title_year' meets or exceeds the minimum year + if row['title_year'] >= str(MIN_YEAR): + + # Construct the movie defaultdict + # key = director_name, value = Movie Tuple(title, year, score) + movie_dd[row['director_name']].append(Movie(row['movie_title'], row['title_year'], row['imdb_score'])) + + + # Sort the list of namedtuples by score from highlest to lowest + for director in movie_dd: + movie_dd[director] = sorted(movie_dd[director], key=attrgetter('score'), reverse=True) + + return movie_dd + + +def get_average_scores(directors): + '''Filter directors with < MIN_MOVIES and calculate averge score''' + + # Eventually will be a list of tuples like [('director', 'average_score')] + avg_scores = [] + + # Loop through the directors defaultdict and calculate the director's average movie score + for director, movies in directors.items(): + if len(movies) >= MIN_MOVIES: + avg_scores.append((director, _calc_mean(movies))) + + # Found here: https://pythonguides.com/python-sort-list-of-tuples/ + # Sort the averages list by the second element ('average_score') going from highest to lowest + avg_scores.sort(key=lambda y: y[1], reverse=True) + + # Return only the first 20 items of the sorted list + # This shoud equate to the top 20 directors and their movie score averages + return avg_scores[0:NUM_TOP_DIRECTORS] + + +def _calc_mean(movies): + '''Helper method to calculate mean of list of Movie namedtuples''' + total_score = 0 + for movie in movies: + total_score += float(movie.score) + + # Round the division to return a float to one decimal point (ex: 8.5) + avg_score = round(total_score / len(movies), 1) + return avg_score + + +def print_directors_and_averages(directors, averages): + counter = 1 + + # Loop through the list of tuple averages (ex: [('director', 'average_score')]) + for d1, avg in averages: + + # No need to sort as the averages tuple list is already sorted by average from highest to lowest + print('\n') + print(f'{counter:02d}. {d1:<52} {avg}') + print('-' * 60) + + # Loop through the defaultdict of movies + for d2, movies in directors.items(): + + # Print the list of movies if the director is found in both + # Movies are printed in order as is since movie defaultdict is already sorted by 'score' + if d2 == d1: + for movie in movies: + print(f'{movie.year}] {movie.title:<50} {movie.score}') + counter += 1 + + +def main(): + directors = get_movies_by_director() + averages = get_average_scores(directors) + print_directors_and_averages(directors, averages) + + +if __name__ == '__main__': + main() + + \ No newline at end of file