Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pcc13 #837

Open
wants to merge 2 commits into
base: community
Choose a base branch
from
Open

Pcc13 #837

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions 13/kaijuking/directors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import csv
from collections import defaultdict, namedtuple
from operator import attrgetter


MOVIE_DATA = 'movie_metadata.csv'
NUM_TOP_DIRECTORS = 20
MIN_MOVIES = 4
MIN_YEAR = 1960


Movie = namedtuple('Movie', 'title year score')
Director = namedtuple('Director', 'director average_score')
movie_dd = defaultdict(list)


def get_movies_by_director():
'''Extracts all movies from csv and stores them in a dictionary
where keys are directors, and values is a list of movies (named tuples)'''

with open(MOVIE_DATA, newline='', encoding="utf8") as csvfile:
csv_reader = csv.DictReader(csvfile, delimiter=',')

for row in csv_reader:

# Filter out any row whose 'title_year' meets or exceeds the minimum year
if row['title_year'] >= str(MIN_YEAR):

# Construct the movie defaultdict
# key = director_name, value = Movie Tuple(title, year, score)
movie_dd[row['director_name']].append(Movie(row['movie_title'], row['title_year'], row['imdb_score']))


# Sort the list of namedtuples by score from highlest to lowest
for director in movie_dd:
movie_dd[director] = sorted(movie_dd[director], key=attrgetter('score'), reverse=True)

return movie_dd


def get_average_scores(directors):
'''Filter directors with < MIN_MOVIES and calculate averge score'''

# Eventually will be a list of tuples like [('director', 'average_score')]
avg_scores = []

# Loop through the directors defaultdict and calculate the director's average movie score
for director, movies in directors.items():
if len(movies) >= MIN_MOVIES:
avg_scores.append((director, _calc_mean(movies)))

# Found here: https://pythonguides.com/python-sort-list-of-tuples/
# Sort the averages list by the second element ('average_score') going from highest to lowest
avg_scores.sort(key=lambda y: y[1], reverse=True)

# Return only the first 20 items of the sorted list
# This shoud equate to the top 20 directors and their movie score averages
return avg_scores[0:NUM_TOP_DIRECTORS]


def _calc_mean(movies):
'''Helper method to calculate mean of list of Movie namedtuples'''
total_score = 0
for movie in movies:
total_score += float(movie.score)

# Round the division to return a float to one decimal point (ex: 8.5)
avg_score = round(total_score / len(movies), 1)
return avg_score


def print_directors_and_averages(directors, averages):
counter = 1

# Loop through the list of tuple averages (ex: [('director', 'average_score')])
for d1, avg in averages:

# No need to sort as the averages tuple list is already sorted by average from highest to lowest
print('\n')
print(f'{counter:02d}. {d1:<52} {avg}')
print('-' * 60)

# Loop through the defaultdict of movies
for d2, movies in directors.items():

# Print the list of movies if the director is found in both
# Movies are printed in order as is since movie defaultdict is already sorted by 'score'
if d2 == d1:
for movie in movies:
print(f'{movie.year}] {movie.title:<50} {movie.score}')
counter += 1


def main():
directors = get_movies_by_director()
averages = get_average_scores(directors)
print_directors_and_averages(directors, averages)


if __name__ == '__main__':
main()