Python 100 project #48: PyBites “Code Challenge 13 – Highest Rated Movie Directors”

This challenge uses the csv module to read movie data and display a nice ranking based on a calculated score. It’s not as fancy as wrangling the data with Pandas, but it’s still a lot of fun. I hadn’t used namedtuple much before, but after this challenge I feel more comfortable with it. It’s actually very handy compared to writing a class.

 

Output:

$ python3 directors.py 
1. Sergio Leone                                         8.5
------------------------------------------------------------
1984] Once Upon a Time in America                        8.4
1968] Once Upon a Time in the West                       8.6
1966] The Good, the Bad and the Ugly                     8.9
1964] A Fistful of Dollars                               8.0

2. Christopher Nolan                                    8.4
------------------------------------------------------------
2012] The Dark Knight Rises                              8.5
2008] The Dark Knight                                    9.0
2014] Interstellar                                       8.6
2010] Inception                                          8.8
2005] Batman Begins                                      8.3
2002] Insomnia                                           7.2
2006] The Prestige                                       8.5
2000] Memento                                            8.5

3. Quentin Tarantino                                    8.2
------------------------------------------------------------
2012] Django Unchained                                   8.5
2009] Inglourious Basterds                               8.3
2015] The Hateful Eight                                  7.9
2003] Kill Bill: Vol. 1                                  8.1
2004] Kill Bill: Vol. 2                                  8.0
1997] Jackie Brown                                       7.5
1994] Pulp Fiction                                       8.9
1992] Reservoir Dogs                                     8.4

4. Hayao Miyazaki                                       8.2
------------------------------------------------------------
2008] Ponyo                                              7.7
2004] Howl's Moving Castle                               8.2
1997] Princess Mononoke                                  8.4
2001] Spirited Away                                      8.6

...

 

Code:

import csv
from collections import defaultdict, namedtuple

# CSV file that get_movies_by_director() reads the movie rows from.
MOVIE_DATA = 'movie_metadata.csv'
# How many top-ranked directors print_results() lists.
NUM_TOP_DIRECTORS = 20
# Minimum number of movies a director needs to be kept by get_average_scores().
MIN_MOVIES = 4
# Presumably the earliest release year a movie may have to be considered
# NOTE(review): confirm where this cut-off is meant to be applied.
MIN_YEAR = 1960

def get_movies_by_director(path=None, min_year=None):
    '''Read the movie CSV and group its movies by director.

    path     -- CSV file to read; defaults to MOVIE_DATA.
    min_year -- earliest title_year to keep; defaults to MIN_YEAR.

    Returns a defaultdict(list) mapping each director name to a list of
    namedtuples with the string fields movie_title, title_year and
    imdb_score, in file order.

    Rows are skipped when the director name is blank, or when title_year
    is missing, not an integer, or earlier than min_year.

    Raises OSError if the file cannot be opened and KeyError if one of the
    required columns is absent from the header.
    '''
    # Resolve the defaults at call time so later changes to the module
    # constants are honoured.
    if path is None:
        path = MOVIE_DATA
    if min_year is None:
        min_year = MIN_YEAR

    Movie = namedtuple("movie", ('movie_title', 'title_year', 'imdb_score'))
    directors = defaultdict(list)

    # An explicit encoding avoids depending on the platform locale, and
    # newline='' is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open(path, encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing fields with None.
            director = (row['director_name'] or '').strip()
            if not director:
                # Without a director the movie cannot be attributed; keeping
                # it would create a bogus '' entry in the ranking.
                continue

            try:
                year = int(row['title_year'])
            except (TypeError, ValueError):
                # No usable year, so the min_year condition cannot be met.
                continue
            if year < min_year:
                continue

            # Strip surrounding whitespace (str.strip also removes
            # non-breaking spaces) so titles pad cleanly in the report.
            title = (row['movie_title'] or '').strip()
            directors[director].append(
                Movie(title, row['title_year'], row['imdb_score']))

    return directors


def get_average_scores(directors):
    '''Keep only directors with at least MIN_MOVIES movies.

    Takes a mapping of director name -> list of movies and returns a new
    dict whose keys are (director, mean score) tuples and whose values are
    the same movie lists. The mean score comes from _calc_mean().
    '''
    return {
        (name, _calc_mean(films)): films
        for name, films in directors.items()
        if len(films) >= MIN_MOVIES
    }



def _calc_mean(movies):
    '''Helper method to calculate mean of list of Movie namedtuples'''
    scores = [float(movie.imdb_score) for movie in movies]

    return round(sum(scores)/len(scores),1)


def print_results(directors, num_directors=None):
    '''Print directors ordered by highest average rating. For each director
    print his/her movies also ordered by highest rated movie.
    See http://pybit.es/codechallenge13.html for example output

    directors     -- dict mapping (director, average score) tuples to lists
                     of movies exposing movie_title, title_year, imdb_score.
    num_directors -- maximum number of directors to print; defaults to
                     NUM_TOP_DIRECTORS. Fewer are printed when fewer exist.
    '''
    if num_directors is None:
        num_directors = NUM_TOP_DIRECTORS

    fmt_director_entry = '{counter}. {director:<52} {avg}'
    fmt_movie_entry = '{year}] {title:<50} {score}'
    sep_line = '-' * 60

    ranking = sorted(directors.items(),
                     key=lambda entry: float(entry[0][1]), reverse=True)

    # Slice instead of indexing range(num_directors): indexing raised
    # IndexError whenever fewer directors qualified than were requested.
    # max() keeps a negative limit from slicing off the end of the list.
    top = ranking[:max(num_directors, 0)]

    for counter, ((director, avg), movies) in enumerate(top, 1):
        print(fmt_director_entry.format(counter=counter, director=director, avg=avg))
        print(sep_line)
        # Scores are stored as strings, so compare them numerically. sorted()
        # is stable, so equally rated movies keep their original order.
        by_rating = sorted(movies, key=lambda m: float(m.imdb_score), reverse=True)
        for movie in by_rating:
            print(fmt_movie_entry.format(year=movie.title_year, title=movie.movie_title, score=movie.imdb_score))
        print()


def main():
    '''Load the movies grouped by director, keep the directors that
    qualify together with their average score, and print the ranking.'''
    movies_by_director = get_movies_by_director()
    ranked = get_average_scores(movies_by_director)
    print_results(ranked)


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()