-
Notifications
You must be signed in to change notification settings - Fork 0
/
imdb-scraping.py
36 lines (31 loc) · 960 Bytes
/
imdb-scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import imdb
import json
# Number of top-rated titles to export.
TOP_N = 50
# Destination file for the scraped records.
OUTPUT_PATH = "movie.json"


def build_movie_record(movie_id, movie):
    """Flatten one movie into the JSON-ready dict this script exports.

    Args:
        movie_id: Identifier stored verbatim under ``"movie_id"``.
        movie: Object exposing ``get(key[, default])``. The keys read are
            ``'title'``, ``'plot outline'``, ``'genres'``,
            ``'full-size cover url'``, ``'cast'`` and ``'rating'``.
            Each cast entry must support ``entry['name']``.

    Returns:
        A dict with the keys ``movie_id``, ``title``, ``description``,
        ``categories``, ``image``, ``actors`` and
        ``current_recommended_rate``. Scalar fields hold whatever
        ``movie.get`` returns for a missing key; list fields fall back to
        an empty list.
    """
    # Only the first five cast members are exported.
    leading_cast = movie.get('cast', [])[:5]
    return {
        "movie_id": movie_id,
        "title": movie.get('title'),
        "description": movie.get('plot outline'),
        "categories": [genre.strip() for genre in movie.get('genres', [])],
        "image": movie.get('full-size cover url'),
        "actors": [actor['name'].strip() for actor in leading_cast],
        "current_recommended_rate": movie.get('rating'),
    }


def scrape_top_movies(ia, limit=TOP_N):
    """Fetch details for up to ``limit`` entries of the top-250 chart.

    Args:
        ia: Client exposing ``get_top250_movies()`` and ``get_movie(id)``
            (the script passes an ``imdb.IMDb()`` instance).
        limit: Maximum number of chart entries to fetch.

    Returns:
        A list of dicts as produced by ``build_movie_record``, in chart
        order. It is shorter than ``limit`` when the chart lookup yields
        fewer entries.
    """
    top250 = ia.get_top250_movies()
    movies = []
    # Slice rather than index by range(limit): a short or empty chart
    # must not raise IndexError.
    for entry in top250[:limit]:
        movie_id = entry.getID()
        # The chart entry is re-fetched by id before its fields are read.
        # NOTE(review): presumably chart entries are only partially
        # populated -- confirm against the library docs.
        movie = ia.get_movie(movie_id)
        movies.append(build_movie_record(movie_id, movie))
    return movies


def main():
    """Scrape the top movies, write them to ``OUTPUT_PATH``, echo the first."""
    # Create an instance of the IMDb class
    ia = imdb.IMDb()
    # Get the top 50 movies by rating
    movies = scrape_top_movies(ia)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(movies, f)
    if movies:
        print(movies[0])
    else:
        # Nothing was scraped; report it instead of failing on movies[0].
        print("No movies were returned; wrote an empty list to", OUTPUT_PATH)


if __name__ == "__main__":
    main()