-
Notifications
You must be signed in to change notification settings - Fork 0
/
movieMeta.py
116 lines (94 loc) · 3.99 KB
/
movieMeta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import init
import sys
from identifyMovie import getAndExtract, HTTPException
import imdb
import urllib.request as ur
import urllib.error as ue
import re
import hashlib
import os, os.path
# Obtain the shared configuration object and logger from the project's
# init module; both are used as module-level globals below.
conf, logger = init.configure()
if conf is None:
    # NOTE(review): the message mentions defaults, but nothing here assigns
    # any; the conf[...] lookups further down this module would fail on a
    # None conf. Confirm what init.configure() is expected to guarantee.
    logger.error("Could not open config file, reverting to defaults")
class movieMetaException(Exception):
    """Error raised by this module when movie metadata cannot be obtained.

    Args:
        message: Human-readable description of the failure. It is kept in
            the ``message`` attribute and is also what ``str(exc)`` shows.
    """

    def __init__(self, message):
        # Forward the message to Exception so that args/str()/repr() are
        # populated even when the message is passed as a keyword argument.
        super().__init__(message)
        self.message = message
# movieposterdb settings taken from the configuration: the URL that the
# IMDB id is appended to, and the regular expression handed to
# getAndExtract together with that URL.
URL_MP_MOVIE = conf["movieposterdb_URL"]
RE_MP_IMG = conf["movieposterdb_poster_regexp"]

# Module-wide IMDb access object used by getMovieMeta.
ia = imdb.IMDb()
def getPosterURL(IMDBid):
    """Look up the alternative poster URL for a movie on movieposterdb.

    Args:
        IMDBid: IMDB identifier; its string form is appended to
            URL_MP_MOVIE to build the page address.

    Returns:
        Whatever getAndExtract yields for that page and RE_MP_IMG, or None
        when getAndExtract raises HTTPException (the failure is logged).
    """
    logger.debug('Getting alternative poster URL from movieposterdb')
    page_url = URL_MP_MOVIE + str(IMDBid)
    try:
        # the poster page requested is the bigger/main one on movieposterdb
        return getAndExtract(page_url, RE_MP_IMG)
    except HTTPException:
        logger.error('Could not get poster page from movieposterdb')
        return None
def _download_poster(poster_url):
    """Save the poster at *poster_url* into conf['path_posters'] unless present.

    The local file name is the MD5 hex digest of the URL, so the same URL
    always maps to the same file and is fetched at most once.
    """
    fname = os.path.join(conf['path_posters'],
                         hashlib.md5(poster_url.encode()).hexdigest())
    if os.path.isfile(fname):
        logger.debug('Poster at %s has already been downloaded: skipping' % poster_url)
        return
    logger.debug('Downloading poster file at url %s' % poster_url)
    ur.urlretrieve(poster_url, fname)


def getMovieMeta(IMDBid, downloadPosters = False):
    """Get movie metadata from IMDB given the movie's ID.

    The object returned by ``ia.get_movie`` is enriched with two extra keys:

    * ``runtime_simple`` - first element of the ``runtime`` list, or ``'0'``
      when the runtime is missing or empty;
    * ``slug`` - the lowercased ``long imdb title`` with every character
      other than ASCII letters, digits and spaces removed, and spaces
      turned into dashes.

    Args:
        IMDBid: IMDB identifier passed to ``ia.get_movie``.
        downloadPosters: when true, the posters referenced by the
            ``full-size cover url`` and ``altPosterURL`` keys (if set) are
            downloaded into ``conf['path_posters']``.

    Returns:
        The enriched movie object.

    Raises:
        movieMetaException: for any failure while fetching the metadata or
            downloading posters; the original exception is chained as the
            cause.
    """
    logger.debug('Getting movie metadata from IMDB')
    try:
        m = ia.get_movie(IMDBid)

        # runtime is a list: keep only its first element, falling back to
        # '0' when the key is absent or the value is empty/None
        runtimes = m.get('runtime') or [u'0']
        m['runtime_simple'] = runtimes[0]

        # slug: lowercase title, drop anything that is not alphanumeric or
        # a space, then convert spaces into dashes
        slug = m.get('long imdb title', '').lower()
        slug = re.sub('[^a-zA-Z0-9 ]', '', slug)
        m['slug'] = slug.replace(' ', '-')

        # moviePosterDB limits connections - disabled by default
        # add alternative poster URL if it exists
        # altPosterURL = getPosterURL(IMDBid)
        # if altPosterURL:
        #     m.set_item('altPosterURL', altPosterURL)

        if downloadPosters:
            for posterKey in ('full-size cover url', 'altPosterURL'):
                poster_url = m.get(posterKey)
                if poster_url is not None:
                    _download_poster(poster_url)
    except imdb.IMDbDataAccessError as e:
        # NOTE(review): not every IMDb error object is guaranteed to carry
        # an ``errmsg`` attribute; fall back to the exception itself.
        detail = getattr(e, 'errmsg', e)
        raise movieMetaException('IMDB Data Access Error: %s' % detail) from e
    except ue.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first
        raise movieMetaException('HTTPError: %s' % e.code) from e
    except ue.URLError as e:
        raise movieMetaException('URLError: %s' % e.reason) from e
    except Exception as e:
        raise movieMetaException('Unknown movieMetaException: %s' % e) from e
    return m
if __name__ == '__main__':
    # Command-line entry point: take the IMDB id from the first argument,
    # or use a built-in sample id when none is given, then print a
    # selection of the metadata returned by getMovieMeta.
    IMDBid = sys.argv[1] if len(sys.argv) >= 2 else "3268458"

    print("[i] Getting metadata for IMDB id %s..." % IMDBid)
    m = getMovieMeta(IMDBid)

    if not m:
        print("[x] No metadata found")
    else:
        # only these keys are printed (e.g. binaries are ignored)
        printed_keys = (
            'title', 'long imdb title', 'slug', 'year', 'rating',
            'runtime_simple', 'plot outline', 'cover url',
            'full-size cover url', 'altPosterURL',
        )
        for key in printed_keys:
            print(" %s: %s" % (key, m.get(key, "")))

        # movie genres, one per line
        print(" Genres:")
        for genre in m.get('genres', []):
            print("%s" % genre)