# all_in_one.py (91 lines, 71 loc, 2.83 KB)
# NOTE: the repository this file comes from was archived by its owner on
# Dec 15, 2021 and is now read-only.
import io
import json
import urllib
from contextlib import closing

try:  # Python 3
    from urllib.parse import quote
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import quote, urlopen
class Search(object):
    """Download full-text values for a Europeana phrase search.

    The single public entry point is :meth:`search`.  It queries the
    Europeana search endpoint, follows each hit to its manifest on
    ``iiif.europeana.eu``, follows the annotation links found in the
    manifest, and writes every ``value`` it finally retrieves to
    ``OUTPUT_PATH`` (one per line).

    The endpoints and the key are class attributes so they can be changed
    without editing the method, e.g. ``Search.API_KEY = "..."``.
    """

    # Placeholder key sent as ``wskey``; must be replaced with a real one.
    API_KEY = "your_key"
    # Search endpoint; the percent-encoded query is appended directly.
    SEARCH_URL = "https://www.europeana.eu/api/v2/search.json?query="
    # Prefix for manifest URLs; a search hit's ``id`` is appended to it.
    MANIFEST_BASE = "https://iiif.europeana.eu/presentation"
    # File that receives the collected text (overwritten on every run).
    OUTPUT_PATH = "fulltext.txt"

    @staticmethod
    def _fetch_json(url):
        """Fetch *url* and return its body decoded as UTF-8 JSON.

        The response is always closed, including when reading or parsing
        fails.
        """
        with closing(urlopen(url)) as response:
            return json.loads(response.read().decode("utf-8"))

    @staticmethod
    def _manifest_annotation_links(manifest):
        """Return the ``@id`` of every ``otherContent``/``images`` entry.

        Every canvas of every sequence is visited.  All ``otherContent``
        ids of the manifest come first, followed by all ``images`` ids.
        Missing keys and entries without an ``@id`` are skipped.
        """
        canvases = []
        for sequence in manifest.get("sequences", []):
            canvases.extend(sequence.get("canvases", []))

        links = []
        # NOTE(review): ``images`` ids are followed exactly like
        # ``otherContent`` ids because the original code did so; confirm
        # that they actually resolve to documents with ``resources``.
        for field in ("otherContent", "images"):
            for canvas in canvases:
                for entry in canvas.get(field, []):
                    if isinstance(entry, dict) and "@id" in entry:
                        links.append(entry["@id"])
        return links

    @staticmethod
    def _annotation_text_links(annotation_page):
        """Return ``resource['@id']`` once for each item in ``resources``."""
        links = []
        for annotation in annotation_page.get("resources", []):
            resource = annotation.get("resource")
            if isinstance(resource, dict) and "@id" in resource:
                links.append(resource["@id"])
        return links

    @staticmethod
    def search(*args):
        """Search for the phrase made of *args* and save the text found.

        The terms are percent-encoded, joined with ``+`` and wrapped in
        (encoded) double quotes to form the query.  Progress is reported
        with ``print``: the search URL, every hit id, every manifest URL
        and the list of annotation links.

        Failures while following an individual manifest, annotation link
        or text link are printed and that link is skipped; they never
        abort the run.  A failure of the initial search request does.

        Args:
            *args: One or more search terms (strings).

        Returns:
            None.  The collected values are written to ``OUTPUT_PATH``.

        Raises:
            ValueError: If no search term is given.
            KeyError: If the search response has no ``items`` entry.
        """
        if not args:
            raise ValueError("search() requires at least one search term")

        # Encode each term so spaces, '&', '#', ... cannot break the URL.
        phrase = "+".join(quote(term) for term in args)
        url = (
            Search.SEARCH_URL
            + "%22" + phrase + "%22"
            + "&rows=100&start=1&&text_fulltext=true&wskey="
            + Search.API_KEY
        )
        print(url)
        results = Search._fetch_json(url)

        record_ids = []
        for item in results["items"]:
            print(item["id"])
            record_ids.append(item["id"])

        manifest_urls = []
        for record_id in record_ids:
            manifest_url = (
                Search.MANIFEST_BASE + record_id
                + "/manifest?wskey=" + Search.API_KEY
            )
            print(manifest_url)
            manifest_urls.append(manifest_url)

        # Each link is handled in isolation: on failure nothing from a
        # previous iteration is reused, the link is simply skipped.
        annotation_links = []
        for manifest_url in manifest_urls:
            try:
                manifest = Search._fetch_json(manifest_url)
                annotation_links.extend(
                    Search._manifest_annotation_links(manifest)
                )
            except Exception as exc:  # best effort: report and go on
                print(repr(exc))
        print(annotation_links)

        text_links = []
        for link in annotation_links:
            try:
                page = Search._fetch_json(link.strip())
                text_links.extend(Search._annotation_text_links(page))
            except Exception as exc:  # best effort: report and go on
                print(repr(exc))

        texts = []
        for link in text_links:
            try:
                texts.append(Search._fetch_json(link)["value"])
            except Exception as exc:  # best effort: report and go on
                print(repr(exc))

        # Encode once, at the file boundary; works on Python 2 and 3.
        with io.open(Search.OUTPUT_PATH, "w", encoding="utf-8") as outfile:
            for text in texts:
                outfile.write(u"%s\n" % text)