Refactor to use requests json #3709

Merged · 4 commits · Sep 25, 2020
7 changes: 2 additions & 5 deletions openlibrary/catalog/amazon/add_covers.py
@@ -1,16 +1,13 @@
 from __future__ import print_function
-import simplejson
-
-from six.moves.urllib.request import urlopen
-
+import requests
 
 base = 'http://ia331526.us.archive.org:7001/openlibrary.org/log/'
 
 out = open('edition_and_isbn', 'w')
 offset = '2009-06-01:0'
 while not offset.startswith('2010-03-17:'):
     url = base + offset
-    ret = simplejson.load(urlopen(url))
+    ret = requests.get(url).json()
     offset, data = ret['offset'], ret['data']
     print(offset, len(data))
     for i in data:

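This first hunk is the pattern the whole PR repeats: read-then-parse with urlopen and simplejson collapses into a single requests call. A minimal sketch of the equivalence, using a public httpbin endpoint as a stand-in for the internal log server above:

    import requests

    # Stand-in endpoint (the log-server URL in the diff is internal-only).
    url = 'https://httpbin.org/json'

    # Before: ret = simplejson.load(urlopen(url))  -- fetch bytes, then parse.
    # After: requests fetches the body and decodes the JSON in one call.
    ret = requests.get(url).json()
    print(type(ret))  # dict, the same object shape simplejson.load() returned
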
4 changes: 2 additions & 2 deletions openlibrary/catalog/utils/edit.py
@@ -1,13 +1,13 @@
 from __future__ import print_function
 import re
+import requests
 import web
 import json
 from openlibrary.catalog.importer.db_read import get_mc
 from openlibrary.api import unmarshal
 from time import sleep
 
 import six
-from six.moves import urllib
 
 re_meta_mrc = re.compile('([^/]+)_(meta|marc).(mrc|xml)')
 re_skip = re.compile(r'\b([A-Z]|Co|Dr|Jr|Capt|Mr|Mrs|Ms|Prof|Rev|Revd|Hon)\.$')
@@ -58,7 +58,7 @@ def undelete_author(a, ol):
     key = a['key']
     assert a['type'] == '/type/delete'
     url = 'http://openlibrary.org' + key + '.json?v=' + str(a['revision'] - 1)
-    prev = unmarshal(json.load(urllib.request.urlopen(url)))
+    prev = unmarshal(requests.get(url).json())
     assert prev['type'] == '/type/author'
     ol.save(key, prev, 'undelete author')

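A possible follow-up here, not part of the PR: requests can build the ?v= query string itself via params, avoiding the manual concatenation. A hedged sketch with hypothetical values:

    import requests

    key = '/authors/OL1234567A'   # hypothetical author key
    revision = 5                  # hypothetical value of a['revision']

    # Equivalent to: 'http://openlibrary.org' + key + '.json?v=' + str(revision - 1),
    # but requests handles the query-string encoding.
    prev = requests.get('http://openlibrary.org' + key + '.json',
                        params={'v': revision - 1}).json()
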
3 changes: 2 additions & 1 deletion openlibrary/core/models.py
@@ -3,6 +3,7 @@
 import simplejson
 import web
 import re
+import requests
 
 from infogami.infobase import client
 
@@ -47,7 +48,7 @@ def info(self):
        if url.startswith("//"):
            url = "http:" + url
        try:
-            d = simplejson.loads(urllib.request.urlopen(url).read())
+            d = requests.get(url).json()
            d['created'] = h.parse_datetime(d['created'])
            if d['author'] == 'None':
                d['author'] = None

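One semantic difference worth flagging: urlopen raised HTTPError (an IOError) on 4xx/5xx responses, while requests.get(url).json() will try to JSON-decode whatever body the error page returns. Where the old error path mattered, raise_for_status() restores it; a minimal sketch, assuming the surrounding try/except wants the old behavior:

    import requests

    def fetch_json(url):
        # Hypothetical helper: raise on HTTP error status, as urlopen did,
        # instead of attempting to parse an HTML error page as JSON.
        r = requests.get(url)
        r.raise_for_status()
        return r.json()
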
11 changes: 3 additions & 8 deletions openlibrary/coverstore/code.py
@@ -7,15 +7,12 @@
 import logging
 import array
 import memcache
-
-from six.moves.urllib.request import urlopen
+import requests
 
 from openlibrary.coverstore import config, db, ratelimit
 from openlibrary.coverstore.coverlib import save_image, read_image, read_file
 from openlibrary.coverstore.utils import safeint, rm_f, random_string, ol_things, ol_get, changequery, download
-
-from six.moves import urllib
-
 
 logger = logging.getLogger("coverstore")
 
@@ -165,8 +162,7 @@ def _locate_item(item):
     """Locates the archive.org item in the cluster and returns the server and directory.
     """
     print(time.asctime(), "_locate_item", item, file=web.debug)
-    text = urlopen("https://archive.org/metadata/" + item).read()
-    d = simplejson.loads(text)
+    d = requests.get("https://archive.org/metadata/" + item).json()
     return d['server'], d['dir']
 
 def locate_item(item):
@@ -283,8 +279,7 @@ def redirect(id):
    def get_ia_cover_url(self, identifier, size="M"):
        url = "https://archive.org/metadata/%s/metadata" % identifier
        try:
-            jsontext = urlopen(url).read()
-            d = simplejson.loads(jsontext).get("result", {})
+            d = requests.get(url).json().get("result", {})
        except (IOError, ValueError):
            return

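The untouched except (IOError, ValueError) in get_ia_cover_url keeps working after the switch: requests' RequestException subclasses IOError, and .json() raises a ValueError subclass when the body is not JSON. A sketch making both paths explicit (the exception-hierarchy detail is an assumption about requests 2.x, not stated in the diff):

    import requests

    def get_metadata_result(identifier):
        url = "https://archive.org/metadata/%s/metadata" % identifier
        try:
            return requests.get(url).json().get("result", {})
        except (IOError, ValueError):
            # IOError: connection failures (requests.RequestException subclasses IOError)
            # ValueError: non-JSON body (.json() raises a ValueError subclass)
            return None
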
16 changes: 6 additions & 10 deletions openlibrary/plugins/books/readlinks.py
@@ -5,8 +5,8 @@
 from __future__ import print_function
 import sys
 import re
+import requests
 
-from six.moves import urllib
 import web
 from openlibrary.core import ia
 from openlibrary.core import helpers
@@ -44,10 +44,9 @@ def get_work_iaids(wkey):
     q = 'key:' + wkey
     stats.begin('solr', url=wkey)
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
+    reply = requests.get(solr_select).json()
     stats.end()
-    print(json_data)
-    reply = simplejson.loads(json_data)
+    print(reply)
     if reply['response']['numFound'] == 0:
         return []
     return reply["response"]['docs'][0].get(filter, [])
@@ -59,8 +58,7 @@ def get_works_iaids(wkeys):
     filter = 'ia'
     q = '+OR+'.join(['key:' + wkey for wkey in wkeys])
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
-    reply = simplejson.loads(json_data)
+    reply = requests.get(solr_select).json()
     if reply['response']['numFound'] == 0:
         return []
     return reply
@@ -73,8 +71,7 @@ def get_eids_for_wids(wids):
     filter = 'edition_key'
     q = '+OR+'.join(wids)
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=key,%s&qt=standard&wt=json&fq=type:work" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
-    reply = simplejson.loads(json_data)
+    reply = requests.get(solr_select).json()
     if reply['response']['numFound'] == 0:
         return []
     rows = reply['response']['docs']
@@ -87,8 +84,7 @@ def get_solr_edition_records(iaids):
     filter = 'title'
     q = '+OR+'.join('ia:' + id for id in iaids)
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=key,%s&qt=standard&wt=json" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
-    reply = simplejson.loads(json_data)
+    reply = requests.get(solr_select).json()
    if reply['response']['numFound'] == 0:
        return []
    rows = reply['response']['docs']

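These hand-built URLs embed '+' as the pre-encoded separator (e.g. '+OR+'), so they are passed to requests as finished URLs rather than through params=, which would percent-encode the '+' again. A hedged sketch of the distinction (the solr_select_url value is an assumption; the real one comes from the surrounding module):

    import requests

    solr_select_url = 'http://solr:8983/solr/select'  # assumed base URL

    # Pre-encoded query: pass the complete URL, exactly as this PR does.
    url = solr_select_url + '?q=key:OL45883W+OR+key:OL362427W&wt=json'
    reply = requests.get(url).json()

    # If the query were *not* pre-encoded, params= would do the escaping:
    # requests.get(solr_select_url, params={'q': 'key:OL45883W OR key:OL362427W',
    #                                       'wt': 'json'}).json()
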
13 changes: 4 additions & 9 deletions openlibrary/plugins/openlibrary/code.py
@@ -4,8 +4,10 @@
 from __future__ import absolute_import
 from __future__ import print_function
 
+import requests
 import web
 import simplejson
+import json
 import os
 import sys
 import socket
@@ -733,20 +735,13 @@ def most_recent_change():
     return get_recent_changes(limit=1)[0]
 
 
-def wget(url):
-    # TODO: get rid of this, use requests instead.
-    try:
-        return urllib.request.urlopen(url).read()
-    except:
-        return ''
-
-
 @public
 def get_cover_id(key):
     try:
         _, cat, oln = key.split('/')
-        return simplejson.loads(wget('https://covers.openlibrary.org/%s/query?olid=%s&limit=1' % (cat, oln)))[0]
-    except (ValueError, IndexError, TypeError):
+        return requests.get('https://covers.openlibrary.org/%s/query?olid=%s&limit=1' % (cat, oln)).json()[0]
+    except (IndexError, json.decoder.JSONDecodeError, TypeError, ValueError):
        return None

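A review-style caveat on the new except tuple: when simplejson is installed (this module imports it), requests' .json() raises simplejson.JSONDecodeError rather than json.decoder.JSONDecodeError. Both subclass ValueError, which is already in the tuple, so the new entry is redundant rather than wrong. A sketch of the leaner equivalent, assuming requests 2.x behavior:

    import requests

    def get_cover_id(key):
        # Same logic; ValueError alone covers both json.JSONDecodeError and
        # simplejson.JSONDecodeError, since each subclasses ValueError.
        try:
            _, cat, oln = key.split('/')
            url = 'https://covers.openlibrary.org/%s/query?olid=%s&limit=1' % (cat, oln)
            return requests.get(url).json()[0]
        except (IndexError, TypeError, ValueError):
            return None
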
6 changes: 3 additions & 3 deletions openlibrary/plugins/upstream/borrow.py
@@ -5,6 +5,7 @@
 import time
 import hmac
 import re
+import requests
 import simplejson
 import logging
 
@@ -571,7 +572,7 @@ def get_loan_status(resource_id):
 
     url = '%s/is_loaned_out/%s' % (loanstatus_url, resource_id)
     try:
-        response = simplejson.loads(urllib.request.urlopen(url).read())
+        response = requests.get(url).json()
         if len(response) == 0:
             # No outstanding loans
             return None
@@ -598,8 +599,7 @@ def get_all_loaned_out():
 
     url = '%s/is_loaned_out/' % loanstatus_url
     try:
-        response = simplejson.loads(urllib.request.urlopen(url).read())
-        return response
+        return requests.get(url).json()
     except IOError:
        raise Exception('Loan status server not available')

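Neither the old urlopen call nor the new requests.get sets a timeout here, so a hung loan-status server blocks the calling thread indefinitely. A hedged sketch of a defensive variant (the 10-second budget is illustrative, not from the PR):

    import requests

    def get_all_loaned_out(loanstatus_url):
        # Hypothetical variant: bound the wait instead of blocking forever.
        url = '%s/is_loaned_out/' % loanstatus_url
        try:
            return requests.get(url, timeout=10).json()
        except IOError:
            # requests.Timeout subclasses RequestException, which subclasses
            # IOError, so the existing handler also covers timeouts.
            raise Exception('Loan status server not available')
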
11 changes: 3 additions & 8 deletions openlibrary/plugins/upstream/data.py
@@ -5,23 +5,18 @@
 from infogami.utils import delegate
 from infogami.utils.view import public
 
-import simplejson
-
-from six.moves import urllib
+import requests
 
 
 IA_BASE_URL = config.get('ia_base_url')
 
 
-def wget(url):
-    return urllib.request.urlopen(url).read()
-
-
 def get_ol_dumps():
     """Get list of all archive.org items in the ol_exports collection uploaded by archive.org staff."""
     url = IA_BASE_URL + '/advancedsearch.php?q=(ol_dump+OR+ol_cdump)+AND+collection:ol_exports&fl[]=identifier&output=json&rows=1000'
-    d = simplejson.loads(wget(url))
-    return sorted(doc['identifier'] for doc in d['response']['docs'])
+    docs = requests.get(url).json()['response']['docs']
+    return sorted(doc['identifier'] for doc in docs)
 
 
 # cache the result for half an hour

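With the wget helper gone, callers hit requests.get directly; if this endpoint were polled frequently, a module-level requests.Session would reuse the underlying connection. A hedged sketch, not part of the PR:

    import requests

    session = requests.Session()  # hypothetical: reuse TCP/TLS connections across calls

    def get_ol_dumps(ia_base_url):
        url = (ia_base_url + '/advancedsearch.php?q=(ol_dump+OR+ol_cdump)'
               '+AND+collection:ol_exports&fl[]=identifier&output=json&rows=1000')
        docs = session.get(url).json()['response']['docs']
        return sorted(doc['identifier'] for doc in docs)
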
5 changes: 2 additions & 3 deletions openlibrary/plugins/upstream/models.py
@@ -2,6 +2,7 @@
 
 import logging
 import re
+import requests
 import simplejson
 import web
 
@@ -27,7 +28,6 @@
 from openlibrary.utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10
 
 import six
-from six.moves import urllib
 
 
 def follow_redirect(doc):
@@ -718,8 +718,7 @@ def get_covers(self, offset=0, limit=20):
 
        try:
            url = '%s/b/query?cmd=ids&olid=%s' % (get_coverstore_url(), ",".join(olids))
-            data = urllib.request.urlopen(url).read()
-            cover_ids = simplejson.loads(data)
+            cover_ids = requests.get(url).json()
        except IOError as e:
            print('ERROR in getting cover_ids', str(e), file=web.debug)
            cover_ids = {}

6 changes: 3 additions & 3 deletions openlibrary/plugins/worksearch/subjects.py
@@ -3,6 +3,7 @@
 
 import web
 import re
+import requests
 import simplejson as json
 import logging
 from collections import defaultdict
@@ -12,7 +13,6 @@
 from infogami.plugins.api.code import jsonapi
 from infogami.utils import delegate, stats
 from infogami.utils.view import render, render_template, safeint
-from six.moves import urllib
 
 from openlibrary.core.models import Subject
 from openlibrary.core.lending import add_availability
@@ -400,7 +400,7 @@ def execute_ebook_count_query(q):
     solr_url = root_url % (rows, start, q)
 
     stats.begin("solr", url=solr_url)
-    response = json.load(urllib.request.urlopen(solr_url))['response']
+    response = requests.get(solr_url).json()['response']
     stats.end()
 
     num_found = response['numFound']
@@ -409,7 +409,7 @@ def execute_ebook_count_query(q):
         if start:
             solr_url = root_url % (rows, start, q)
             stats.begin("solr", url=solr_url)
-            response = json.load(urllib.request.urlopen(solr_url))['response']
+            response = requests.get(solr_url).json()['response']
             stats.end()
         for doc in response['docs']:
            for k in doc['edition_key']:

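The duplicated fetch inside execute_ebook_count_query is the usual first-page-then-paginate shape; a hedged sketch of factoring it into a generator (root_url and its (rows, start, q) ordering are taken from the function above, the helper itself is hypothetical):

    import requests

    def iter_solr_docs(root_url, q, rows=1000):
        # Yield docs page by page until numFound is exhausted.
        start = 0
        while True:
            response = requests.get(root_url % (rows, start, q)).json()['response']
            for doc in response['docs']:
                yield doc
            start += rows
            if start >= response['numFound']:
                break
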
8 changes: 3 additions & 5 deletions openlibrary/solr/find_modified_works.py
@@ -4,12 +4,10 @@
 import argparse
 import datetime
 import itertools
-import json
 import os
 import sys
 import time
 
-from six.moves import urllib
+import requests
 
 
 BASE_URL = "https://openlibrary.org/recentchanges/"
@@ -47,7 +45,7 @@ def get_modified_works(frm, to):
     while frm < to:
         url = frm.strftime(BASE_URL+"%Y/%m/%d.json")
         logging.debug("Fetching changes from %s", url)
-        ret.append(extract_works(json.load(urllib.request.urlopen(url))))
+        ret.append(extract_works(requests.get(url).json()))
         frm += one_day
     return itertools.chain(*ret)
@@ -68,7 +66,7 @@ def poll_for_changes(start_time_file, max_chunk_size, delay):
     while True:
         url = date.strftime(BASE_URL+"%Y/%m/%d.json")
         logging.debug("-- Fetching changes from %s", url)
-        changes = list(json.load(urllib.request.urlopen(url)))
+        changes = list(requests.get(url).json())
         unseen_changes = list(x for x in changes if x['id'] not in seen)
         logging.debug("%d changes fetched", len(changes))
         logging.debug(" of which %d are unseen", len(unseen_changes))

9 changes: 5 additions & 4 deletions openlibrary/utils/solr.py
@@ -1,6 +1,7 @@
 """Python library for accessing Solr.
 """
 import re
+import requests
 import web
 import simplejson
 import logging
@@ -91,15 +92,15 @@ def select(self, query, fields=None, facets=None,
         if len(payload) < 500:
             url = url + "?" + payload
             logger.info("solr request: %s", url)
-            data = urllib.request.urlopen(url, timeout=10).read()
+            jsonData = requests.get(url, timeout=10).json()
         else:
             logger.info("solr request: %s ...", url)
             if not isinstance(payload, bytes):
                 payload = payload.encode("utf-8")
-            request = urllib.request.Request(url, payload, {"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"})
-            data = urllib.request.urlopen(request, timeout=10).read()
+            headers = {"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"}
+            jsonData = requests.post(url, data=payload, headers=headers, timeout=10).json()
         return self._parse_solr_result(
-            simplejson.loads(data),
+            jsonData,
             doc_wrapper=doc_wrapper,
             facet_wrapper=facet_wrapper)

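The 500-character threshold keeps short queries on GET (readable in logs and proxies) and pushes long ones to POST to dodge URL-length limits; requests keeps the two branches symmetrical where urllib needed a separate Request object. A hedged sketch of the same switch in isolation (the threshold and content type are the document's; the standalone helper is hypothetical):

    import requests

    def solr_request(url, payload, max_get=500):
        # Short payloads: GET with the query string appended, as in select() above.
        if len(payload) < max_get:
            return requests.get(url + '?' + payload, timeout=10).json()
        # Long payloads: POST the form-encoded body instead.
        headers = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}
        return requests.post(url, data=payload, headers=headers, timeout=10).json()
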