From 741ac6fc44ee83822902f1096bc2ba0444f867f3 Mon Sep 17 00:00:00 2001
From: aasifkhan7
Date: Fri, 14 Aug 2020 22:15:36 -0700
Subject: [PATCH 1/2] refactor to requests json

---
 openlibrary/catalog/amazon/add_covers.py   |  7 ++-----
 openlibrary/catalog/utils/edit.py          |  4 ++--
 openlibrary/core/models.py                 |  3 ++-
 openlibrary/coverstore/code.py             | 11 +++--------
 openlibrary/plugins/books/readlinks.py     | 16 ++++++----------
 openlibrary/plugins/openlibrary/code.py    | 13 ++++---------
 openlibrary/plugins/upstream/borrow.py     |  6 +++---
 openlibrary/plugins/upstream/data.py       | 11 +++--------
 openlibrary/plugins/upstream/models.py     |  5 ++---
 openlibrary/plugins/worksearch/subjects.py |  6 +++---
 openlibrary/solr/find_modified_works.py    |  8 +++-----
 openlibrary/utils/solr.py                  |  9 +++++----
 12 files changed, 38 insertions(+), 61 deletions(-)

diff --git a/openlibrary/catalog/amazon/add_covers.py b/openlibrary/catalog/amazon/add_covers.py
index 545b30982ef..0f3b2fd91fb 100644
--- a/openlibrary/catalog/amazon/add_covers.py
+++ b/openlibrary/catalog/amazon/add_covers.py
@@ -1,8 +1,5 @@
 from __future__ import print_function
-import simplejson
-
-from six.moves.urllib.request import urlopen
-
+import requests
 
 base = 'http://ia331526.us.archive.org:7001/openlibrary.org/log/'
 
@@ -10,7 +7,7 @@
 offset = '2009-06-01:0'
 while not offset.startswith('2010-03-17:'):
     url = base + offset
-    ret = simplejson.load(urlopen(url))
+    ret = requests.get(url).json()
     offset, data = ret['offset'], ret['data']
     print(offset, len(data))
     for i in data:
diff --git a/openlibrary/catalog/utils/edit.py b/openlibrary/catalog/utils/edit.py
index ec94e755efe..3441cc2b8e2 100644
--- a/openlibrary/catalog/utils/edit.py
+++ b/openlibrary/catalog/utils/edit.py
@@ -1,5 +1,6 @@
 from __future__ import print_function
 import re
+import requests
 import web
 import json
 from openlibrary.catalog.importer.db_read import get_mc
@@ -7,7 +8,6 @@
 from time import sleep
 
 import six
-from six.moves import urllib
 
 re_meta_mrc = re.compile('([^/]+)_(meta|marc).(mrc|xml)')
 re_skip = re.compile(r'\b([A-Z]|Co|Dr|Jr|Capt|Mr|Mrs|Ms|Prof|Rev|Revd|Hon)\.$')
@@ -58,7 +58,7 @@ def undelete_author(a, ol):
     key = a['key']
     assert a['type'] == '/type/delete'
     url = 'http://openlibrary.org' + key + '.json?v=' + str(a['revision'] - 1)
-    prev = unmarshal(json.load(urllib.request.urlopen(url)))
+    prev = unmarshal(requests.get(url).json())
     assert prev['type'] == '/type/author'
     ol.save(key, prev, 'undelete author')
diff --git a/openlibrary/core/models.py b/openlibrary/core/models.py
index 96053765b82..7a59a37c93b 100644
--- a/openlibrary/core/models.py
+++ b/openlibrary/core/models.py
@@ -3,6 +3,7 @@
 import simplejson
 import web
 import re
+import requests
 
 from infogami.infobase import client
 
@@ -47,7 +48,7 @@ def info(self):
         if url.startswith("//"):
             url = "http:" + url
         try:
-            d = simplejson.loads(urllib.request.urlopen(url).read())
+            d = requests.get(url).json()
             d['created'] = h.parse_datetime(d['created'])
             if d['author'] == 'None':
                 d['author'] = None
diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py
index 866508aeffb..93dd41a60d9 100644
--- a/openlibrary/coverstore/code.py
+++ b/openlibrary/coverstore/code.py
@@ -7,15 +7,12 @@
 import logging
 import array
 import memcache
-
-from six.moves.urllib.request import urlopen
+import requests
 
 from openlibrary.coverstore import config, db, ratelimit
 from openlibrary.coverstore.coverlib import save_image, read_image, read_file
 from openlibrary.coverstore.utils import safeint, rm_f, random_string, ol_things, ol_get, changequery, download
 
-from six.moves import urllib
-
 logger = logging.getLogger("coverstore")
 
 
@@ -165,8 +162,7 @@ def _locate_item(item):
     """Locates the archive.org item in the cluster and returns the server and directory.
     """
     print(time.asctime(), "_locate_item", item, file=web.debug)
-    text = urlopen("https://archive.org/metadata/" + item).read()
-    d = simplejson.loads(text)
+    d = requests.get("https://archive.org/metadata/" + item).json()
     return d['server'], d['dir']
 
 def locate_item(item):
@@ -283,8 +279,7 @@ def redirect(id):
 
     def get_ia_cover_url(self, identifier, size="M"):
         url = "https://archive.org/metadata/%s/metadata" % identifier
         try:
-            jsontext = urlopen(url).read()
-            d = simplejson.loads(jsontext).get("result", {})
+            d = requests.get(url).json().get("result", {})
         except (IOError, ValueError):
             return
diff --git a/openlibrary/plugins/books/readlinks.py b/openlibrary/plugins/books/readlinks.py
index ab631ecd1a8..1493c759950 100644
--- a/openlibrary/plugins/books/readlinks.py
+++ b/openlibrary/plugins/books/readlinks.py
@@ -5,8 +5,8 @@
 from __future__ import print_function
 import sys
 import re
+import requests
 
-from six.moves import urllib
 import web
 from openlibrary.core import ia
 from openlibrary.core import helpers
@@ -44,10 +44,9 @@ def get_work_iaids(wkey):
     q = 'key:' + wkey
     stats.begin('solr', url=wkey)
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
+    reply = requests.get(solr_select).json()
     stats.end()
-    print(json_data)
-    reply = simplejson.loads(json_data)
+    print(reply)
     if reply['response']['numFound'] == 0:
         return []
     return reply["response"]['docs'][0].get(filter, [])
@@ -59,8 +58,7 @@ def get_works_iaids(wkeys):
     filter = 'ia'
     q = '+OR+'.join(['key:' + wkey for wkey in wkeys])
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
-    reply = simplejson.loads(json_data)
+    reply = requests.get(solr_select).json()
     if reply['response']['numFound'] == 0:
         return []
     return reply
@@ -73,8 +71,7 @@ def get_eids_for_wids(wids):
     filter = 'edition_key'
     q = '+OR+'.join(wids)
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=key,%s&qt=standard&wt=json&fq=type:work" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
-    reply = simplejson.loads(json_data)
+    reply = requests.get(solr_select).json()
     if reply['response']['numFound'] == 0:
         return []
     rows = reply['response']['docs']
@@ -87,8 +84,7 @@ def get_solr_edition_records(iaids):
     filter = 'title'
     q = '+OR+'.join('ia:' + id for id in iaids)
     solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=key,%s&qt=standard&wt=json" % (q, filter)
-    json_data = urllib.request.urlopen(solr_select).read()
-    reply = simplejson.loads(json_data)
+    reply = requests.get(solr_select).json()
     if reply['response']['numFound'] == 0:
         return []
     rows = reply['response']['docs']
diff --git a/openlibrary/plugins/openlibrary/code.py b/openlibrary/plugins/openlibrary/code.py
index 082a4b776a4..26d3f039c5d 100644
--- a/openlibrary/plugins/openlibrary/code.py
+++ b/openlibrary/plugins/openlibrary/code.py
@@ -4,8 +4,10 @@
 from __future__ import absolute_import
 from __future__ import print_function
 
+import requests
 import web
 import simplejson
+import json
 import os
 import sys
 import socket
@@ -733,20 +735,13 @@ def most_recent_change():
     return get_recent_changes(limit=1)[0]
 
 
-def wget(url):
-    # TODO: get rid of this, use requests instead.
-    try:
-        return urllib.request.urlopen(url).read()
-    except:
-        return ''
-
 
 @public
 def get_cover_id(key):
     try:
         _, cat, oln = key.split('/')
-        return simplejson.loads(wget('https://covers.openlibrary.org/%s/query?olid=%s&limit=1' % (cat, oln)))[0]
-    except (ValueError, IndexError, TypeError):
+        return requests.get('https://covers.openlibrary.org/%s/query?olid=%s&limit=1' % (cat, oln)).json()[0]
+    except (IndexError, json.decoder.JSONDecodeError, TypeError, ValueError):
         return None
 
 
diff --git a/openlibrary/plugins/upstream/borrow.py b/openlibrary/plugins/upstream/borrow.py
index 4c4fca8c424..12259baa4ef 100644
--- a/openlibrary/plugins/upstream/borrow.py
+++ b/openlibrary/plugins/upstream/borrow.py
@@ -5,6 +5,7 @@
 import time
 import hmac
 import re
+import requests
 import simplejson
 import logging
 
@@ -571,7 +572,7 @@ def get_loan_status(resource_id):
 
     url = '%s/is_loaned_out/%s' % (loanstatus_url, resource_id)
     try:
-        response = simplejson.loads(urllib.request.urlopen(url).read())
+        response = requests.get(url).json()
         if len(response) == 0:
             # No outstanding loans
             return None
@@ -598,8 +599,7 @@ def get_all_loaned_out():
 
     url = '%s/is_loaned_out/' % loanstatus_url
     try:
-        response = simplejson.loads(urllib.request.urlopen(url).read())
-        return response
+        return requests.get(url).json()
     except IOError:
         raise Exception('Loan status server not available')
 
diff --git a/openlibrary/plugins/upstream/data.py b/openlibrary/plugins/upstream/data.py
index 57085fc0708..821c4b29b43 100644
--- a/openlibrary/plugins/upstream/data.py
+++ b/openlibrary/plugins/upstream/data.py
@@ -5,23 +5,18 @@
 from infogami.utils import delegate
 from infogami.utils.view import public
 
-import simplejson
-
-from six.moves import urllib
+import requests
 
 
 IA_BASE_URL = config.get('ia_base_url')
 
 
-def wget(url):
-    return urllib.request.urlopen(url).read()
-
 def get_ol_dumps():
     """Get list of all archive.org items in the ol_exports collection
     uploaded by archive.org staff."""
     url = IA_BASE_URL + '/advancedsearch.php?q=(ol_dump+OR+ol_cdump)+AND+collection:ol_exports&fl[]=identifier&output=json&rows=1000'
-    d = simplejson.loads(wget(url))
-    return sorted(doc['identifier'] for doc in d['response']['docs'])
+    docs = requests.get(url).json()['response']['docs']
+    return sorted(doc['identifier'] for doc in docs)
 
 
 # cache the result for half an hour
diff --git a/openlibrary/plugins/upstream/models.py b/openlibrary/plugins/upstream/models.py
index 9cdef5dff58..8026df06c31 100644
--- a/openlibrary/plugins/upstream/models.py
+++ b/openlibrary/plugins/upstream/models.py
@@ -2,6 +2,7 @@
 
 import logging
 import re
+import requests
 import simplejson
 import web
 
@@ -27,7 +28,6 @@
 from openlibrary.utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10
 
 import six
-from six.moves import urllib
 
 
 def follow_redirect(doc):
@@ -718,8 +718,7 @@ def get_covers(self, offset=0, limit=20):
 
         try:
             url = '%s/b/query?cmd=ids&olid=%s' % (get_coverstore_url(), ",".join(olids))
-            data = urllib.request.urlopen(url).read()
-            cover_ids = simplejson.loads(data)
+            cover_ids = requests.get(url).json()
         except IOError as e:
             print('ERROR in getting cover_ids', str(e), file=web.debug)
             cover_ids = {}
diff --git a/openlibrary/plugins/worksearch/subjects.py b/openlibrary/plugins/worksearch/subjects.py
index 5a68979829f..6ec5fad7f75 100644
--- a/openlibrary/plugins/worksearch/subjects.py
+++ b/openlibrary/plugins/worksearch/subjects.py
@@ -3,6 +3,7 @@
 
 import web
 import re
+import requests
 import simplejson as json
 import logging
 from collections import defaultdict
@@ -12,7 +13,6 @@
 from infogami.plugins.api.code import jsonapi
 from infogami.utils import delegate, stats
 from infogami.utils.view import render, render_template, safeint
-from six.moves import urllib
 
 from openlibrary.core.models import Subject
 from openlibrary.core.lending import add_availability
@@ -400,7 +400,7 @@ def execute_ebook_count_query(q):
     solr_url = root_url % (rows, start, q)
 
     stats.begin("solr", url=solr_url)
-    response = json.load(urllib.request.urlopen(solr_url))['response']
+    response = requests.get(solr_url).json()['response']
     stats.end()
 
     num_found = response['numFound']
@@ -409,7 +409,7 @@
         if start:
             solr_url = root_url % (rows, start, q)
             stats.begin("solr", url=solr_url)
-            response = json.load(urllib.request.urlopen(solr_url))['response']
+            response = requests.get(solr_url).json()['response']
             stats.end()
         for doc in response['docs']:
             for k in doc['edition_key']:
diff --git a/openlibrary/solr/find_modified_works.py b/openlibrary/solr/find_modified_works.py
index 0a260879d6b..e73b55a141b 100644
--- a/openlibrary/solr/find_modified_works.py
+++ b/openlibrary/solr/find_modified_works.py
@@ -4,12 +4,10 @@
 import argparse
 import datetime
 import itertools
-import json
 import os
 import sys
 import time
-
-from six.moves import urllib
+import requests
 
 
 BASE_URL = "https://openlibrary.org/recentchanges/"
@@ -47,7 +45,7 @@ def get_modified_works(frm, to):
 
     while frm < to:
         url = frm.strftime(BASE_URL+"%Y/%m/%d.json")
        logging.debug("Fetching changes from %s", url)
-        ret.append(extract_works(json.load(urllib.request.urlopen(url))))
+        ret.append(extract_works(requests.get(url).json()))
        frm += one_day
     return itertools.chain(*ret)
@@ -68,7 +66,7 @@ def poll_for_changes(start_time_file, max_chunk_size, delay):
     while True:
         url = date.strftime(BASE_URL+"%Y/%m/%d.json")
         logging.debug("-- Fetching changes from %s", url)
-        changes = list(json.load(urllib.request.urlopen(url)))
+        changes = list(requests.get(url).json())
         unseen_changes = list(x for x in changes if x['id'] not in seen)
         logging.debug("%d changes fetched", len(changes))
         logging.debug(" of which %d are unseen", len(unseen_changes))
diff --git a/openlibrary/utils/solr.py b/openlibrary/utils/solr.py
index a56c6af1833..8cbb29d1bd2 100644
--- a/openlibrary/utils/solr.py
+++ b/openlibrary/utils/solr.py
@@ -1,6 +1,7 @@
 """Python library for accessing Solr.
 """
 import re
+import requests
 import web
 import simplejson
 import logging
@@ -91,15 +92,15 @@ def select(self, query, fields=None, facets=None,
         if len(payload) < 500:
             url = url + "?" + payload
             logger.info("solr request: %s", url)
-            data = urllib.request.urlopen(url, timeout=10).read()
+            jsonData = requests.get(url, timeout=10).json()
         else:
             logger.info("solr request: %s ...", url)
             if not isinstance(payload, bytes):
                 payload = payload.encode("utf-8")
-            request = urllib.request.Request(url, payload, {"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"})
-            data = urllib.request.urlopen(request, timeout=10).read()
+            headers={"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"}
+            jsonData = requests.post(url, data=payload, headers=headers, timeout=10).json()
         return self._parse_solr_result(
-            simplejson.loads(data),
+            jsonData,
             doc_wrapper=doc_wrapper,
             facet_wrapper=facet_wrapper)
 

From 39c1c877c2c11e7849253f11ef40d42ee71fd09d Mon Sep 17 00:00:00 2001
From: Aasif Khan
Date: Fri, 25 Sep 2020 18:16:54 +0530
Subject: [PATCH 2/2] fix PEP errors

---
 openlibrary/plugins/openlibrary/code.py | 4 +++-
 openlibrary/utils/solr.py               | 8 ++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/openlibrary/plugins/openlibrary/code.py b/openlibrary/plugins/openlibrary/code.py
index 693b5c5fc0d..ee3d2c1b491 100644
--- a/openlibrary/plugins/openlibrary/code.py
+++ b/openlibrary/plugins/openlibrary/code.py
@@ -743,7 +743,9 @@ def most_recent_change():
 def get_cover_id(key):
     try:
         _, cat, oln = key.split('/')
-        return requests.get('https://covers.openlibrary.org/%s/query?olid=%s&limit=1' % (cat, oln)).json()[0]
+        return requests.get(
+            "https://covers.openlibrary.org/%s/query?olid=%s&limit=1" % (cat, oln)
+        ).json()[0]
     except (IndexError, json.decoder.JSONDecodeError, TypeError, ValueError):
         return None
 
diff --git a/openlibrary/utils/solr.py b/openlibrary/utils/solr.py
index 8cbb29d1bd2..22b5e15b75b 100644
--- a/openlibrary/utils/solr.py
+++ b/openlibrary/utils/solr.py
@@ -97,8 +97,12 @@ def select(self, query, fields=None, facets=None,
             logger.info("solr request: %s ...", url)
             if not isinstance(payload, bytes):
                 payload = payload.encode("utf-8")
-            headers={"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"}
-            jsonData = requests.post(url, data=payload, headers=headers, timeout=10).json()
+            headers = {
+                "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
+            }
+            jsonData = requests.post(
+                url, data=payload, headers=headers, timeout=10
+            ).json()
         return self._parse_solr_result(
             jsonData,
             doc_wrapper=doc_wrapper,
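
Note on the pattern this series applies (a reviewer's sketch, not part of either
patch): every call site that did simplejson.loads(urllib.request.urlopen(url).read())
or json.load(urllib.request.urlopen(url)) becomes requests.get(url).json(). The
existing except IOError and except ValueError handlers keep working, because
requests.exceptions.RequestException subclasses IOError and the decode errors
raised by .json() subclass ValueError. One behavioral difference worth keeping in
mind: urlopen() raises HTTPError on 4xx/5xx responses, while requests.get() returns
the response regardless of status, so a failing endpoint surfaces as a JSON decode
error rather than an HTTP error unless the status is checked explicitly. A minimal
self-contained sketch follows; the fetch_json helper is illustrative only, and the
example URL reuses the recentchanges endpoint from find_modified_works.py:

    import requests

    def fetch_json(url, timeout=10):
        """Fetch a URL and decode its JSON body, approximating urlopen's
        error surface."""
        # Connection problems raise requests.exceptions.RequestException,
        # an IOError subclass, so existing `except IOError` blocks still fire.
        response = requests.get(url, timeout=timeout)
        # urlopen() raises on 4xx/5xx; requests needs an explicit check.
        response.raise_for_status()
        # A non-JSON body raises a ValueError subclass here.
        return response.json()

    if __name__ == '__main__':
        changes = fetch_json('https://openlibrary.org/recentchanges/2020/08/14.json')
        print(len(changes), 'changes')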