From 46d499785da55af012aa2df5b8e3cdc295c68dab Mon Sep 17 00:00:00 2001 From: Darrel Herbst Date: Thu, 14 Jan 2021 09:45:05 -0500 Subject: [PATCH 1/5] Resolves lint warnings with reformatting, et al. --- openlibrary/plugins/ol_infobase.py | 81 +++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/openlibrary/plugins/ol_infobase.py b/openlibrary/plugins/ol_infobase.py index 731c413e681..a5cf5646721 100644 --- a/openlibrary/plugins/ol_infobase.py +++ b/openlibrary/plugins/ol_infobase.py @@ -23,6 +23,7 @@ logger = logging.getLogger("infobase.ol") + def init_plugin(): """Initialize infobase plugin.""" from infogami.infobase import common, dbstore, server, logger as infobase_logger @@ -45,30 +46,31 @@ def init_plugin(): if ol: # install custom indexer - #XXX-Anand: this might create some trouble. Commenting out. + # XXX-Anand: this might create some trouble. Commenting out. # ol.store.indexer = Indexer() if config.get('http_listeners'): logger.info("setting up http listeners") ol.add_trigger(None, http_notify) - ## memcache invalidator is not required now. It was added for future use. - #_cache = config.get("cache", {}) - #if _cache.get("type") == "memcache": - # logger.info("setting up memcache invalidater") - # ol.add_trigger(None, MemcacheInvalidater()) + # # memcache invalidator is not required now. It was added for future use. + # _cache = config.get("cache", {}) + # if _cache.get("type") == "memcache": + # logger.info("setting up memcache invalidater") + # ol.add_trigger(None, MemcacheInvalidater()) # hook to add count functionality - server.app.add_mapping("/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author") - server.app.add_mapping("/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work") - server.app.add_mapping("/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user") - server.app.add_mapping("/([^/]*)/most_recent", __name__ + ".most_recent") - server.app.add_mapping("/([^/]*)/clear_cache", __name__ + ".clear_cache") - server.app.add_mapping("/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats") - server.app.add_mapping("/([^/]*)/has_user", __name__ + ".has_user") - server.app.add_mapping("/([^/]*)/olid_to_key", __name__ + ".olid_to_key") - server.app.add_mapping("/_reload_config", __name__ + ".reload_config") - server.app.add_mapping("/_inspect", __name__ + "._inspect") + server.app.add_mapping(r"/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author") + server.app.add_mapping(r"/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work") + server.app.add_mapping(r"/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user") + server.app.add_mapping(r"/([^/]*)/most_recent", __name__ + ".most_recent") + server.app.add_mapping(r"/([^/]*)/clear_cache", __name__ + ".clear_cache") + server.app.add_mapping(r"/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats") + server.app.add_mapping(r"/([^/]*)/has_user", __name__ + ".has_user") + server.app.add_mapping(r"/([^/]*)/olid_to_key", __name__ + ".olid_to_key") + server.app.add_mapping(r"/_reload_config", __name__ + ".reload_config") + server.app.add_mapping(r"/_inspect", __name__ + "._inspect") + def setup_logging(): try: @@ -81,6 +83,7 @@ def setup_logging(): print("Unable to set logging configuration:", str(e), file=sys.stderr) raise + class reload_config: @server.jsonify def POST(self): @@ -88,6 +91,7 @@ def POST(self): setup_logging() return {"ok": "true"} + class _inspect: """Backdoor to inspect the running process. @@ -98,13 +102,15 @@ def GET(self): try: import _inspect return _inspect.inspect() - except Exception as e: + except Exception: return traceback.format_exc() + def get_db(): site = server.get_site('openlibrary.org') return site.store.db + @web.memoize def get_property_id(type, name): db = get_db() @@ -114,12 +120,14 @@ def get_property_id(type, name): except IndexError: return None + def get_thing_id(key): try: return get_db().where('thing', key=key)[0].id except IndexError: return None + def count(table, type, key, value): pid = get_property_id(type, key) @@ -128,18 +136,21 @@ def count(table, type, key, value): return 0 return get_db().query("SELECT count(*) FROM " + table + " WHERE key_id=$pid AND value=$value_id", vars=locals())[0].count + class count_editions_by_author: @server.jsonify def GET(self, sitename): i = server.input('key') return count('edition_ref', '/type/edition', 'authors', i.key) + class count_editions_by_work: @server.jsonify def GET(self, sitename): i = server.input('key') return count('edition_ref', '/type/edition', 'works', i.key) + class count_edits_by_user: @server.jsonify def GET(self, sitename): @@ -147,6 +158,7 @@ def GET(self, sitename): author_id = get_thing_id(i.key) return get_db().query("SELECT count(*) as count FROM transaction WHERE author_id=$author_id", vars=locals())[0].count + class has_user: @server.jsonify def GET(self, sitename): @@ -161,6 +173,7 @@ def GET(self, sitename): d = get_db().query("SELECT * from thing WHERE lower(key) = $key AND type=$type_user", vars=locals()) return bool(d) + class stats: @server.jsonify def GET(self, sitename, today): @@ -203,12 +216,15 @@ def count(self, tables, where, vars): vars=vars )[0].value + most_recent_change = None + def invalidate_most_recent_change(event): global most_recent_change most_recent_change = None + class most_recent: @server.jsonify def GET(self, sitename): @@ -218,6 +234,7 @@ def GET(self, sitename): most_recent_change = site.versions({'limit': 1})[0] return most_recent_change + class clear_cache: @server.jsonify def POST(self, sitename): @@ -225,6 +242,7 @@ def POST(self, sitename): cache.global_cache.clear() return {'done': True} + class olid_to_key: @server.jsonify def GET(self, sitename): @@ -233,6 +251,7 @@ def GET(self, sitename): key = d and d[0].key or None return {'olid': i.olid, 'key': key} + def write(path, data): dir = os.path.dirname(path) if not os.path.exists(dir): @@ -241,19 +260,23 @@ def write(path, data): f.write(data) f.close() + def save_error(dir, prefix): try: logger.error("Error", exc_info=True) error = web.djangoerror() now = datetime.datetime.utcnow() - path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % (dir, \ + path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % ( + dir, now.year, now.month, now.day, prefix, - now.hour, now.minute, now.second, now.microsecond) + now.hour, now.minute, now.second, now.microsecond + ) logger.error('Error saved to %s', path) write(path, web.safestr(error)) - except: + except Exception: logger.error('Exception in saving the error', exc_info=True) + def get_object_data(site, thing): """Return expanded data of specified object.""" def expand(value): @@ -272,6 +295,7 @@ def expand(value): d[k] = expand(v) return d + def http_notify(site, old, new): """Notify listeners over http.""" if isinstance(new, dict): @@ -285,7 +309,8 @@ def http_notify(site, old, new): # optimize the most common case. # The following prefixes are never cached at the client. Avoid cache invalidation in that case. - not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/', '/usergroup/', '/people/'] + not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/', + '/usergroup/', '/people/'] for prefix in not_cached: if key.startswith(prefix): return @@ -294,11 +319,12 @@ def http_notify(site, old, new): try: response = urllib.request.urlopen(url, json).read() print('http_notify', repr(url), repr(key), repr(response), file=web.debug) - except: + except Exception: print('failed to send http_notify', repr(url), repr(key), file=web.debug) import traceback traceback.print_exc() + class MemcacheInvalidater: def __init__(self): self.memcache = self.get_memcache_client() @@ -362,6 +388,7 @@ def invalidate_work(self, site, old): def invalidate_default(self, site, old): yield old.key + # openlibrary.utils can't be imported directly because # openlibrary.plugins.openlibrary masks openlibrary module olmemcache = __import__('openlibrary.utils.olmemcache', None, None, ['x']) @@ -373,8 +400,10 @@ def MemcachedDict(servers=None): client = olmemcache.Client(servers) return cache.MemcachedDict(memcache_client=client) + cache.register_cache('memcache', MemcachedDict) + def _process_key(key): mapping = ( '/l/', '/languages/', @@ -387,6 +416,7 @@ def _process_key(key): return new + key[len(old):] return key + def _process_data(data): if isinstance(data, list): return [_process_data(d) for d in data] @@ -397,6 +427,7 @@ def _process_data(data): else: return data + def safeint(value, default=0): """Convers the value to integer. Returns 0, if the conversion fails.""" try: @@ -404,6 +435,7 @@ def safeint(value, default=0): except Exception: return default + def fix_table_of_contents(table_of_contents): """Some books have bad table_of_contents. This function converts them in to correct format. """ @@ -431,6 +463,7 @@ def row(r): d = [row(r) for r in table_of_contents] return [row for row in d if any(row.values())] + def process_json(key, json): if key is None or json is None: return None @@ -445,12 +478,14 @@ def process_json(key, json): json = simplejson.dumps(data) return json + dbstore.process_json = process_json _Indexer = dbstore.Indexer re_normalize = re.compile('[^[:alphanum:] ]', re.U) + class OLIndexer(_Indexer): """OL custom indexer to index normalized_title etc. """ @@ -489,7 +524,7 @@ def normalize_edition_title(self, title): # http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string def strip_accents(s): - return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')) + return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')) norm = strip_accents(title).lower() norm = norm.replace(' and ', ' ') From d25bb554fb4ba4141f58db7209ff04bb734d2981 Mon Sep 17 00:00:00 2001 From: Darrel Herbst Date: Thu, 14 Jan 2021 09:56:07 -0500 Subject: [PATCH 2/5] Ran isort --profile black on ol_infobase --- openlibrary/plugins/ol_infobase.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/openlibrary/plugins/ol_infobase.py b/openlibrary/plugins/ol_infobase.py index a5cf5646721..2625399afe7 100644 --- a/openlibrary/plugins/ol_infobase.py +++ b/openlibrary/plugins/ol_infobase.py @@ -2,31 +2,36 @@ """Open Library plugin for infobase. """ from __future__ import print_function -import os + import datetime -import simplejson import logging import logging.config +import os +import re import sys import traceback -import re import unicodedata +import simplejson import six -from six.moves import urllib import web -from infogami.infobase import config, common, server, cache, dbstore +from six.moves import urllib + +from infogami.infobase import cache, common, config, dbstore, server + +from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn # relative import from .openlibrary import schema -from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn logger = logging.getLogger("infobase.ol") def init_plugin(): """Initialize infobase plugin.""" - from infogami.infobase import common, dbstore, server, logger as infobase_logger + from infogami.infobase import common, dbstore + from infogami.infobase import logger as infobase_logger + from infogami.infobase import server dbstore.default_schema = schema.get_schema() # Replace infobase Indexer with OL custom Indexer From 5c23c4b3dac8ca51f5ae84bda360f1a78dfaf78d Mon Sep 17 00:00:00 2001 From: Darrel Herbst Date: Thu, 14 Jan 2021 10:27:08 -0500 Subject: [PATCH 3/5] Replace simplejson with json and urlopen with requests.get --- openlibrary/plugins/ol_infobase.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/openlibrary/plugins/ol_infobase.py b/openlibrary/plugins/ol_infobase.py index 2625399afe7..4930778a241 100644 --- a/openlibrary/plugins/ol_infobase.py +++ b/openlibrary/plugins/ol_infobase.py @@ -4,6 +4,7 @@ from __future__ import print_function import datetime +import json import logging import logging.config import os @@ -12,10 +13,9 @@ import traceback import unicodedata -import simplejson +import requests import six import web -from six.moves import urllib from infogami.infobase import cache, common, config, dbstore, server @@ -309,7 +309,7 @@ def http_notify(site, old, new): # new is a thing. call format_data to get the actual data. data = new.format_data() - json = simplejson.dumps(data) + json_data = json.dumps(data) key = data['key'] # optimize the most common case. @@ -322,7 +322,8 @@ def http_notify(site, old, new): for url in config.http_listeners: try: - response = urllib.request.urlopen(url, json).read() + response = requests.get(url, json_data).text + response.raise_for_status() print('http_notify', repr(url), repr(key), repr(response), file=web.debug) except Exception: print('failed to send http_notify', repr(url), repr(key), file=web.debug) @@ -469,19 +470,19 @@ def row(r): return [row for row in d if any(row.values())] -def process_json(key, json): - if key is None or json is None: +def process_json(key, json_data): + if key is None or json_data is None: return None base = key[1:].split('/')[0] if base in ['authors', 'books', 'works', 'languages', 'people', 'usergroup', 'permission']: - data = simplejson.loads(json) + data = json.loads(json_data) data = _process_data(data) if base == 'books' and 'table_of_contents' in data: data['table_of_contents'] = fix_table_of_contents(data['table_of_contents']) - json = simplejson.dumps(data) - return json + json_data = json.dumps(data) + return json_data dbstore.process_json = process_json From 3ff44f77d7fc3c096ffc43e05aabbf880cfe6af5 Mon Sep 17 00:00:00 2001 From: Darrel Herbst Date: Thu, 14 Jan 2021 13:18:40 -0500 Subject: [PATCH 4/5] use more descriptive name, and add named param for requests.get --- openlibrary/plugins/ol_infobase.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openlibrary/plugins/ol_infobase.py b/openlibrary/plugins/ol_infobase.py index 4930778a241..371d23f6e31 100644 --- a/openlibrary/plugins/ol_infobase.py +++ b/openlibrary/plugins/ol_infobase.py @@ -322,7 +322,7 @@ def http_notify(site, old, new): for url in config.http_listeners: try: - response = requests.get(url, json_data).text + response = requests.get(url, params=json_data).text response.raise_for_status() print('http_notify', repr(url), repr(key), repr(response), file=web.debug) except Exception: @@ -470,19 +470,19 @@ def row(r): return [row for row in d if any(row.values())] -def process_json(key, json_data): - if key is None or json_data is None: +def process_json(key, json_str): + if key is None or json_str is None: return None base = key[1:].split('/')[0] if base in ['authors', 'books', 'works', 'languages', 'people', 'usergroup', 'permission']: - data = json.loads(json_data) + data = json.loads(json_str) data = _process_data(data) if base == 'books' and 'table_of_contents' in data: data['table_of_contents'] = fix_table_of_contents(data['table_of_contents']) - json_data = json.dumps(data) - return json_data + json_str = json.dumps(data) + return json_str dbstore.process_json = process_json From 532c1406e96b4ee361c39b78bbefdbdaee584db6 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 18 Jan 2021 10:06:34 +0100 Subject: [PATCH 5/5] Requests raise_for_status() needs to be called from a response --- openlibrary/plugins/ol_infobase.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openlibrary/plugins/ol_infobase.py b/openlibrary/plugins/ol_infobase.py index 371d23f6e31..745cb4c462f 100644 --- a/openlibrary/plugins/ol_infobase.py +++ b/openlibrary/plugins/ol_infobase.py @@ -322,9 +322,9 @@ def http_notify(site, old, new): for url in config.http_listeners: try: - response = requests.get(url, params=json_data).text + response = requests.get(url, params=json_data) response.raise_for_status() - print('http_notify', repr(url), repr(key), repr(response), file=web.debug) + print('http_notify', repr(url), repr(key), repr(response.text), file=web.debug) except Exception: print('failed to send http_notify', repr(url), repr(key), file=web.debug) import traceback