def urlopen(url, params=None, data=None):
    """Send an HTTP request to ``url`` and return the ``requests`` response.

    Every request carries a User-Agent header that names this module's file
    and the running Python version.

    :param url: address to request.
    :param params: optional query-string parameters; ``requests`` encodes
        them into the URL.
    :param data: optional request body. When it is not None the request is
        sent as a POST, otherwise as a GET.
    :return: the ``requests.Response`` object (callers in this module read
        it with ``.json()``).
    :raises requests.HTTPError: when the server answers with a 4xx or 5xx
        status code.
    """
    version = "%s.%s.%s" % sys.version_info[:3]
    user_agent = 'Mozilla/5.0 (openlibrary; %s) Python/%s' % (__file__, version)
    headers = {'User-Agent': user_agent}

    # Keep the rule urllib applied before this function moved to requests:
    # a request without a body is a GET, a request with a body is a POST.
    method = 'POST' if data is not None else 'GET'

    # NOTE(review): no timeout is passed, so a stalled connection blocks
    # indefinitely -- consider choosing one that suits slow Solr queries.
    response = requests.request(
        method, url, params=params, data=data, headers=headers
    )

    # urllib raised on HTTP error statuses; requests does not unless asked.
    # Fail here instead of letting callers parse an error page as JSON.
    response.raise_for_status()
    return response
def mocked_urlopen(*args, **kwargs):
    """Test double for ``update_work.urlopen``.

    Only the first positional argument (the requested URL) is inspected;
    any other positional or keyword arguments are ignored. The one known
    author query yields a 200 response with an empty Solr result; every
    other URL yields a 404 response whose JSON payload is None.
    """

    class MockResponse:
        """Bare-bones response exposing ``status_code`` and ``json()``."""

        def __init__(self, json_data, status_code):
            self.status_code = status_code
            self.json_data = json_data

        def json(self):
            return self.json_data

    requested_url = args[0]
    author_query_url = (
        "http://solr:8080/solr/select?wt=json&json.nl=arrarr&q=author_key:OL25A&"
        "sort=edition_count+desc&rows=1&fl=title,subtitle&facet=true&facet.mincount=1&"
        "facet.field=subject_facet&facet.field=time_facet&facet.field=person_facet&"
        "facet.field=place_facet"
    )

    # Anything other than the expected author query is "not found".
    if requested_url != author_query_url:
        return MockResponse(None, 404)

    # Minimal Solr reply: the author has no works, so every facet is empty.
    facet_names = ("place_facet", "person_facet", "subject_facet", "time_facet")
    empty_facets = {name: [] for name in facet_names}
    payload = {
        "facet_counts": {"facet_fields": empty_facets},
        "response": {"numFound": 0},
    }
    return MockResponse(payload, 200)