From dedc92b9b36c0a7b487f4eac273609ff58a4ec19 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 6 Apr 2019 23:24:00 +0200 Subject: [PATCH 01/15] Fix simplify_index on scalars --- HISTORY.md | 14 ++++++++++++++ tests/test_indicator.py | 6 ++++++ world_bank_data/indicator.py | 8 +++++++- world_bank_data/version.py | 2 +- 4 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 HISTORY.md diff --git a/HISTORY.md b/HISTORY.md new file mode 100644 index 0000000..0935f6d --- /dev/null +++ b/HISTORY.md @@ -0,0 +1,14 @@ +Release History +=============== + +0.1.1 (2019-04-??) +------------------ + +**BugFixes** + +- Fix `simplify_index` when the data is a scalar + +0.1.0 (2019-04-06) +------------------ + +Initial release diff --git a/tests/test_indicator.py b/tests/test_indicator.py index 769229c..196eaef 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -1,4 +1,5 @@ import pytest +import numbers from world_bank_data import get_indicators, get_series from .tools import assert_numeric_or_string @@ -44,6 +45,11 @@ def test_indicator_use_id(): assert idx.index.names == ['Country'] +def test_indicator_simplify_scalar(): + pop = get_series('SP.POP.TOTL', 'CHN', mrv=1, simplify_index=True) + assert isinstance(pop, numbers.Number) + + def test_indicator_date(): idx = get_series('SP.POP.TOTL', date='2010:2018') assert len(idx.index) > 200 * 8 diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py index 36a1d46..746a2e6 100644 --- a/world_bank_data/indicator.py +++ b/world_bank_data/indicator.py @@ -53,9 +53,15 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False, index = [dim for dim in index if len(dim) != 1] if len(index) > 1: + # Our series is indexed by a multi-index index = pd.MultiIndex.from_product(index, names=[dim.name for dim in index]) - else: + elif len(index) == 1: + # A simple index is enough index = index[0] + else: + # Index has dimension zero. 
Data should be a scalar + assert len(value) == 1, 'Data has no dimension and was expected to be a scalar' + return value[0] return pd.Series(value, index=index, name=indicator) diff --git a/world_bank_data/version.py b/world_bank_data/version.py index 973e52c..b751c89 100644 --- a/world_bank_data/version.py +++ b/world_bank_data/version.py @@ -1,3 +1,3 @@ """version number""" -__version__ = '0.1.0' +__version__ = '0.1.1-dev' From 2f9dbe228f9dac5d4d980e81ca460fdc2f542adf Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 00:41:17 +0200 Subject: [PATCH 02/15] Load ADI indicators Fix #4 --- tests/test_indicator.py | 6 ++++++ world_bank_data/indicator.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_indicator.py b/tests/test_indicator.py index 196eaef..d601182 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -37,6 +37,12 @@ def test_indicator_most_recent_value(): assert_numeric_or_string(idx_mrv5) +def test_non_wdi_indicator(): + idx = get_series('TX.VAL.MRCH.CD.WB', mrv=1) + assert len(idx.index) > 50 + assert_numeric_or_string(idx) + + def test_indicator_use_id(): idx = get_series('SP.POP.TOTL', mrv=1, id_or_value='id', simplify_index=True) assert len(idx.index) > 200 diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py index 746a2e6..e68030d 100644 --- a/world_bank_data/indicator.py +++ b/world_bank_data/indicator.py @@ -2,9 +2,9 @@ import numpy as np import pandas as pd +import world_bank_data.options as options from .request import wb_get, wb_get_table from .search import search -import world_bank_data.options as options def get_indicators(indicator=None, language=None, id_or_value=None, **params): @@ -39,7 +39,7 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False, id_or_value = id_or_value or options.id_or_value idx = wb_get('country', country, 'indicator', indicator, data_format='jsonstat', **params) - idx = idx['WDI'] + _, idx = idx.popitem() 
dimension = idx.pop('dimension') value = idx.pop('value') From ee2d901c4e7d139a3cccc9e470e20f9b46c2dd25 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 00:43:11 +0200 Subject: [PATCH 03/15] Raise error message on error And allow integer parameters Fix #3 --- tests/test_indicator.py | 9 +++++++++ tests/test_others.py | 16 ++++++++++++++++ world_bank_data/request.py | 23 +++++++++++++++++------ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/tests/test_indicator.py b/tests/test_indicator.py index d601182..9feaf5f 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -27,6 +27,15 @@ def test_indicators_topic(): assert_numeric_or_string(idx) +def test_indicators_source(): + idx = get_indicators(source=11) + assert len(idx.index) < 2000 + assert_numeric_or_string(idx) + + with pytest.raises(ValueError): + get_indicators(source=21) + + def test_indicator_most_recent_value(): idx = get_series('SP.POP.TOTL', mrv=1) assert len(idx.index) > 200 diff --git a/tests/test_others.py b/tests/test_others.py index 3ea1de7..1cc6463 100644 --- a/tests/test_others.py +++ b/tests/test_others.py @@ -29,3 +29,19 @@ def test_sources(): assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', 'metadataavailability', 'concepts'] assert_numeric_or_string(df) + + +def test_sources_int(): + df = get_sources(11) + assert df.index.names == ['id'] + assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts'] + assert_numeric_or_string(df) + + +def test_sources_two_int(): + df = get_sources([11, 36]) + assert df.index.names == ['id'] + assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts'] + assert_numeric_or_string(df) diff --git a/world_bank_data/request.py b/world_bank_data/request.py index 2334944..053d104 100644 --- 
a/world_bank_data/request.py +++ b/world_bank_data/request.py @@ -12,9 +12,15 @@ class WBRequestError(HTTPError): """An error occured when downloading the WB data""" -def collapse(country_list): - """Collapse multiple countries to a colon-separated list of countries""" - return country_list if isinstance(country_list, str) else ';'.join(country_list) if country_list else 'all' +def collapse(values): + """Collapse multiple values to a colon-separated list of values""" + if isinstance(values, str): + return values + if values is None: + return 'all' + if isinstance(values, list): + return ';'.join([collapse(v) for v in values]) + return str(values) def extract_preferred_field(data, id_or_value): @@ -56,6 +62,13 @@ def wb_get(*args, language='en', data_format='json', **kwargs): response = get(url=url, params=params) response.raise_for_status() data = response.json() + if isinstance(data, list) and data and 'message' in data[0]: + try: + msg = data[0]['message'][0]['value'] + except (KeyError, IndexError): + msg = str(msg) + + raise ValueError("{msg}\nurl={url}\nparams={params}".format(msg=msg, url=url, params=params)) # Redo the request and get the full information when the first response is incomplete if data_format == 'json' and isinstance(data, list): @@ -98,9 +111,7 @@ def _wb_get_table_cached(name, only=None, language=None, id_or_value=None, **par def wb_get_table(name, only=None, language=None, id_or_value=None, expected=None, **params): """Request data and return it in the form of a data frame""" - if isinstance(only, list): - only = ';'.join(only) - + only = collapse(only) id_or_value = id_or_value or options.id_or_value if expected and id_or_value not in expected: From 419b96a1795a4b79173f1e7a638d746fa37acf70 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:18:51 +0200 Subject: [PATCH 04/15] utf-8 --- tests/test_language.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_language.py b/tests/test_language.py 
index f1bd37c..87de79d 100644 --- a/tests/test_language.py +++ b/tests/test_language.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import re import mock from world_bank_data import search_countries @@ -5,10 +7,10 @@ def test_language(): assert search_countries(re.compile('ES')).name[0] == 'Spain' - assert search_countries(re.compile('ES'), language='es').name[0] == 'España' + assert search_countries(re.compile('ES'), language='es').name[0] == u'España' def test_language_through_options(): assert search_countries(re.compile('ES')).name[0] == 'Spain' with mock.patch('world_bank_data.options.language', 'es'): - assert search_countries(re.compile('ES')).name[0] == 'España' + assert search_countries(re.compile('ES')).name[0] == u'España' From f104497ff6a7f654fcadd4f372e5ac8e36ddae4e Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:19:39 +0200 Subject: [PATCH 05/15] Python 2.7-3.7 --- .travis.yml | 3 +++ tests/test_indicator.py | 12 ++++++++++++ tests/test_others.py | 12 ++++++------ tests/tools.py | 5 ++--- world_bank_data/indicator.py | 15 ++++++++------- world_bank_data/request.py | 13 ++++++++----- 6 files changed, 39 insertions(+), 21 deletions(-) diff --git a/.travis.yml b/.travis.yml index ccda731..f512216 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,9 @@ language: python python: - "3.6" - "3.7" + - "2.7" + - "3.4" + - "3.5" install: # command to install dependencies - pip install -r requirements-dev.txt diff --git a/tests/test_indicator.py b/tests/test_indicator.py index 9feaf5f..565b0a4 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -71,6 +71,18 @@ def test_indicator_date(): assert_numeric_or_string(idx) +def test_indicator_values(): + idx = get_series('SP.POP.TOTL', date='2017', simplify_index=True).sort_values(ascending=False) + assert len(idx.index) > 200 + assert idx.index.values[0] == 'World' + assert idx.iloc[0] == 7530360149.0 + + idx = get_series('SP.POP.TOTL', date='2017', simplify_index=True, 
id_or_value='id').sort_values(ascending=False) + assert len(idx.index) > 200 + assert idx.index.values[0] == 'WLD' + assert idx.iloc[0] == 7530360149.0 + + @pytest.mark.skip('jsonstat format not supported here') def test_indicator_monthly(): idx = get_series('DPANUSSPB', country=['CHN', 'BRA'], date='2012M01:2012M08') diff --git a/tests/test_others.py b/tests/test_others.py index 1cc6463..7dcdf42 100644 --- a/tests/test_others.py +++ b/tests/test_others.py @@ -26,22 +26,22 @@ def test_topics(): def test_sources(): df = get_sources() assert df.index.names == ['id'] - assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', - 'metadataavailability', 'concepts'] + assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts']) assert_numeric_or_string(df) def test_sources_int(): df = get_sources(11) assert df.index.names == ['id'] - assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', - 'metadataavailability', 'concepts'] + assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts']) assert_numeric_or_string(df) def test_sources_two_int(): df = get_sources([11, 36]) assert df.index.names == ['id'] - assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', - 'metadataavailability', 'concepts'] + assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts']) assert_numeric_or_string(df) diff --git a/tests/tools.py b/tests/tools.py index 0d7b7e3..f2ac3d0 100644 --- a/tests/tools.py +++ b/tests/tools.py @@ -5,9 +5,8 @@ def assert_numeric_or_string(x): """Make sure that the Series or Dataframe in argument only contains simple types""" if isinstance(x, pd.Series): if x.dtype.kind not in ['i', 
'f']: - for y in x: - assert isinstance(y, str), "Series '{}' is expected to contain " \ - 'only numeric or string types, found {}'.format(x.name, y) + assert x.apply(type).isin([type(u''), type('')]).all(), \ + "Series '{}' is neither numeric nor strings".format(x.name) else: for col in x: assert_numeric_or_string(x[col]) diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py index e68030d..320c402 100644 --- a/world_bank_data/indicator.py +++ b/world_bank_data/indicator.py @@ -37,8 +37,9 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False, :param params: Additional parameters for the World Bank API, like date or mrv""" id_or_value = id_or_value or options.id_or_value + params['format'] = 'jsonstat' - idx = wb_get('country', country, 'indicator', indicator, data_format='jsonstat', **params) + idx = wb_get('country', country, 'indicator', indicator, **params) _, idx = idx.popitem() dimension = idx.pop('dimension') @@ -71,14 +72,14 @@ def _parse_category(cat, use_labels): cat = cat['category'] index = np.array(list(cat['index'].values())) - assert np.array_equal(index, np.arange(len(index))), 'Index should be ordered. Please use Python 3.6 or above.' 
- codes = np.array(list(cat['index'].keys())) + + codes = pd.Series(codes, index=index, name=name).sort_index() if not use_labels: - return pd.Series(codes, index=index, name=name) + return codes codes2 = np.array(list(cat['label'].keys())) - assert np.array_equal(codes, codes2), 'Codes should be identical' - labels = np.array(list(cat['label'].values())) - return pd.Series(labels, index=index, name=name) + labels = pd.Series(labels, index=codes2, name=name).sort_index() + + return pd.Series(labels.loc[codes].values, index=codes.index, name=name) diff --git a/world_bank_data/request.py b/world_bank_data/request.py index 053d104..589b6b9 100644 --- a/world_bank_data/request.py +++ b/world_bank_data/request.py @@ -41,10 +41,11 @@ def extract_preferred_field(data, id_or_value): return data -def wb_get(*args, language='en', data_format='json', **kwargs): +def wb_get(*args, **kwargs): """Request the World Bank for the desired information""" params = copy(kwargs) - params['format'] = data_format + language = params.pop('language') if 'language' in params else 'en' + params.setdefault('format', 'json') # collapse the list of countries to a single str if len(args) > 1: @@ -57,7 +58,7 @@ def wb_get(*args, language='en', data_format='json', **kwargs): if language != 'en': args = [language] + args - url = '/'.join([WORLD_BANK_URL, *args]) + url = '/'.join([WORLD_BANK_URL] + args) response = get(url=url, params=params) response.raise_for_status() @@ -71,7 +72,7 @@ def wb_get(*args, language='en', data_format='json', **kwargs): raise ValueError("{msg}\nurl={url}\nparams={params}".format(msg=msg, url=url, params=params)) # Redo the request and get the full information when the first response is incomplete - if data_format == 'json' and isinstance(data, list): + if params['format'] == 'json' and isinstance(data, list): page_information, data = data if int(page_information['pages']) > 1: params['per_page'] = page_information['total'] @@ -91,7 +92,9 @@ def wb_get(*args, 
language='en', data_format='json', **kwargs): @cached(TTLCache(128, 3600)) def _wb_get_table_cached(name, only=None, language=None, id_or_value=None, **params): - data = wb_get(name, only, language=language, **params) + if language: + params['language'] = language + data = wb_get(name, only, **params) # We get a list (countries) of dictionary (properties) columns = data[0].keys() From 5978df06ecac4e885fc5a7058d3c550593ef2ae4 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 6 Apr 2019 23:24:00 +0200 Subject: [PATCH 06/15] Fix simplify_index on scalars --- HISTORY.md | 14 ++++++++++++++ tests/test_indicator.py | 6 ++++++ world_bank_data/indicator.py | 8 +++++++- world_bank_data/version.py | 2 +- 4 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 HISTORY.md diff --git a/HISTORY.md b/HISTORY.md new file mode 100644 index 0000000..0935f6d --- /dev/null +++ b/HISTORY.md @@ -0,0 +1,14 @@ +Release History +=============== + +0.1.1 (2019-04-??) +------------------ + +**BugFixes** + +- Fix `simplify_index` when the data is a scalar + +0.1.0 (2019-04-06) +------------------ + +Initial release diff --git a/tests/test_indicator.py b/tests/test_indicator.py index 769229c..196eaef 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -1,4 +1,5 @@ import pytest +import numbers from world_bank_data import get_indicators, get_series from .tools import assert_numeric_or_string @@ -44,6 +45,11 @@ def test_indicator_use_id(): assert idx.index.names == ['Country'] +def test_indicator_simplify_scalar(): + pop = get_series('SP.POP.TOTL', 'CHN', mrv=1, simplify_index=True) + assert isinstance(pop, numbers.Number) + + def test_indicator_date(): idx = get_series('SP.POP.TOTL', date='2010:2018') assert len(idx.index) > 200 * 8 diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py index 36a1d46..746a2e6 100644 --- a/world_bank_data/indicator.py +++ b/world_bank_data/indicator.py @@ -53,9 +53,15 @@ def get_series(indicator, country=None, 
id_or_value=None, simplify_index=False, index = [dim for dim in index if len(dim) != 1] if len(index) > 1: + # Our series is indexed by a multi-index index = pd.MultiIndex.from_product(index, names=[dim.name for dim in index]) - else: + elif len(index) == 1: + # A simple index is enough index = index[0] + else: + # Index has dimension zero. Data should be a scalar + assert len(value) == 1, 'Data has no dimension and was expected to be a scalar' + return value[0] return pd.Series(value, index=index, name=indicator) diff --git a/world_bank_data/version.py b/world_bank_data/version.py index 973e52c..b751c89 100644 --- a/world_bank_data/version.py +++ b/world_bank_data/version.py @@ -1,3 +1,3 @@ """version number""" -__version__ = '0.1.0' +__version__ = '0.1.1-dev' From 254afc17400d8669513e3c36e7a01d1518c11b66 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 00:41:17 +0200 Subject: [PATCH 07/15] Load ADI indicators Fix #4 --- tests/test_indicator.py | 6 ++++++ world_bank_data/indicator.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_indicator.py b/tests/test_indicator.py index 196eaef..d601182 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -37,6 +37,12 @@ def test_indicator_most_recent_value(): assert_numeric_or_string(idx_mrv5) +def test_non_wdi_indicator(): + idx = get_series('TX.VAL.MRCH.CD.WB', mrv=1) + assert len(idx.index) > 50 + assert_numeric_or_string(idx) + + def test_indicator_use_id(): idx = get_series('SP.POP.TOTL', mrv=1, id_or_value='id', simplify_index=True) assert len(idx.index) > 200 diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py index 746a2e6..e68030d 100644 --- a/world_bank_data/indicator.py +++ b/world_bank_data/indicator.py @@ -2,9 +2,9 @@ import numpy as np import pandas as pd +import world_bank_data.options as options from .request import wb_get, wb_get_table from .search import search -import world_bank_data.options as options def 
get_indicators(indicator=None, language=None, id_or_value=None, **params): @@ -39,7 +39,7 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False, id_or_value = id_or_value or options.id_or_value idx = wb_get('country', country, 'indicator', indicator, data_format='jsonstat', **params) - idx = idx['WDI'] + _, idx = idx.popitem() dimension = idx.pop('dimension') value = idx.pop('value') From de5da092a9caed74fe75025521d9aab4b17b3a00 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 00:43:11 +0200 Subject: [PATCH 08/15] Raise error message on error And allow integer parameters Fix #3 --- tests/test_indicator.py | 9 +++++++++ tests/test_others.py | 16 ++++++++++++++++ world_bank_data/request.py | 23 +++++++++++++++++------ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/tests/test_indicator.py b/tests/test_indicator.py index d601182..9feaf5f 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -27,6 +27,15 @@ def test_indicators_topic(): assert_numeric_or_string(idx) +def test_indicators_source(): + idx = get_indicators(source=11) + assert len(idx.index) < 2000 + assert_numeric_or_string(idx) + + with pytest.raises(ValueError): + get_indicators(source=21) + + def test_indicator_most_recent_value(): idx = get_series('SP.POP.TOTL', mrv=1) assert len(idx.index) > 200 diff --git a/tests/test_others.py b/tests/test_others.py index 3ea1de7..1cc6463 100644 --- a/tests/test_others.py +++ b/tests/test_others.py @@ -29,3 +29,19 @@ def test_sources(): assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', 'metadataavailability', 'concepts'] assert_numeric_or_string(df) + + +def test_sources_int(): + df = get_sources(11) + assert df.index.names == ['id'] + assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts'] + assert_numeric_or_string(df) + + +def test_sources_two_int(): + 
df = get_sources([11, 36]) + assert df.index.names == ['id'] + assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts'] + assert_numeric_or_string(df) diff --git a/world_bank_data/request.py b/world_bank_data/request.py index 2334944..053d104 100644 --- a/world_bank_data/request.py +++ b/world_bank_data/request.py @@ -12,9 +12,15 @@ class WBRequestError(HTTPError): """An error occured when downloading the WB data""" -def collapse(country_list): - """Collapse multiple countries to a colon-separated list of countries""" - return country_list if isinstance(country_list, str) else ';'.join(country_list) if country_list else 'all' +def collapse(values): + """Collapse multiple values to a colon-separated list of values""" + if isinstance(values, str): + return values + if values is None: + return 'all' + if isinstance(values, list): + return ';'.join([collapse(v) for v in values]) + return str(values) def extract_preferred_field(data, id_or_value): @@ -56,6 +62,13 @@ def wb_get(*args, language='en', data_format='json', **kwargs): response = get(url=url, params=params) response.raise_for_status() data = response.json() + if isinstance(data, list) and data and 'message' in data[0]: + try: + msg = data[0]['message'][0]['value'] + except (KeyError, IndexError): + msg = str(msg) + + raise ValueError("{msg}\nurl={url}\nparams={params}".format(msg=msg, url=url, params=params)) # Redo the request and get the full information when the first response is incomplete if data_format == 'json' and isinstance(data, list): @@ -98,9 +111,7 @@ def _wb_get_table_cached(name, only=None, language=None, id_or_value=None, **par def wb_get_table(name, only=None, language=None, id_or_value=None, expected=None, **params): """Request data and return it in the form of a data frame""" - if isinstance(only, list): - only = ';'.join(only) - + only = collapse(only) id_or_value = id_or_value or options.id_or_value if 
expected and id_or_value not in expected: From 16022419ffb33e451e90b93aca813fed38261760 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:18:51 +0200 Subject: [PATCH 09/15] utf-8 --- tests/test_language.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_language.py b/tests/test_language.py index f1bd37c..87de79d 100644 --- a/tests/test_language.py +++ b/tests/test_language.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import re import mock from world_bank_data import search_countries @@ -5,10 +7,10 @@ def test_language(): assert search_countries(re.compile('ES')).name[0] == 'Spain' - assert search_countries(re.compile('ES'), language='es').name[0] == 'España' + assert search_countries(re.compile('ES'), language='es').name[0] == u'España' def test_language_through_options(): assert search_countries(re.compile('ES')).name[0] == 'Spain' with mock.patch('world_bank_data.options.language', 'es'): - assert search_countries(re.compile('ES')).name[0] == 'España' + assert search_countries(re.compile('ES')).name[0] == u'España' From e60237d9ae1f886746116fba22c75360f356350d Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:19:39 +0200 Subject: [PATCH 10/15] Python 2.7-3.7 --- .travis.yml | 3 +++ tests/test_indicator.py | 12 ++++++++++++ tests/test_others.py | 12 ++++++------ tests/tools.py | 5 ++--- world_bank_data/indicator.py | 15 ++++++++------- world_bank_data/request.py | 13 ++++++++----- 6 files changed, 39 insertions(+), 21 deletions(-) diff --git a/.travis.yml b/.travis.yml index ccda731..f512216 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,9 @@ language: python python: - "3.6" - "3.7" + - "2.7" + - "3.4" + - "3.5" install: # command to install dependencies - pip install -r requirements-dev.txt diff --git a/tests/test_indicator.py b/tests/test_indicator.py index 9feaf5f..565b0a4 100644 --- a/tests/test_indicator.py +++ b/tests/test_indicator.py @@ -71,6 +71,18 @@ def test_indicator_date(): 
assert_numeric_or_string(idx) +def test_indicator_values(): + idx = get_series('SP.POP.TOTL', date='2017', simplify_index=True).sort_values(ascending=False) + assert len(idx.index) > 200 + assert idx.index.values[0] == 'World' + assert idx.iloc[0] == 7530360149.0 + + idx = get_series('SP.POP.TOTL', date='2017', simplify_index=True, id_or_value='id').sort_values(ascending=False) + assert len(idx.index) > 200 + assert idx.index.values[0] == 'WLD' + assert idx.iloc[0] == 7530360149.0 + + @pytest.mark.skip('jsonstat format not supported here') def test_indicator_monthly(): idx = get_series('DPANUSSPB', country=['CHN', 'BRA'], date='2012M01:2012M08') diff --git a/tests/test_others.py b/tests/test_others.py index 1cc6463..7dcdf42 100644 --- a/tests/test_others.py +++ b/tests/test_others.py @@ -26,22 +26,22 @@ def test_topics(): def test_sources(): df = get_sources() assert df.index.names == ['id'] - assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', - 'metadataavailability', 'concepts'] + assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts']) assert_numeric_or_string(df) def test_sources_int(): df = get_sources(11) assert df.index.names == ['id'] - assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', - 'metadataavailability', 'concepts'] + assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts']) assert_numeric_or_string(df) def test_sources_two_int(): df = get_sources([11, 36]) assert df.index.names == ['id'] - assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', - 'metadataavailability', 'concepts'] + assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability', + 'metadataavailability', 'concepts']) 
assert_numeric_or_string(df) diff --git a/tests/tools.py b/tests/tools.py index 0d7b7e3..f2ac3d0 100644 --- a/tests/tools.py +++ b/tests/tools.py @@ -5,9 +5,8 @@ def assert_numeric_or_string(x): """Make sure that the Series or Dataframe in argument only contains simple types""" if isinstance(x, pd.Series): if x.dtype.kind not in ['i', 'f']: - for y in x: - assert isinstance(y, str), "Series '{}' is expected to contain " \ - 'only numeric or string types, found {}'.format(x.name, y) + assert x.apply(type).isin([type(u''), type('')]).all(), \ + "Series '{}' is neither numeric nor strings".format(x.name) else: for col in x: assert_numeric_or_string(x[col]) diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py index e68030d..320c402 100644 --- a/world_bank_data/indicator.py +++ b/world_bank_data/indicator.py @@ -37,8 +37,9 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False, :param params: Additional parameters for the World Bank API, like date or mrv""" id_or_value = id_or_value or options.id_or_value + params['format'] = 'jsonstat' - idx = wb_get('country', country, 'indicator', indicator, data_format='jsonstat', **params) + idx = wb_get('country', country, 'indicator', indicator, **params) _, idx = idx.popitem() dimension = idx.pop('dimension') @@ -71,14 +72,14 @@ def _parse_category(cat, use_labels): cat = cat['category'] index = np.array(list(cat['index'].values())) - assert np.array_equal(index, np.arange(len(index))), 'Index should be ordered. Please use Python 3.6 or above.' 
- codes = np.array(list(cat['index'].keys())) + + codes = pd.Series(codes, index=index, name=name).sort_index() if not use_labels: - return pd.Series(codes, index=index, name=name) + return codes codes2 = np.array(list(cat['label'].keys())) - assert np.array_equal(codes, codes2), 'Codes should be identical' - labels = np.array(list(cat['label'].values())) - return pd.Series(labels, index=index, name=name) + labels = pd.Series(labels, index=codes2, name=name).sort_index() + + return pd.Series(labels.loc[codes].values, index=codes.index, name=name) diff --git a/world_bank_data/request.py b/world_bank_data/request.py index 053d104..589b6b9 100644 --- a/world_bank_data/request.py +++ b/world_bank_data/request.py @@ -41,10 +41,11 @@ def extract_preferred_field(data, id_or_value): return data -def wb_get(*args, language='en', data_format='json', **kwargs): +def wb_get(*args, **kwargs): """Request the World Bank for the desired information""" params = copy(kwargs) - params['format'] = data_format + language = params.pop('language') if 'language' in params else 'en' + params.setdefault('format', 'json') # collapse the list of countries to a single str if len(args) > 1: @@ -57,7 +58,7 @@ def wb_get(*args, language='en', data_format='json', **kwargs): if language != 'en': args = [language] + args - url = '/'.join([WORLD_BANK_URL, *args]) + url = '/'.join([WORLD_BANK_URL] + args) response = get(url=url, params=params) response.raise_for_status() @@ -71,7 +72,7 @@ def wb_get(*args, language='en', data_format='json', **kwargs): raise ValueError("{msg}\nurl={url}\nparams={params}".format(msg=msg, url=url, params=params)) # Redo the request and get the full information when the first response is incomplete - if data_format == 'json' and isinstance(data, list): + if params['format'] == 'json' and isinstance(data, list): page_information, data = data if int(page_information['pages']) > 1: params['per_page'] = page_information['total'] @@ -91,7 +92,9 @@ def wb_get(*args, 
language='en', data_format='json', **kwargs): @cached(TTLCache(128, 3600)) def _wb_get_table_cached(name, only=None, language=None, id_or_value=None, **params): - data = wb_get(name, only, language=language, **params) + if language: + params['language'] = language + data = wb_get(name, only, **params) # We get a list (countries) of dictionary (properties) columns = data[0].keys() From 167320f5eb446da8ddad1b6fea249672210352af Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:25:28 +0200 Subject: [PATCH 11/15] Version 0.1.1 --- HISTORY.md | 4 +++- setup.py | 3 +++ world_bank_data/version.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 0935f6d..2ab345c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,12 +1,14 @@ Release History =============== -0.1.1 (2019-04-??) +0.1.1 (2019-04-09) ------------------ **BugFixes** - Fix `simplify_index` when the data is a scalar +- Make sure that non-WDI indicators can be loaded (#4) +- Python 2.7 is supported (#1) 0.1.0 (2019-04-06) ------------------ diff --git a/setup.py b/setup.py index 9690e0d..288b4a5 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,9 @@ 'Intended Audience :: Education', 'Intended Audience :: Science/Research', 'Programming Language :: Python', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7') ) diff --git a/world_bank_data/version.py b/world_bank_data/version.py index b751c89..7076bb0 100644 --- a/world_bank_data/version.py +++ b/world_bank_data/version.py @@ -1,3 +1,3 @@ """version number""" -__version__ = '0.1.1-dev' +__version__ = '0.1.1' From 6715ffa007d1c8a816a02c84c673c70427acb44b Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:30:24 +0200 Subject: [PATCH 12/15] Notebook is Python 2/3 compatible --- ...unburst plot of the world population.ipynb | 31 
+++++++++++++------ ...A sunburst plot of the world population.py | 14 +++++++-- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/examples/A sunburst plot of the world population.ipynb b/examples/A sunburst plot of the world population.ipynb index bcdfdf7..ceacfbf 100644 --- a/examples/A sunburst plot of the world population.ipynb +++ b/examples/A sunburst plot of the world population.ipynb @@ -42,18 +42,24 @@ ], "source": [ "import pandas as pd\n", - "import urllib\n", "import mock\n", "import plotly.offline as offline\n", "import world_bank_data as wb\n", "\n", + "try:\n", + " # Python 3.6\n", + " from urllib.request import urlopen\n", + "except ImportError:\n", + " # Python 2.7\n", + " from urllib import urlopen\n", + "\n", "# Only show head and tail of dataframes\n", "pd.set_option('display.max_rows', 6)\n", "\n", "\n", "# Plotly.js in version 1.46.1\n", "def get_latest_plotlyjs(url='https://cdn.plot.ly/plotly-1.46.1.min.js'):\n", - " return urllib.request.urlopen(url).read().decode('utf-8')\n", + " return urlopen(url).read().decode('utf-8')\n", "\n", "\n", "with mock.patch('plotly.offline.offline.get_plotlyjs', get_latest_plotlyjs):\n", @@ -1490,16 +1496,16 @@ } }, "text/html": [ - "
" ], "text/vnd.plotly.v1+html": [ - "
" ] @@ -1511,11 +1517,18 @@ "source": [ "# And now we can plot the World Population\n", "offline.iplot(dict(\n", - " data=[dict(type='sunburst', **all_levels, hoverinfo='text')],\n", + " data=[dict(type='sunburst', hoverinfo='text', **all_levels)],\n", " layout=dict(title='World Population (World Bank, 2017)
Click on a region to zoom',\n", " width=800, height=800)),\n", " validate=False)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/A sunburst plot of the world population.py b/examples/A sunburst plot of the world population.py index 03d90e2..1a1e128 100644 --- a/examples/A sunburst plot of the world population.py +++ b/examples/A sunburst plot of the world population.py @@ -20,18 +20,24 @@ # + import pandas as pd -import urllib import mock import plotly.offline as offline import world_bank_data as wb +try: + # Python 3.6 + from urllib.request import urlopen +except ImportError: + # Python 2.7 + from urllib import urlopen + # Only show head and tail of dataframes pd.set_option('display.max_rows', 6) # Plotly.js in version 1.46.1 def get_latest_plotlyjs(url='https://cdn.plot.ly/plotly-1.46.1.min.js'): - return urllib.request.urlopen(url).read().decode('utf-8') + return urlopen(url).read().decode('utf-8') with mock.patch('plotly.offline.offline.get_plotlyjs', get_latest_plotlyjs): @@ -80,7 +86,9 @@ def get_latest_plotlyjs(url='https://cdn.plot.ly/plotly-1.46.1.min.js'): # And now we can plot the World Population offline.iplot(dict( - data=[dict(type='sunburst', **all_levels, hoverinfo='text')], + data=[dict(type='sunburst', hoverinfo='text', **all_levels)], layout=dict(title='World Population (World Bank, 2017)
Click on a region to zoom', width=800, height=800)), validate=False) + + From d59b0fb6a7fddc3faacd9cf306ef2598277130fd Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 01:37:23 +0200 Subject: [PATCH 13/15] Python 3.4-5 may change column order --- tests/test_others.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_others.py b/tests/test_others.py index 7dcdf42..cb162ef 100644 --- a/tests/test_others.py +++ b/tests/test_others.py @@ -5,21 +5,21 @@ def test_lending_types(): df = get_lendingtypes() assert df.index.names == ['id'] - assert df.columns.to_list() == ['iso2code', 'value'] + assert set(df.columns) == set(['iso2code', 'value']) assert_numeric_or_string(df) def test_income_levels(): df = get_incomelevels() assert df.index.names == ['id'] - assert df.columns.to_list() == ['iso2code', 'value'] + assert set(df.columns) == set(['iso2code', 'value']) assert_numeric_or_string(df) def test_topics(): df = get_topics() assert df.index.names == ['id'] - assert df.columns.to_list() == ['value', 'sourceNote'] + assert set(df.columns) == set(['value', 'sourceNote']) assert_numeric_or_string(df) From 38d4e2700ffa31abb481f9b52b7464d9e8d8d3e1 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 02:03:50 +0200 Subject: [PATCH 14/15] Python < 3.6 --- tests/test_country.py | 2 +- tests/test_region.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_country.py b/tests/test_country.py index d5c3e8c..7837bf7 100644 --- a/tests/test_country.py +++ b/tests/test_country.py @@ -25,7 +25,7 @@ def test_country_language(): def test_two_countries(): cnt = get_countries(['FRA', 'ITA']) - assert cnt.index.to_list() == ['FRA', 'ITA'] + assert set(cnt.index) == set(['FRA', 'ITA']) assert cnt.latitude.dtype == float assert_numeric_or_string(cnt) diff --git a/tests/test_region.py b/tests/test_region.py index a6dfdb3..a01be3d 100644 --- a/tests/test_region.py +++ b/tests/test_region.py @@ -24,7 +24,7 @@ 
def test_one_region_list(): def test_two_regions(): reg = get_regions(['AFR', 'ANR']) assert 'id' not in reg.columns - assert reg.index.to_list() == ['AFR', 'ANR'] + assert set(reg.index) == set(['AFR', 'ANR']) assert_numeric_or_string(reg) From 26eae3f5576be4354471c54ce53e507b7b1b12fe Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 9 Apr 2019 02:10:22 +0200 Subject: [PATCH 15/15] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b52fb2b..b90bf8d 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ Go to our Binder and run either this [README](https://mybinder.org/v2/gh/mwouts/ ## The World Bank -The [World Bank](https://www.worldbank.org/) Data has a [Data Catalog](https://datacatalog.worldbank.org/), and an interactive [data explorer](https://data.worldbank.org/indicator/sp.pop.totl). +The [World Bank](https://www.worldbank.org/) has a [Data Catalog](https://datacatalog.worldbank.org/), and an interactive [data explorer](https://data.worldbank.org/indicator/sp.pop.totl). Third party applications that allow to access the data from various languages are listed [here](https://data.worldbank.org/products/third-party-apps). @@ -110,9 +110,10 @@ The World Bank data is also available in Google's [Data Explorer](https://data.w ## Python -Alternatively to `world_bank_data`, Python users may find useful the following two packages: +Alternatively to `world_bank_data`, Python users may find useful the following packages: - [`wbpy`](https://github.com/mattduck/wbpy/blob/master/README.rst), nicely documented but last released in 2013. - [`wbdata`](https://github.com/oliversherouse/wbdata/blob/master/README.rst), which works well. +- [`pandas_datareader`](https://pandas-datareader.readthedocs.io/en/latest/readers/world-bank.html) The reason for which I wrote `world_bank_data` is mostly speed, e.g. 
I wanted to use the lastest version of the World Bank API (v2) and benefit from significant speed improvements. Reimplementing the API also gave me a finer control on the mapping of options. @@ -128,7 +129,7 @@ See also the [Introduction to the wbstats R-package](https://cran.r-project.org/ ## Country and indicator description in non-English languages -The World Bank describes their sources and indicators in a other languages than English. Use either the `language` argument in each of `get_countries`, `get_indicators`, etc, or change the default globally: +The World Bank describes their sources and indicators in languages other than English. Use either the `language` argument in each of `get_countries`, `get_indicators`, etc., or change the default globally: ```python wb.options.language = 'vi'