diff --git a/bblocks/import_tools/imf.py b/bblocks/import_tools/imf.py index 0169077..33b1509 100644 --- a/bblocks/import_tools/imf.py +++ b/bblocks/import_tools/imf.py @@ -4,33 +4,18 @@ from typing import Optional import pandas as pd +import numpy as np +from imf_reader import weo +import os + +from imf_reader.weo.reader import gen_latest_version from bblocks import config -from bblocks.cleaning_tools.clean import clean_numeric_series, convert_to_datetime +from bblocks.cleaning_tools.clean import convert_to_datetime, convert_id from bblocks.import_tools.common import ImportData -from bblocks.import_tools.imf_weo import WEO from bblocks.logger import logger -def _check_parameters(latest_y: int | None, latest_r: int | None) -> str | tuple: - if latest_y is None and latest_r is None: - release = "latest" - - else: - release = (latest_y, latest_r) - - return release - - -def _update_weo(latest_y: int = None, latest_r: int = None) -> None: - """Update _data from the World Economic Outlook, using WEO package""" - - release = _check_parameters(latest_y, latest_r) - - # Download the file from the IMF website and store in directory - WEO(release).update_data() - - @dataclass class WorldEconomicOutlook(ImportData): """World Economic Outlook _data""" @@ -38,12 +23,19 @@ class WorldEconomicOutlook(ImportData): year: Optional[int] = None release: Optional[int] = None + # Raise an error unless year and release are either both None or both specified + def __post_init__(self) -> None: + if (self.year is None and self.release is not None) or ( + self.year is not None and self.release is None + ): + raise ValueError( + "Both year and release must be specified or must both be `None`" + ) + def __repr__(self) -> str: return f"IMF WEO(year={self.year}, release={self.release})" - def __load_data( - self, latest_y: int | None = None, latest_r: int | None = None - ) -> None: + def __load_data(self) -> None: """loading WEO as a clean dataframe Args: @@ -54,36 +46,58 @@ def __load_data( """ names = { 
- "ISO": "iso_code", - "WEO Subject Code": "indicator", - "Subject Descriptor": "indicator_name", - # "Country/Series-specific Notes": "indicator_description", - "Units": "units", - "Scale": "scale", - "Estimates Start After": "estimates_start_after", + "CONCEPT_CODE": "indicator", + "CONCEPT_LABEL": "indicator_name", + "UNIT_LABEL": "units", + "SCALE_LABEL": "scale", + "LASTACTUALDATE": "estimates_start_after", + "OBS_VALUE": "value", + "TIME_PERIOD": "year", + "REF_AREA_LABEL": "entity_name", } - to_drop = [ - "WEO Country Code", - "Country", - "Country/Series-specific Notes", - ] + # If year and release are not specified, get the latest version + if self.year is None and self.release is None: + version = gen_latest_version() + self.release, self.year = version - release = _check_parameters(latest_y, latest_r) - df = WEO(version=release).load_data().get_old_format_data() + # For compatibility, if the version is provided as int, convert + if self.release == 1: + version = ("April", self.year) + elif self.release == 2: + version = ("October", self.year) + + # Define the path where the data will be stored (or should be stored) + path = f"{config.BBPaths.raw_data}/weo_{self.year}_{self.release}.feather" + + # try read from disk + if os.path.exists(path): + self._raw_data = pd.read_feather(path) + return + + # If not found, fetch the data + df = weo.fetch_data(version=version) + + # Check if the fetched version is the same as the requested version + fetched_version = weo.fetch_data.last_version_fetched + if fetched_version != version: + self.release, self.year = version + path = f"{config.BBPaths.raw_data}/weo_{self.year}_{self.release}.feather" # Load _data into _data object self._raw_data = ( - df.drop( - columns=to_drop, - ) + df.loc[:, names.keys()] .rename(columns=names) - .melt(id_vars=names.values(), var_name="year", value_name="value") .assign(year=lambda d: convert_to_datetime(d.year)) + .assign(iso_code=lambda d: convert_id(d.entity_name, not_found=np.NaN)) 
+ .dropna(subset=["iso_code"]) .dropna(subset=["value"]) .reset_index(drop=True) ) + # save data to disk + self._raw_data.to_feather(path) + def _check_indicators(self, indicators: str | list | None = None) -> None | dict: if self._raw_data is None: self.__load_data() @@ -140,23 +154,26 @@ def __load_indicator(ind_: str) -> None: return self - def update_data( - self, year: int | None, release: int | None, reload_data: bool = True - ) -> None: + def update_data(self, reload_data: bool = True) -> None: """Update the stored WEO _data, using WEO package. Args: """ - _update_weo(latest_y=year, latest_r=release) + # clear cache + weo.clear_cache() # Reset the _data self._raw_data = None - self._data = {} - - logger.info("WEO data updated.") if reload_data: - self.load_data(indicator=list(self._data.keys())) + indicators_to_load = list(self._data.keys()) + self._data = {} + self.load_data(indicator=indicators_to_load) + + else: + self._data = {} + + logger.info("WEO data updated.") def available_indicators(self) -> None: """Print the available indicators in the dataset""" diff --git a/bblocks/import_tools/imf_weo.py b/bblocks/import_tools/imf_weo.py index 9dee482..ec50c12 100644 --- a/bblocks/import_tools/imf_weo.py +++ b/bblocks/import_tools/imf_weo.py @@ -316,7 +316,7 @@ def load_data(self, indicators: str | list = "all") -> ImportData: else: self._data[indicator] = self._raw_data[ self._raw_data["concept_code"] == indicator - ].reset_index(drop=True) + ].reset_index(drop=True) logger.info("Data loaded to object") return self @@ -417,4 +417,4 @@ def get_old_format_data(self) -> pd.DataFrame: .assign(ISO=lambda d: clean.convert_id(d.Country, not_found=np.nan)) .dropna(subset="ISO") .reset_index(drop=True) - ) + ) \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 1009358..d4ead76 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "alabaster" @@ -770,6 +770,23 @@ files = [ {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, ] +[[package]] +name = "imf-reader" +version = "1.0.0" +description = "A package to access imf data" +optional = false +python-versions = "<4.0,>=3.10" +files = [ + {file = "imf_reader-1.0.0-py3-none-any.whl", hash = "sha256:754aebf5db90869e1d1c6c538c0699ed0ed2e34108fb6db3192d922384ad49fb"}, + {file = "imf_reader-1.0.0.tar.gz", hash = "sha256:b0b338256c1546206922c59ca04c971d260c46d5e650e7a64b7e6fad4c3e7ad1"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.12.3,<5.0.0" +chardet = ">=5.2.0,<6.0.0" +pandas = ">=2.2.2,<3.0.0" +requests = ">=2.32.1,<3.0.0" + [[package]] name = "importlib-metadata" version = "7.1.0" @@ -1330,12 +1347,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.21.2", markers = "python_version >= \"3.10\""}, - {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, - {version = ">=1.19.3", markers = "python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\" or python_version >= \"3.9\""}, - {version = ">=1.17.0", markers = "python_version >= \"3.7\""}, - {version = ">=1.17.3", markers = "python_version >= \"3.8\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] @@ -1404,8 +1418,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", 
markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" @@ -1965,13 +1979,13 @@ rpds-py = ">=0.7.0" [[package]] name = "requests" -version = "2.31.0" +version = "2.32.3" description = "Python HTTP for Humans." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] [package.dependencies] @@ -2337,7 +2351,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} typing-extensions = ">=4.6.0" [package.extras] @@ -2528,4 +2542,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "a4605012e32b0e73dd2ad439535ea85f4d7ac52696a82105056ae9d5f6c19e0c" +content-hash = 
"350b8e6299e74b42b2be05d70f1d396b0c2090ba62a02e084a6cea5e736ee684" diff --git a/pyproject.toml b/pyproject.toml index 8534877..688bd62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ pyarrow = ">=14, <17" wbgapi = "<1.1" camelot-py = "^0.11" pypdf = "^3.17" +imf-reader = "^1.0.0" [tool.poetry.dev-dependencies] bump2version = "^1.0.1" diff --git a/tests/test_import_tools/test_weo.py b/tests/test_import_tools/test_weo.py index 6205e1c..39f2454 100644 --- a/tests/test_import_tools/test_weo.py +++ b/tests/test_import_tools/test_weo.py @@ -14,14 +14,14 @@ def test_smdx_query_url(): # test April version assert ( - imf_weo._smdx_query_url((2023, 1)) - == f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/April/download-entire-database" + imf_weo._smdx_query_url((2023, 1)) + == f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/April/download-entire-database" ) # test October version assert ( - imf_weo._smdx_query_url((2023, 2)) - == f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/October/download-entire-database" + imf_weo._smdx_query_url((2023, 2)) + == f"{imf_weo.BASE_URL}/en/Publications/WEO/weo-database/2023/October/download-entire-database" ) # test invalid version @@ -170,4 +170,4 @@ def test_init_latest_version(self): def test_weo_class_invalid_version(self): invalid_version = "2022" with pytest.raises(ValueError): - imf_weo.WEO(version=invalid_version) + imf_weo.WEO(version=invalid_version) \ No newline at end of file