Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

182 bug weo importer #183

Merged
merged 7 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.3.0
current_version = 1.3.1
commit = True
tag = True

Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
=========

[1.3.1] - 2024-04-18
---------------------
### Updated
- Switched the backend of the WorldEconomicOutlook importer to remove the
weo-reader dependency.

[1.3.0] - 2024-04-09
---------------------
### Updated
Expand Down
2 changes: 1 addition & 1 deletion bblocks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.3.0"
__version__ = "1.3.1"

# Easy access to importers
from bblocks.import_tools.world_bank import WorldBankData
Expand Down
84 changes: 17 additions & 67 deletions bblocks/import_tools/imf.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,34 @@
from __future__ import annotations

import os
from dataclasses import dataclass
from typing import Optional

import pandas as pd
import weo.dates
from weo import all_releases, download, WEO

from bblocks import config
from bblocks.cleaning_tools.clean import clean_numeric_series, convert_to_datetime
from bblocks.import_tools.common import ImportData
from bblocks.import_tools.imf_weo import WEO
from bblocks.logger import logger


def _check_weo_parameters(
latest_y: int | None = None, latest_r: int | None = None
) -> (int, int):
"""Check parameters and return max values or provided input"""
if latest_y is None:
latest_y = max(*all_releases())[0]
def _check_parameters(latest_y: int | None, latest_r: int | None) -> str | tuple:
if latest_y is None and latest_r is None:
release = "latest"

# if latest release isn't provided, take max value
if latest_r is None:
latest_r = max(*all_releases())[1]
else:
release = (latest_y, latest_r)

return latest_y, latest_r
return release


def _update_weo(
latest_y: int = None,
latest_r: int = None,
) -> None:
def _update_weo(latest_y: int = None, latest_r: int = None) -> None:
"""Update _data from the World Economic Outlook, using WEO package"""

latest_y, latest_r = _check_weo_parameters(latest_y, latest_r)
release = _check_parameters(latest_y, latest_r)

# Download the file from the IMF website and store in directory
download(
latest_y,
latest_r,
directory=config.BBPaths.raw_data,
filename=f"weo{latest_y}_{latest_r}.csv",
)

# Validate the file
if (
os.path.getsize(config.BBPaths.raw_data / f"weo{latest_y}_{latest_r}.csv")
< 1000
):
print(
f"Downloading release {latest_r} of "
f"{latest_y} failed. Trying previous release"
)
os.remove(config.BBPaths.raw_data / f"weo{latest_y}_{latest_r}.csv")

try:
_update_weo(latest_y, latest_r - 1)
except weo.dates.DateError:
_update_weo(latest_y - 1, latest_r)
WEO(release).update_data()


@dataclass
Expand All @@ -83,13 +53,11 @@ def __load_data(
released value (1 or 2).
"""

latest_y, latest_r = _check_weo_parameters(latest_y, latest_r)

names = {
"ISO": "iso_code",
"WEO Subject Code": "indicator",
"Subject Descriptor": "indicator_name",
"Subject Notes": "indicator_description",
# "Country/Series-specific Notes": "indicator_description",
"Units": "units",
"Scale": "scale",
"Estimates Start After": "estimates_start_after",
Expand All @@ -101,35 +69,17 @@ def __load_data(
"Country/Series-specific Notes",
]

# If _data doesn't exist or update is required, update the _data
if not (config.BBPaths.raw_data / f"weo{latest_y}_{latest_r}.csv").exists():
_update_weo(latest_y, latest_r)

# Load the _data from disk. If it doesn't exist, try the previous one
try:
df = WEO(config.BBPaths.raw_data / f"weo{latest_y}_{latest_r}.csv").df
self.version = {"year": latest_y, "release": latest_r}
except FileNotFoundError:
try:
df = WEO(
config.BBPaths.raw_data / f"weo{latest_y}_{latest_r - 1}.csv"
).df
self.version = {"year": latest_y, "release": latest_r - 1}
except FileNotFoundError:
df = WEO(
config.BBPaths.raw_data / f"weo{latest_y - 1}_{latest_r}.csv"
).df
self.version = {"year": latest_y - 1, "release": latest_r}
release = _check_parameters(latest_y, latest_r)
df = WEO(version=release).load_data().get_old_format_data()

# Load _data into _data object
self._raw_data = (
df.drop(to_drop, axis=1)
df.drop(
columns=to_drop,
)
.rename(columns=names)
.melt(id_vars=names.values(), var_name="year", value_name="value")
.assign(
year=lambda d: convert_to_datetime(d.year),
value=lambda d: clean_numeric_series(d.value),
)
.assign(year=lambda d: convert_to_datetime(d.year))
.dropna(subset=["value"])
.reset_index(drop=True)
)
Expand Down
35 changes: 35 additions & 0 deletions bblocks/import_tools/imf_weo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from bs4 import BeautifulSoup
import io
import requests
import numpy as np

from bblocks.import_tools.common import ImportData, get_response, unzip
from bblocks.config import BBPaths
Expand Down Expand Up @@ -383,3 +384,37 @@ def available_indicators(self) -> dict[str, str]:
.set_index("concept_code")["concept"]
.to_dict()
)

def get_old_format_data(self) -> pd.DataFrame:
    """Return the object's data in the wide layout that weo-reader produced.

    The long-format frame from ``self.get_data()`` has its columns renamed
    to the legacy weo-reader headers, is pivoted so that every
    ``time_period`` value becomes its own column holding ``obs_value``,
    and gains an ``ISO`` column derived from ``Country``. Rows for which
    no ISO identifier is found are dropped.

    NOTE: This returns all the data in the object in the old format,
    regardless of the indicators loaded. Not all columns that existed in
    weo-reader are returned, as they don't exist in the sdmx data files.
    However, this should not cause issues as they are metadata columns not
    used in analysis.

    Returns:
        A DataFrame with one row per combination of the legacy metadata
        columns and one column per time period.
    """

    logger.warning(
        "This method is a temporary fix used to patch the output format that weo-reader returns. "
        "It will be removed in the future."
    )

    # Maps the sdmx-style column names to the legacy weo-reader headers.
    col_mapper = {
        "concept_code": "WEO Subject Code",
        "ref_area_code": "WEO Country Code",
        "lastactualdate": "Estimates Start After",
        "notes": "Country/Series-specific Notes",
        "unit": "Units",
        "concept": "Subject Descriptor",
        "ref_area": "Country",
        "scale": "Scale",
    }

    # pivot() is documented to take a label or a list of labels, so the
    # dict view is materialised into a real list before being passed on.
    index_columns = list(col_mapper.values())

    return (
        self.get_data()
        .rename(columns=col_mapper)
        .pivot(index=index_columns, columns="time_period", values="obs_value")
        .reset_index()
        # NOTE(review): convert_id presumably maps country names to ISO
        # codes, yielding NaN for unmatched names - confirm against
        # bblocks.cleaning_tools.clean.
        .assign(ISO=lambda d: clean.convert_id(d.Country, not_found=np.nan))
        # A list keeps this working on pandas versions that do not accept
        # a scalar label for `subset`.
        .dropna(subset=["ISO"])
        .reset_index(drop=True)
    )
Loading
Loading