Skip to content

Commit

Permalink
🐛 fix catalogPath for cases with multiple tables
Browse files (browse the repository at this point in the history)
  • Loading branch information
Marigold committed Dec 12, 2023
1 parent 6b5be5f commit 0e237ca
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 50 deletions.
23 changes: 12 additions & 11 deletions etl/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,16 +780,6 @@ def run(self) -> None:

table = gh._adapt_table_for_grapher(table)

# generate table with entity_id, year and value for every column
upsert = lambda t, catalog_path, verbose: gi.upsert_table( # noqa: E731
engine,
t,
dataset_upsert_results,
catalog_path=catalog_path,
dimensions=(t.iloc[:, 0].metadata.additional_info or {}).get("dimensions"),
verbose=verbose,
)

for t in gh._yield_wide_table(table, na_action="drop"):
i += 1
assert len(t.columns) == 1
Expand All @@ -802,7 +792,18 @@ def run(self) -> None:
lambda: (time.sleep(10), log.info("upsert_dataset.continue_without_logging"))
)

futures.append(thread_pool.submit(upsert, t, catalog_path=catalog_path, verbose=verbose))
# generate table with entity_id, year and value for every column
futures.append(
thread_pool.submit(
gi.upsert_table,
engine,
t,
dataset_upsert_results,
catalog_path=catalog_path,
dimensions=(t.iloc[:, 0].metadata.additional_info or {}).get("dimensions"),
verbose=verbose,
)
)

variable_upsert_results = [future.result() for future in as_completed(futures)]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Load a garden dataset and create a grapher dataset."""

from typing import cast

from owid.catalog import Dataset

from etl.helpers import PathFinder, create_dataset, grapher_checks

Expand All @@ -15,7 +13,7 @@ def run(dest_dir: str) -> None:
# Load inputs.
#
# Load garden dataset.
ds_garden = cast(Dataset, paths.load_dependency("cherry_blossom"))
ds_garden = paths.load_dataset("cherry_blossom")

# Read table from garden dataset.
tb = ds_garden["cherry_blossom"]
Expand Down
7 changes: 2 additions & 5 deletions etl/steps/data/grapher/ihme_gbd/2019/gbd_cause.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from owid import catalog

from etl.helpers import PathFinder

from .shared import run_wrapper
Expand All @@ -8,6 +6,5 @@


def run(dest_dir: str) -> None:
garden_dataset = paths.garden_dataset
dataset = catalog.Dataset.create_empty(dest_dir, garden_dataset.metadata)
dataset = run_wrapper(garden_dataset=garden_dataset, dataset=dataset)
ds_garden = paths.load_dataset("gbd_cause")
run_wrapper(dest_dir, garden_dataset=ds_garden)
9 changes: 2 additions & 7 deletions etl/steps/data/grapher/ihme_gbd/2019/gbd_child_mortality.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from owid import catalog

from etl.helpers import PathFinder

from .shared import run_wrapper
Expand All @@ -8,8 +6,5 @@


def run(dest_dir: str) -> None:
garden_dataset = paths.garden_dataset
dataset = catalog.Dataset.create_empty(dest_dir, garden_dataset.metadata)
# dataset.save()

run_wrapper(garden_dataset=garden_dataset, dataset=dataset)
ds_garden = paths.load_dataset("gbd_child_mortality")
run_wrapper(dest_dir, garden_dataset=ds_garden)
7 changes: 2 additions & 5 deletions etl/steps/data/grapher/ihme_gbd/2019/gbd_mental_health.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from owid import catalog

from etl.helpers import PathFinder

from .shared import run_wrapper
Expand All @@ -8,6 +6,5 @@


def run(dest_dir: str) -> None:
garden_dataset = paths.garden_dataset
dataset = catalog.Dataset.create_empty(dest_dir, garden_dataset.metadata)
dataset = run_wrapper(garden_dataset=garden_dataset, dataset=dataset)
ds_garden = paths.load_dataset("gbd_mental_health")
run_wrapper(dest_dir, garden_dataset=ds_garden)
7 changes: 2 additions & 5 deletions etl/steps/data/grapher/ihme_gbd/2019/gbd_prevalence.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from owid import catalog

from etl.helpers import PathFinder

from .shared import run_wrapper
Expand All @@ -8,6 +6,5 @@


def run(dest_dir: str) -> None:
garden_dataset = paths.garden_dataset
dataset = catalog.Dataset.create_empty(dest_dir, garden_dataset.metadata)
run_wrapper(garden_dataset=garden_dataset, dataset=dataset)
ds_garden = paths.load_dataset("gbd_prevalence")
run_wrapper(dest_dir, garden_dataset=ds_garden)
7 changes: 2 additions & 5 deletions etl/steps/data/grapher/ihme_gbd/2019/gbd_risk.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from owid import catalog

from etl.helpers import PathFinder

from .shared import run_wrapper
Expand All @@ -8,6 +6,5 @@


def run(dest_dir: str) -> None:
garden_dataset = paths.garden_dataset
dataset = catalog.Dataset.create_empty(dest_dir, garden_dataset.metadata)
run_wrapper(garden_dataset=garden_dataset, dataset=dataset)
ds_garden = paths.load_dataset("gbd_risk")
run_wrapper(dest_dir, garden_dataset=ds_garden)
30 changes: 21 additions & 9 deletions etl/steps/data/grapher/ihme_gbd/2019/shared.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
from owid.catalog import Dataset
from structlog import get_logger

log = get_logger()
from etl.helpers import create_dataset, grapher_checks


def run_wrapper(garden_dataset: Dataset, dataset: Dataset) -> Dataset:
# add tables to dataset
tables = garden_dataset.table_names
for table in tables:
tab = garden_dataset[table]
dataset.add(tab)
dataset.save()
def run_wrapper(dest_dir: str, garden_dataset: Dataset) -> None:
    """Create and save a grapher dataset holding every table of a garden dataset.

    Args:
        dest_dir: Destination directory handed to ``create_dataset`` for the
            new grapher dataset.
        garden_dataset: Garden dataset whose tables and metadata are carried
            over to the grapher dataset.
    """
    # Read ALL tables from the garden dataset. This wrapper is shared by several
    # steps (gbd_cause, gbd_child_mortality, gbd_mental_health, gbd_prevalence,
    # gbd_risk), so it must neither truncate the table list nor reference a
    # table name that is specific to a single one of those datasets.
    tables = [garden_dataset[table_name] for table_name in garden_dataset.table_names]

    #
    # Save outputs.
    #
    # Create a new grapher dataset with the same metadata as the garden dataset.
    ds_grapher = create_dataset(dest_dir, tables=tables, default_metadata=garden_dataset.metadata)

    #
    # Checks.
    #
    grapher_checks(ds_grapher)

    # Save changes in the new grapher dataset.
    ds_grapher.save()

0 comments on commit 0e237ca

Please sign in to comment.