famines: additional indicators for Bastian's static chart (#3846)

owid · Jan 15, 2025 · d83d85b · d83d85b
1 parent 257df04
commit d83d85b
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 1 deletion.
diff --git a/etl/steps/data/garden/wpf/2024-10-03/famines_by_regime_gdp.meta.yml b/etl/steps/data/garden/wpf/2024-10-03/famines_by_regime_gdp.meta.yml
@@ -32,4 +32,13 @@ tables:
           description_processing: When GDP data was missing for certain years but nearby estimates were available, we used the closest or most relevant data. For example, during Cuba's famine from 1895 to 1898, we used the GDP from 1892. For China’s famine from 1876 to 1879, we used the average GDP from 1870 to 1887. In special cases like Russia and Kazakhstan (1932 to 1934), we used the USSR’s average GDP from 1940 to 1946. For recent gaps, we used 2022 data for countries like the Central African Republic, Ethiopia, and Syria.
 
        regime_redux_row_owid:
-        description_processing: Closed and electoral autocracies here were treated as “autocracies”.
+        description_processing: Closed and electoral autocracies here were treated as “autocracies”. The regime classification is 0 for autocracies and 1 for democracies.
+
+       midpoint_year:
+          title: Midpoint year of famine
+          unit: ''
+          description_processing: The midpoint year of the famine is the year halfway between the start and end of the famine.
+
+       country_name:
+          title: Country
+          unit: ''
diff --git a/etl/steps/data/garden/wpf/2024-10-03/famines_by_regime_gdp.py b/etl/steps/data/garden/wpf/2024-10-03/famines_by_regime_gdp.py
@@ -1,5 +1,7 @@
 """Load a meadow dataset and create a garden dataset."""
 
+import re
+
 import owid.catalog.processing as pr
 from owid.catalog import Dataset, Table
 
@@ -76,6 +78,15 @@ def run(dest_dir: str) -> None:
     # Add GDP data.
     tb = add_gdp(tb, tb_gdp)
 
+    # Extract the text before the year from the famine_name column
+    tb["country_name"] = tb["famine_name"].apply(lambda x: re.split(r"\s+\d{4}", x)[0])
+
+    # Strip the "(Hungerplan)" annotation from country_name (famine_name itself is left unchanged)
+    tb["country_name"] = tb["country_name"].str.replace(r"\s*\(Hungerplan\)", "", regex=True)
+    tb["midpoint_year"] = tb["famine_name"].apply(extract_years)
+
+    tb["regime_redux_row_owid"] = tb["regime_redux_row_owid"].replace({3: 0, 2: 1})
+
     # Drop columns that are not used in this dataset.
     tb = tb.drop(columns=["country", "conflict", "government_policy_overall", "external_factors"])
     tb = tb.format(["famine_name", "year"])
@@ -92,6 +103,18 @@ def run(dest_dir: str) -> None:
     ds_garden.save()
 
 
+def extract_years(famine_name):
+    """Return the midpoint of the 4-digit years in famine_name: floored mean of two, the year itself for one, else None."""
+    years = re.findall(r"\d{4}", famine_name)  # non-overlapping four-digit matches, left to right
+    if len(years) == 2:
+        start_year, end_year = map(int, years)
+        return (start_year + end_year) // 2  # floor division: a midpoint between two years rounds down
+    elif len(years) == 1:
+        return int(years[0])  # a single year is treated as its own midpoint
+    else:
+        return None  # zero or more than two matches: no midpoint is derived
+
+
 def add_regime(tb_famines: Table, ds_regime: Dataset) -> Table:
     """
     Add regime information to the famines table by merging it with the regime dataset and applying custom regime rules.