Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data(cases-deaths): new tool to inject data from the WHO #2783

Merged
merged 12 commits into from
Feb 20, 2023
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,4 @@ secrets.yaml
scripts/docs/_build/
scripts/docs/cowidev/api/

.env*
286,392 changes: 286,392 additions & 0 deletions public/data/cases_deaths/COVID-19 Cases and deaths - WHO.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/biweekly_cases.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/biweekly_cases_per_million.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/biweekly_deaths.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/biweekly_deaths_per_million.csv

Large diffs are not rendered by default.

281,828 changes: 281,828 additions & 0 deletions public/data/cases_deaths/full_data.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/new_cases.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/new_cases_per_million.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/new_deaths.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/new_deaths_per_million.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/total_cases.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/total_cases_per_million.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/total_deaths.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/total_deaths_per_million.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/weekly_cases.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/weekly_cases_per_million.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/weekly_deaths.csv

Large diffs are not rendered by default.

1,142 changes: 1,142 additions & 0 deletions public/data/cases_deaths/weekly_deaths_per_million.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2023-02-17T16:01:54
239 changes: 239 additions & 0 deletions scripts/input/who/cases_deaths.countries_mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"American Samoa": "American Samoa",
"Andorra": "Andorra",
"Angola": "Angola",
"Anguilla": "Anguilla",
"Antigua and Barbuda": "Antigua and Barbuda",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Aruba": "Aruba",
"Australia": "Australia",
"Austria": "Austria",
"Azerbaijan": "Azerbaijan",
"Bahamas": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bermuda": "Bermuda",
"Bhutan": "Bhutan",
"Bolivia (Plurinational State of)": "Bolivia",
"Bosnia and Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"British Virgin Islands": "British Virgin Islands",
"Brunei Darussalam": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cabo Verde": "Cape Verde",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Cayman Islands": "Cayman Islands",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Chile": "Chile",
"China": "China",
"Colombia": "Colombia",
"Comoros": "Comoros",
"Congo": "Congo",
"Cook Islands": "Cook Islands",
"Costa Rica": "Costa Rica",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Cura\u00e7ao": "Curacao",
"Cyprus": "Cyprus",
"Czechia": "Czechia",
"C\u00f4te d\u2019Ivoire": "Cote d'Ivoire",
"Democratic People's Republic of Korea": "North Korea",
"Democratic Republic of the Congo": "Democratic Republic of Congo",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominica": "Dominica",
"Dominican Republic": "Dominican Republic",
"Ecuador": "Ecuador",
"Egypt": "Egypt",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Eswatini": "Eswatini",
"Ethiopia": "Ethiopia",
"Falkland Islands (Malvinas)": "Falkland Islands",
"Faroe Islands": "Faeroe Islands",
"Fiji": "Fiji",
"Finland": "Finland",
"France": "France",
"French Guiana": "French Guiana",
"French Polynesia": "French Polynesia",
"Gabon": "Gabon",
"Gambia": "Gambia",
"Georgia": "Georgia",
"Germany": "Germany",
"Ghana": "Ghana",
"Gibraltar": "Gibraltar",
"Greece": "Greece",
"Greenland": "Greenland",
"Grenada": "Grenada",
"Guadeloupe": "Guadeloupe",
"Guam": "Guam",
"Guatemala": "Guatemala",
"Guernsey": "Guernsey",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Holy See": "Vatican",
"Honduras": "Honduras",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iran (Islamic Republic of)": "Iran",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Isle of Man": "Isle of Man",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jersey": "Jersey",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Kiribati": "Kiribati",
"Kuwait": "Kuwait",
"Kyrgyzstan": "Kyrgyzstan",
"Lao People's Democratic Republic": "Laos",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Liechtenstein": "Liechtenstein",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Marshall Islands": "Marshall Islands",
"Martinique": "Martinique",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mayotte": "Mayotte",
"Mexico": "Mexico",
"Micronesia (Federated States of)": "Micronesia (country)",
"Monaco": "Monaco",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Montserrat": "Montserrat",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nauru": "Nauru",
"Nepal": "Nepal",
"Netherlands": "Netherlands",
"New Caledonia": "New Caledonia",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Niger": "Niger",
"Nigeria": "Nigeria",
"Niue": "Niue",
"North Macedonia": "North Macedonia",
"Norway": "Norway",
"Oman": "Oman",
"Pakistan": "Pakistan",
"Palau": "Palau",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Peru": "Peru",
"Philippines": "Philippines",
"Poland": "Poland",
"Portugal": "Portugal",
"Puerto Rico": "Puerto Rico",
"Qatar": "Qatar",
"Republic of Korea": "South Korea",
"Republic of Moldova": "Moldova",
"Romania": "Romania",
"Russian Federation": "Russia",
"Rwanda": "Rwanda",
"R\u00e9union": "Reunion",
"Saint Barth\u00e9lemy": "Saint Barthelemy",
"Saint Helena, Ascension and Tristan da Cunha": "Saint Helena",
"Saint Kitts and Nevis": "Saint Kitts and Nevis",
"Saint Lucia": "Saint Lucia",
"Saint Pierre and Miquelon": "Saint Pierre and Miquelon",
"Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines",
"Samoa": "Samoa",
"San Marino": "San Marino",
"Sao Tome and Principe": "Sao Tome and Principe",
"Saudi Arabia": "Saudi Arabia",
"Senegal": "Senegal",
"Serbia": "Serbia",
"Seychelles": "Seychelles",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Slovakia": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"Sudan": "Sudan",
"Suriname": "Suriname",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syrian Arab Republic": "Syria",
"Tajikistan": "Tajikistan",
"Thailand": "Thailand",
"Timor-Leste": "Timor",
"Togo": "Togo",
"Tokelau": "Tokelau",
"Tonga": "Tonga",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkmenistan": "Turkmenistan",
"Turks and Caicos Islands": "Turks and Caicos Islands",
"Tuvalu": "Tuvalu",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Republic of Tanzania": "Tanzania",
"United States Virgin Islands": "United States Virgin Islands",
"United States of America": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela (Bolivarian Republic of)": "Venezuela",
"Viet Nam": "Vietnam",
"Wallis and Futuna": "Wallis and Futuna",
"Yemen": "Yemen",
"Zambia": "Zambia",
"Zimbabwe": "Zimbabwe",
"Kosovo[1]": "Kosovo",
"Northern Mariana Islands (Commonwealth of the)": "Northern Mariana Islands",
"Pitcairn Islands": "Pitcairn",
"Saint Martin": "Saint Martin (French part)",
"Sint Maarten": "Sint Maarten (Dutch part)",
"The United Kingdom": "United Kingdom",
"T\u00fcrkiye": "Turkey",
"occupied Palestinian territory, including east Jerusalem": "Palestine",
"Sint Eustatius": "Sint Eustatius",
"Bonaire": "Bonaire",
"Saba" : "Saba",
"Other" : "Other"
}
12 changes: 11 additions & 1 deletion scripts/scripts/autoupdate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ git reset --hard origin/master && git pull


# =====================================================================
# JHU
# Cases & Deaths

# Attempt to download JHU CSVs
cowid jhu get
Expand All @@ -65,6 +65,16 @@ else
echo "JHU export is up to date"
fi

# Regenerate the Case/Death files only on runs that happen at 00h, 06h, 12h or 18h.
hour=$(date +%H)
# `date +%H` is zero-padded ("00".."23"), so compare against two-digit strings.
# Every test needs its own closing ` ]`, separated from the operand by a space.
if [ "$hour" == "00" ] || [ "$hour" == "06" ] || [ "$hour" == "12" ] || [ "$hour" == "18" ] ; then
  echo "Generating Case/Death files..."
  cowid --server casedeath generate
  # python $SCRIPTS_DIR/scripts/jhu.py --skip-download
  git_push "case-death"
else
  echo "Case/Death export is up to date"
fi

# =====================================================================
# Decoupling charts
hour=$(date +%H)
Expand Down
1 change: 1 addition & 0 deletions scripts/scripts/grapherupdate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ cowidev-grapher-db
minute=$(date +%M)
if [ $minute == 40 ] ; then
cowid --server jhu grapher-db
cowid --server casedeath grapher-db
cowid --server decoupling grapher-db
cowid --server hosp grapher-db
# cowid --server gmobility grapher-db
Expand Down
1 change: 1 addition & 0 deletions scripts/src/cowidev/cases_deaths/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Module for cases and deaths (to replace JHU)"""
113 changes: 113 additions & 0 deletions scripts/src/cowidev/cases_deaths/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import json
import pandas as pd

from cowidev import PATHS
from cowidev.cases_deaths.params import API


# Address of the WHO global cases/deaths CSV that `load_data` reads with pandas.
URL = "https://covid19.who.int/WHO-COVID-19-global-data.csv"


def load_data(server_mode):
    """Download the WHO cases/deaths CSV and return it as a processed table.

    Args:
        server_mode: When truthy, a download failure is also reported through
            ``API.send_error`` before the exception is raised.

    Returns:
        The DataFrame produced by ``process_data`` from the downloaded CSV.

    Raises:
        ValueError: If the CSV at ``URL`` cannot be read. The underlying
            exception is attached as ``__cause__`` so the real reason
            (network, HTTP status, parsing, ...) is not lost.
    """
    # Load data
    try:
        df = pd.read_csv(URL)
    except Exception as err:
        # Any failure while reading means the source is unusable for this run.
        if server_mode:
            API.send_error(
                channel="#corona-data-updates",
                title="Cases/Deaths: File not found in source!",
                message="Could not load data from WHO!",
            )
        raise ValueError("Could not load data from WHO.") from err
    # Process data
    return process_data(df, API, server_mode)


def process_data(df: pd.DataFrame, API, server_mode):
    """Run a freshly loaded table through the cleaning steps, in order.

    The steps are: ``format_table``, ``harmonize_country_names`` and
    ``handle_country_issues``.

    Args:
        df: Table as read from the source CSV.
        API: Forwarded to ``harmonize_country_names`` as its ``api`` argument.
        server_mode: Forwarded to ``harmonize_country_names``.

    Returns:
        The table returned by ``handle_country_issues``.
    """
    # Step 1: column names, column selection and row ordering.
    formatted = format_table(df)
    # Remove zero-values (currently disabled)
    # formatted = formatted[(formatted["total_cases"] > 0) | (formatted["total_deaths"] > 0)]
    # Step 2: standardize country names.
    harmonized = harmonize_country_names(formatted, API, server_mode)
    # Step 3: country-specific adjustments.
    return handle_country_issues(harmonized)


def format_table(df: pd.DataFrame) -> pd.DataFrame:
    """Select the six columns of interest, rename them and sort the rows.

    Args:
        df: Table containing at least the columns ``Country``,
            ``Date_reported``, ``New_cases``, ``Cumulative_cases``,
            ``New_deaths`` and ``Cumulative_deaths``.

    Returns:
        A table with columns ``location``, ``date``, ``new_cases``,
        ``total_cases``, ``new_deaths`` and ``total_deaths`` (in that order),
        sorted by ``location`` and then ``date``. Any other input column is
        dropped.
    """
    source_to_target = {
        "Country": "location",
        "Date_reported": "date",
        "New_cases": "new_cases",
        "Cumulative_cases": "total_cases",
        "New_deaths": "new_deaths",
        "Cumulative_deaths": "total_deaths",
    }
    renamed = df.rename(columns=source_to_target)
    # The dict's insertion order defines the output column order.
    kept_columns = list(source_to_target.values())
    selected = renamed[kept_columns]
    return selected.sort_values(["location", "date"])


def harmonize_country_names(df: pd.DataFrame, api, server_mode) -> pd.DataFrame:
    """Harmonize country names with OWID's standard names.

    The mapping is read from the JSON file at
    ``PATHS.INTERNAL_INPUT_WHO_CASES_DEATHS_COUNTRY_STD_FILE``. The set of
    names in ``df["location"]`` must match the mapping's keys exactly.

    Args:
        df: Table with a ``location`` column holding the source's country names.
        api: Object whose ``send_error(channel=, title=, message=)`` is called
            to report a mismatch when ``server_mode`` is truthy.
        server_mode: When truthy, mismatches are reported through ``api``
            before the exception is raised.

    Returns:
        A new table equal to ``df`` but with ``location`` replaced by the
        mapped names. The input table is left untouched.

    Raises:
        ValueError: If a mapped country is absent from ``df``, if ``df``
            contains a country that is not in the mapping, or if any location
            is still null after mapping.
    """
    # Load country name mapping. JSON is UTF-8 by specification, so do not
    # depend on the platform's default encoding.
    with open(PATHS.INTERNAL_INPUT_WHO_CASES_DEATHS_COUNTRY_STD_FILE, "r", encoding="utf-8") as f:
        dix = json.load(f)
    # Check missing / unexpected countries
    countries_received = set(df["location"])
    countries_expected = set(dix.keys())
    if countries_missing := countries_expected - countries_received:
        if server_mode:
            api.send_error(
                channel="#corona-data-updates",
                title="Cases/Deaths: Missing countries!",
                message=f"There were missing countries in source: {countries_missing}",
            )
        raise ValueError(f"Missing countries: {countries_missing}")
    if countries_unexpected := countries_received - countries_expected:
        if server_mode:
            api.send_error(
                channel="#corona-data-updates",
                title="Cases/Deaths: Unexpected countries!",
                message=f"There were unexpected countries in source: {countries_unexpected}",
            )
        raise ValueError(f"Unexpected countries: {countries_unexpected}")
    # Harmonize country names
    harmonized = df["location"].map(dix)
    # Explicit raise instead of `assert`, so the check survives `python -O`.
    if harmonized.isnull().any():
        raise ValueError("There are still missing countries!")
    # `assign` builds a new frame, so the caller's table is not modified.
    return df.assign(location=harmonized)


def handle_country_issues(df: pd.DataFrame) -> pd.DataFrame:
    """Handle some country-specific issues.

    Example: "Bonaire, Sint Eustatius and Saba" does not come as a country,
    but as its individual entities.

    Args:
        df: Table with ``location`` and ``date`` columns plus the value
            columns to aggregate.

    Returns:
        A new table with one row per (``location``, ``date``), where the
        catch-all entity has been removed and the three individual entities
        are summed into "Bonaire, Sint Eustatius and Saba". The input table
        is left untouched.
    """
    # Remove the catch-all entity. The country-mapping file standardizes it as
    # "Other", so filtering only on "Others" would never match; drop both.
    df = df[~df["location"].isin(["Other", "Others"])].copy()
    # Estimate "Bonaire, Sint Eustatius and Saba" as the sum of its individual entities
    countries = ["Sint Eustatius", "Bonaire", "Saba"]
    df.loc[df["location"].isin(countries), "location"] = "Bonaire, Sint Eustatius and Saba"
    # Summing per (location, date) merges the three relabelled entities.
    return df.groupby(["location", "date"], as_index=False).sum()


def check_data_correctness(df, logger, server_mode):
    """Report rows of ``df`` that repeat an earlier (date, location) pair.

    Args:
        df: Table with at least ``date`` and ``location`` columns.
        logger: Not used by this function; kept so existing callers keep working.
        server_mode: When truthy, duplicates are also reported through
            ``API.send_warning``.

    Returns:
        None. Duplicates are only reported (on standard error and, in server
        mode, through ``API``); nothing is raised or returned.
    """
    # Local import: this module does not import `sys` at file level.
    import sys

    # Check for duplicate rows (computed once and reused for every report).
    duplicates = df[df.duplicated(subset=["date", "location"])]
    if duplicates.empty:
        return
    if server_mode:
        API.send_warning(
            channel="#corona-data-updates",
            title="Cases/Deaths: Duplicate rows!",
            message=f"Found duplicate rows in the WHO dataset: {duplicates}",
        )
    # Plain stderr output: `print_err` and `ERROR` are not defined in this
    # module, so referring to them would raise NameError right here.
    print("\n[Error] Found duplicate rows:", file=sys.stderr)
    print(duplicates, file=sys.stderr)
Loading