Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2712 api parameterisation #2724

Merged
merged 5 commits into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/R/R/cases_cachefile.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cases_cachefile <-
function(server = GDH_URL, folder="cache", ...) {
function(server, folder="cache", ...) {
return(paste0(folder, "/",
sha256(paste0(stringify_filters(...), "|", server)), ".csv"))
}
1 change: 0 additions & 1 deletion api/R/R/constants.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
GDH_URL <- "https://data.covid-19.global.health"
FILTERS <- c("country")
downloadAsync <- "/api/cases/downloadAsync"
3 changes: 2 additions & 1 deletion api/R/R/get_cached_cases.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
get_cached_cases <-
function(apikey, server = GDH_URL, refresh = FALSE, folder = "cache", ...) {
function(apikey, disease = 'covid-19', environment = 'production', refresh = FALSE, folder = "cache", ...) {
if(!file.exists(folder)) {
dir.create(folder)
}
server <- get_server(disease, environment)
cachename <- cases_cachefile(server, folder, ...)
if(!refresh && file.exists(cachename)) {
return(read_csv(cachename))
Expand Down
3 changes: 2 additions & 1 deletion api/R/R/get_cases.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
get_cases <-
function(apikey, server = GDH_URL, ...) {
function(apikey, disease = 'covid-19', environment = 'production', ...) {
server <- get_server(disease, environment)
data <- sprintf('{"format": "csv", "query": "%s"}', trimws(stringify_filters(...)))
res <- POST(paste0(server, downloadAsync), body = data,
add_headers("Content-Type" = "application/json",
Expand Down
12 changes: 12 additions & 0 deletions api/R/R/get_server.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
get_server <-
function(disease = DISEASE, environment = ENVIRONMENT) {
    # Resolve the Global.health server URL for a disease/environment pair.
    #
    # An explicit GDH_URL environment variable always wins, which lets
    # users point the client at a self-hosted instance.
    server_from_environment <- Sys.getenv('GDH_URL')
    if (server_from_environment != "") {
        return(server_from_environment)
    }
    # Production instances live at data.<disease>.global.health; other
    # environments (e.g. dev, qa) at <environment>-data.<disease>.global.health.
    if (environment == 'production') {
        return(sprintf('https://data.%s.global.health', disease))
    } else {
        # BUG FIX: the arguments were previously placed OUTSIDE the sprintf()
        # call -- sprintf('...'), environment, disease -- which returned the
        # raw format string instead of substituting the values.
        return(sprintf('https://%s-data.%s.global.health', environment, disease))
    }
}
13 changes: 8 additions & 5 deletions api/R/man/get_cached_cases.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@ Unless you always need to be working with the latest data, we recommend
using this function instead of \code{\link{get_cases}}.
}
\usage{
get_cached_cases(apikey, server = GDH_URL, refresh = FALSE, folder = "cache", \dots)
get_cached_cases(apikey, disease = 'covid-19', environment = 'production', refresh = FALSE, folder = "cache", \dots)
}
\arguments{
\item{apikey}{
Put your Global.health API key here. You can get a free API key by signing
up at \url{https://data.covid-19.global.health} and clicking on Profile.
up at \url{https://data.covid-19.global.health} (or another server instance)
and clicking on Profile.
}
\item{server}{
If you are using a self-hosted version of Global.health, put the URL to it
here. This is not needed if you are using the main instance.
\item{disease}{
Identify the outbreak for which you want to fetch data; the default is COVID-19.
}
\item{environment}{
Specify the environment whose data you want to use: default is production, alternatives are dev or qa.
}
\item{refresh}{
Optional, if set to TRUE, refreshes the cache and returns the latest version of
Expand Down
10 changes: 6 additions & 4 deletions api/R/man/get_cases.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@ Unless you always need to be working with the latest data, we recommend
using \code{\link{get_cached_cases}} instead of this function.
}
\usage{
get_cases(apikey, server = GDH_URL, \dots)
get_cases(apikey, disease = 'covid-19', environment = 'production', \dots)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{apikey}{
Put your Global.health API key here. You can get a free API key by signing
up at \url{https://data.covid-19.global.health} and clicking on Profile.
}
\item{server}{
If you are using a self-hosted version of Global.health, put the URL to it
here. This is not needed if you are using the main instance.
\item{disease}{
Identify the outbreak for which you want to fetch data; the default is COVID-19.
}
\item{environment}{
Specify the environment whose data you want to use: default is production, alternatives are dev or qa.
}
\item{\dots}{
Filters to use for data. Currently the only supported filter is 'country'
Expand Down
16 changes: 9 additions & 7 deletions api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ which describes the columns in the downloaded data.
To download data using one of our supported languages, you first need an API
key. Here are the steps to get one:

1. Login to the database at https://data.covid-19.global.health
1. Go to your profile at https://data.covid-19.global.health/profile
1. Login to the database at https://data.covid-19.global.health (or the instance pertaining to the outbreak you're interested in)
1. Go to your profile; click your user icon in the top-right corner, then "Profile" in the menu
1. Click Reset API Key (you only need to do this once)
1. Copy the API key

Expand Down Expand Up @@ -70,9 +70,10 @@ You can also use R's in-built `help()` to access the documentation.
Download the [gdh.py](python/gdh.py) script to a folder, then:

```python
from gdh import get_cases
from gdh import GlobalDotHealth
key = "API KEY HERE"
c1 = get_cases(key, country="NZ")
covid_line_list = GlobalDotHealth(key, 'covid-19')
c1 = covid_line_list.get_cases(country="NZ")
```

This will download the New Zealand case data from the database. Re-downloading
Expand All @@ -81,11 +82,12 @@ is a `get_cached_cases()` function which caches the data download which can be
used in later calls:

```python
from gdh import get_cases
from gdh import GlobalDotHealth
key = "API KEY HERE"
c1 = get_cached_cases(key, country="NZ")
covid_line_list = GlobalDotHealth(key, 'covid-19')
c1 = covid_line_list.get_cached_cases(country="NZ")
# use refresh=True to update the cache
c1 = get_cached_cases(key, country="NZ", refresh=True)
c1 = covid_line_list.get_cached_cases(country="NZ", refresh=True)
```

[httr]: https://httr.r-lib.org/
Expand Down
82 changes: 49 additions & 33 deletions api/python/gdh.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
# pip install requests pandas

# Example invocations:
# >>> from gdh import get_cases
# >>> from gdh import GlobalDotHealth
# >>> key = "API KEY HERE"
# >>> c1 = get_cases(key, country="New Zealand")
# >>> c19 = GlobalDotHealth(key, 'covid-19')
# >>> c1 = c19.get_cases(country="New Zealand")
#
# Use get_cached_cases() to cache the cases locally. This is useful for
# rerunning the script which will then use the cached version.
Expand All @@ -19,15 +20,18 @@
import requests
import pandas as pd

GDH_URL_DEFAULT = "https://data.covid-19.global.health"
GDH_URL = os.getenv("GDH_URL", GDH_URL_DEFAULT)
downloadAsync = "/api/cases/downloadAsync"

# sync with data-serving/data-service/src/util/search.ts
FILTERS = {
"country"
}

ENVIRONMENTS = {
"production",
"dev",
"qa"
}

def stringify_filters(**kwargs):
if not kwargs:
Expand All @@ -47,37 +51,49 @@ def stringify_filters(**kwargs):
return " ".join(f"{field}:{value}" for field, value in kwargs.items())


def get_cases(apikey, server=GDH_URL, **kwargs):
res = requests.post(
f"{server}{downloadAsync}",
data=json.dumps({"format": "csv", "query": stringify_filters(**kwargs)}),
headers={"Content-Type": "application/json", "X-API-Key": apikey},
)
if res.status_code != 200:
raise ConnectionError(res.text)
if "signedUrl" in res.text:
# country-export returns a gzip compressed file
signedUrl = json.loads(res.text)["signedUrl"]
return pd.read_csv(signedUrl, compression="gzip")
else:
with io.StringIO(res.text) as buf:
return pd.read_csv(buf)


def cases_cachefile(server=GDH_URL, folder="cache", **kwargs):
def cases_cachefile(server, folder="cache", **kwargs):
    """Return the cache file path for a given server/filter combination.

    The filename is the SHA-256 hex digest of the stringified filters
    joined (with '|') to the server URL, so distinct queries and distinct
    servers map to distinct cache files under *folder*.
    """
    cache_key = "|".join((stringify_filters(**kwargs), server))
    digest = hashlib.sha256(cache_key.encode("utf-8")).hexdigest()
    return f"{folder}/{digest}.csv"


# Returns a cached copy of cases if it exists, otherwise saves to cache
def get_cached_cases(apikey, server=GDH_URL, refresh=False, folder="cache", **kwargs):
if not os.path.exists(folder):
os.mkdir(folder)
cachename = cases_cachefile(server, folder, **kwargs)
if not refresh and os.path.exists(cachename):
return pd.read_csv(cachename)
else:
df = get_cases(apikey, server, **kwargs)
df.to_csv(cachename, index=False)
return df
class GlobalDotHealth:
    """Client for downloading line-list case data from Global.health.

    The server URL is derived from the disease name and environment
    ('production' uses data.<disease>.global.health, anything else uses
    <environment>-data.<disease>.global.health), unless the GDH_URL
    environment variable is set, which overrides it wholesale.
    """

    def __init__(self, apikey, disease='covid-19', environment='production'):
        """Store the API key and resolve the server URL.

        apikey: Global.health API key (from your profile page).
        disease: outbreak identifier, e.g. 'covid-19'.
        environment: 'production' (default), or e.g. 'dev'/'qa'.
        """
        # Let someone override our URL explicitly
        if server := os.getenv('GDH_URL'):
            self.server = server
        else:
            # Otherwise, build a URL from the disease name/environment
            if environment == 'production':
                self.server = f'https://data.{disease}.global.health'
            else:
                self.server = f'https://{environment}-data.{disease}.global.health'
        self.apikey = apikey

    def get_cases(self, **kwargs):
        """Download cases matching the keyword filters as a pandas DataFrame.

        Raises ConnectionError (with the server's response text) on any
        non-200 response.
        """
        res = requests.post(
            f"{self.server}{downloadAsync}",
            data=json.dumps({"format": "csv", "query": stringify_filters(**kwargs)}),
            headers={"Content-Type": "application/json", "X-API-Key": self.apikey},
        )
        if res.status_code != 200:
            raise ConnectionError(res.text)
        if "signedUrl" in res.text:
            # country-export returns a gzip compressed file
            signedUrl = json.loads(res.text)["signedUrl"]
            return pd.read_csv(signedUrl, compression="gzip")
        else:
            with io.StringIO(res.text) as buf:
                return pd.read_csv(buf)

    # Returns a cached copy of cases if it exists, otherwise saves to cache
    def get_cached_cases(self, refresh=False, folder="cache", **kwargs):
        # BUG FIX: 'self' was missing from this method's signature even
        # though the body references self.server and self.get_cases, so any
        # instance call would bind the instance to 'refresh' and then crash.
        if not os.path.exists(folder):
            os.mkdir(folder)
        cachename = cases_cachefile(self.server, folder, **kwargs)
        if not refresh and os.path.exists(cachename):
            return pd.read_csv(cachename)
        else:
            df = self.get_cases(**kwargs)
            df.to_csv(cachename, index=False)
            return df
2 changes: 1 addition & 1 deletion api/python/test_gdh.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ def test_stringify_filters(source, expected):

def test_cases_cachefile():
assert (
cases_cachefile(country="Belgium")
cases_cachefile('https://data.covid-19.global.health', country="Belgium")
== "cache/e6ee72213b1c28500279d56c119fb9eccb2d5c67b0b6167ca241980a3bfc7762.csv"
)