forked from mariano22/covid19_dashboard
-
Notifications
You must be signed in to change notification settings - Fork 0
/
backend.py
125 lines (108 loc) · 4.79 KB
/
backend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import datetime
import os
import time
import unicodedata

import numpy as np
import pandas as pd
def normalize_str(s):
    """Normalize a name for comparison: strip accents (e.g. á→A) and uppercase."""
    decomposed = unicodedata.normalize("NFKD", s)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    return ascii_only.upper()
# Remote source CSVs, keyed by the local filename they are cached under.
CSVS_TO_DOWNLOAD = {
    'Argentina_Provinces.csv': 'https://raw.githubusercontent.com/mariano22/argcovidapi/master/csvs/Argentina_Provinces.csv',
    'SantaFe_AllData.csv': 'https://raw.githubusercontent.com/mariano22/argcovidapi/master/csvs/SantaFe_AllData.csv',
}
# Local cache directory for the downloaded CSVs (also expected to hold info_arg.csv).
DATA_DIR = './data/'
def _download_expired_data():
    """Download every configured CSV that is missing locally or older than 30 minutes.

    Side effects: creates DATA_DIR if needed and (re)writes the cached CSV files.
    """
    # Fix: without this, the first run crashed because ./data/ did not exist yet.
    os.makedirs(DATA_DIR, exist_ok=True)
    max_age_seconds = 30 * 60  # cache TTL
    for csv_fn, csv_remote_fp in CSVS_TO_DOWNLOAD.items():
        csv_fp = os.path.join(DATA_DIR, csv_fn)
        is_stale = (not os.path.isfile(csv_fp)) or (time.time() - os.stat(csv_fp).st_mtime > max_age_seconds)
        if is_stale:
            print('Downloading', csv_fn)
            # Round-trips through pandas (parse + re-write) rather than copying raw bytes.
            pd.read_csv(csv_remote_fp).to_csv(csv_fp, index=False)
def _load_National_data(csv_fp):
df_arg = pd.read_csv(csv_fp)
df_arg['LOCATION'] = 'ARGENTINA/' + df_arg['PROVINCIA']
df_arg = df_arg.drop(columns=['PROVINCIA'])
df_arg = df_arg.set_index(['TYPE','LOCATION'])
df_arg = df_arg.rename(columns=lambda colname: pd.to_datetime(colname,format='%d/%m').replace(year=2020))
total_arg = df_arg.groupby(level=[0]).sum()
total_arg['LOCATION']='ARGENTINA'
total_arg = total_arg.reset_index().set_index(['TYPE','LOCATION'])
df_arg = pd.concat([df_arg,total_arg]).sort_index()
return df_arg
def _set_location_safe(row):
location_prefix = 'ARGENTINA/SANTA FE'
if row['DEPARTMENT']=='##TOTAL':
return location_prefix
location_prefix += '/'+row['DEPARTMENT'][3:]
if row['PLACE'].startswith('#'):
return location_prefix
return location_prefix +'/'+ row['PLACE']
def _load_SantaFe_data(csv_fp):
    """Load the Santa Fe per-department/place CSV into a (TYPE, LOCATION)-indexed frame.

    Only CONFIRMADOS rows are kept, and the '##TOTAL' department rows are
    dropped. Date columns ('%d/%m/%Y' strings) become Timestamps.
    """
    santafe = pd.read_csv(csv_fp)
    santafe['LOCATION'] = santafe.apply(_set_location_safe, axis=1)
    keep = (santafe['TYPE'] == 'CONFIRMADOS') & (santafe['DEPARTMENT'] != '##TOTAL')
    santafe = santafe[keep].drop(columns=['DEPARTMENT', 'PLACE'])
    santafe = santafe.set_index(['TYPE', 'LOCATION'])
    return santafe.rename(columns=lambda day: pd.to_datetime(day, format='%d/%m/%Y'))
def _load_data_time_series():
    """Combine the national and Santa Fe series and append per-day difference rows.

    For every (TYPE, LOCATION) row a '<TYPE>_DIFF' counterpart holds the
    day-over-day deltas; the first date diffs against an implicit 0.
    """
    national = _load_National_data(os.path.join(DATA_DIR, 'Argentina_Provinces.csv'))
    santafe = _load_SantaFe_data(os.path.join(DATA_DIR, 'SantaFe_AllData.csv'))
    combined = pd.concat([national, santafe])
    # Dates absent from one source mean zero reported cases, not missing data.
    combined = combined.fillna(0).sort_index()
    # Prepend a 0 to each row so the first date's delta equals its value,
    # then drop the padding entry (index label 0) after diffing.
    diffs = combined.apply(lambda row: pd.concat([pd.Series([0]), row]).diff().drop(0), axis=1)
    diffs.index = diffs.index.map(lambda key: (key[0] + '_DIFF', key[1]))
    return pd.concat([combined, diffs]).sort_index()
def _only_povs(df):
df = df[ df['LOCATION'].apply(lambda l : l.count('/')==1) ].copy()
df['LOCATION'] = df['LOCATION'].apply(lambda l : l[10:])
return df
def _soon_deprecated_data(df_time_series, df_info):
    """Reshape the wide province time series into the long per-(date, province, var)
    format consumed by the legacy dashboard frontend.

    df_time_series: wide frame whose first two columns are TYPE and LOCATION,
        followed by one column per date (see the .columns[2:] slice below).
    df_info: per-LOCATION frame with LAT, LONG and POPULATION columns.
    Returns a frame with columns: date, Country/Region, Province/State, var,
    value, Lat, Long, population, value_new.
    """
    df_time_series=_only_povs(df_time_series)
    df_info=_only_povs(df_info)
    # Synthetic zero baseline column so the first real date gets a defined delta;
    # filtered back out at the end.
    # NOTE(review): this column label is a str while the real date labels are
    # presumably Timestamps — confirm the final `!='2020-03-02'` filter removes
    # the intended rows.
    df_time_series['2020-03-02 00:00:00']=0.0
    # Wide -> long: one row per (TYPE, LOCATION, date).
    df = pd.melt(df_time_series, id_vars=['TYPE','LOCATION'], value_vars=df_time_series.columns[2:], var_name='date')
    # Keep only the four cumulative series; *_DIFF rows are dropped here.
    df = df[ df['TYPE'].apply(lambda t: t in ['ACTIVOS','CONFIRMADOS','MUERTOS','RECUPERADOS']) ]
    # Translate the Spanish series names to the frontend's English identifiers.
    df['TYPE'] = df['TYPE'].replace({
        'ACTIVOS': 'active',
        'CONFIRMADOS': 'confirmed',
        'MUERTOS': 'deceased',
        'RECUPERADOS': 'recovered',
    })
    # Attach lat/long/population per province.
    df = pd.merge(df,df_info,on='LOCATION')
    df['Province/State']=df['LOCATION']
    df = df.rename(columns={
        'TYPE':'var',
        'LAT':'Lat',
        'LONG':'Long',
        'LOCATION':'Country/Region',
        'POPULATION': 'population',
    })
    df = df[ [ 'date', 'Country/Region', 'Province/State', 'var', 'value', 'Lat', 'Long', 'population' ] ]
    df = df.sort_values(by=['Country/Region','date','var'])
    # value minus the value 4 rows earlier = same province/var at the previous
    # date — assumes exactly four var rows per (province, date); TODO confirm.
    df['value_new'] = df['value'].diff(4)
    df = df.sort_values(by=['date', 'Country/Region','var'])
    # Drop the synthetic baseline date added above.
    df = df[df['date']!='2020-03-02']
    return df
def _calculate_global_status():
    """Assemble the backend state dict: timestamp, geo info, time series and legacy view."""
    time_series = _load_data_time_series().reset_index()
    geoinfo = pd.read_csv(os.path.join(DATA_DIR, 'info_arg.csv'))
    status = {
        'timestamp': datetime.datetime.today().strftime('%Y-%m-%d-%H:%M:%S'),
        'geoinfo': geoinfo,
        'time_series': time_series,
        'soon_deprecated': _soon_deprecated_data(time_series, geoinfo),
    }
    return status
# Process-wide backend state; None until backend_update_data() populates it.
_global_status = None
def backend_update_data():
    """Refresh stale CSVs from the remote source and rebuild the cached backend state."""
    global _global_status
    print("Updating backend...")
    _download_expired_data()
    fresh_status = _calculate_global_status()
    _global_status = fresh_status
def backend_global_status_getter(field):
    """Return one field of the cached backend state (e.g. 'timestamp', 'geoinfo',
    'time_series', 'soon_deprecated')."""
    # Reading a module global needs no `global` declaration.
    return _global_status[field]
def backend_data_at_date(date):
    """Return the cached time-series values at `date`, reshaped via swaplevel/unstack.

    NOTE(review): this assumes the stored 'time_series' frame carries a
    two-level index — confirm against _calculate_global_status, which stores
    it with reset_index() applied.
    """
    snapshot = _global_status['time_series'][date]
    return snapshot.swaplevel(0, 1).unstack()