From 11ae37e7490a17df907e40a6157eb24b6123d28a Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 11 May 2021 16:11:07 +0200 Subject: [PATCH 01/79] Update taxonomia path --- covidnpi/utils/taxonomia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covidnpi/utils/taxonomia.py b/covidnpi/utils/taxonomia.py index 3fb46ac..69b8aed 100644 --- a/covidnpi/utils/taxonomia.py +++ b/covidnpi/utils/taxonomia.py @@ -1,6 +1,6 @@ import pandas as pd -PATH_TAXONOMIA = "datos_NPI/Taxonomía_07022021.xlsx" +PATH_TAXONOMIA = "datos_NPI/Taxonomía_11052021.xlsx" def read_taxonomia(path_taxonomia: str = PATH_TAXONOMIA) -> pd.DataFrame: From 6db5554f22d0faf346a4e05e508f7fd27cc3cb31 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 11 May 2021 17:34:17 +0200 Subject: [PATCH 02/79] Update score_item, remove unused items --- covidnpi/score/score_items.py | 73 +++++++++-------------------------- 1 file changed, 18 insertions(+), 55 deletions(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index acdd965..8831000 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -24,60 +24,34 @@ def score_items(df: pd.DataFrame): # Deporte interior df_item["DIN_afo"] = df[["AF.1", "AF.2", "AF.5", "AF.12"]].max(axis=1) + df_item["DIN_grupo"] = df[["AF.4", "AF.17"]].max(axis=1) df_item["DIN_pub"] = df[["AF.3", "AF.14", "AF.16"]].max(axis=1) - df_item["DIN_pisc"] = df[["AF.8", "AF.9"]].max(axis=1) # Ceremonias df_item["CER_cult"] = df[["CE.1", "CE.2"]].max(axis=1) df_item["CER_cor"] = df[["CE.1", "CE.7"]].max(axis=1) - df_item["CER_ent"] = np.max( - [df["CE.9"], (0.7 * df["CE.3"] + 0.3 * df["CE.4"]) * (df["CE.9"] == 0)] - ) - df_item["CER_otr"] = np.max( - [df["CE.10"], (0.7 * df["CE.5"] + 0.3 * df["CE.6"]) * (df["CE.10"] == 0)] - ) + df_item["CER_ent_int"] = df["CE.3"] + df_item["CER_ent_ext"] = df["CE.4"] + df_item["CER_otro_int"] = df["CE.5"] + df_item["CER_otro_ext"] = df["CE.5"] # Comercio df_item["COM_afo"] = df[["CO.1", 
"CO.8"]].max(axis=1) df_item["COM_hor"] = df[["CO.1", "CO.7"]].max(axis=1) df_item["COM_esp"] = df[["CO.1", "CO.2"]].max(axis=1) df_item["COM_fis"] = df[["CO.1", "CO.3"]].max(axis=1) - df_item["COM_cent"] = df[["CO.4", "CO.9"]].max(axis=1) - df_item["COM_cczon"] = df[["CO.4", "CO.5"]].max(axis=1) + df_item["COM_cent"] = df[["CO.1", "CO.4", "CO.9"]].max(axis=1) + df_item["COM_cczon"] = df[["CO.1", "CO.4", "CO.5"]].max(axis=1) df_item["COM_libre"] = df[["CO.1", "CO.6", "CO.10"]].max(axis=1) - # Colegios - for n in ["I", "P", "S", "B"]: - df_item[f"COL_{n}"] = df[[f"ED.1{n}", f"ED.5{n}", f"ED.2{n}"]].max(axis=1) - df_item["COL"] = ( - df_item[["COL_I", "COL_P", "COL_S", "COL_B"]].fillna(0).mean(axis=1) - ) - - # Educacion otra - df_item["EDU_uni"] = df[["ED.1U", "ED.5U", "ED.2U"]].max(axis=1) - df_item["EDU_acad"] = df[["ED.3", "ED.4"]].max(axis=1) - - # Ocio Nocturno - df_item["OCN_afo"] = np.max( - [ - df["ON.1"], - df["ON.2"], - df["ON.4"], - (0.7 * df["ON.5"] + 0.3 * df["ON.6"]) * (df["ON.4"] == 0), - ] - ) - df_item["OCN_mes"] = df[["ON.1", "ON.2", "ON.10"]].max(axis=1) - df_item["OCN_hor"] = df[["ON.1", "ON.2", "ON.8"]].max(axis=1) - df_item["OCN_bai"] = df[["ON.1", "ON.2", "ON.3"]].max(axis=1) - df_item["OCN_ver"] = df["ON.7"] - # Cultura df_item["CUL_mus"] = np.max( [ df["CD.1"], - df["CD.6"], - (0.7 * np.max([df["CD.2"], df["CD.7"]], axis=0) + 0.3 * df["CD.8"]) - * (df["CD.6"] == 0), + ( + 0.5 * np.max([df["CD.2"], df["CD.7"], df["CD.6"]], axis=0) + + 0.5 * np.max([df["CD.8"], df["CD.6"]], axis=0) + ), ] ) df_item["CUL_cin"] = np.max( @@ -92,42 +66,31 @@ def score_items(df: pd.DataFrame): df_item["CUL_zoo"] = df[["CD.16", "CD.15"]].max(axis=1) # Restauración interior - df_item["RIN_bing"] = df[["LA.1", "LA.2"]].max(axis=1) - df_item["RIN_binh"] = df[["LA.1", "LA.3"]].max(axis=1) df_item["RIN_afo"] = df[["RH.1", "RH.2", "RH.3", "RH.7"]].max(axis=1) df_item["RIN_hor"] = df[["RH.1", "RH.2", "RH.3", "RH.5"]].max(axis=1) df_item["RIN_mesa"] = df[["RH.1", "RH.2", 
"RH.3", "RH.9", "RH.11"]].max(axis=1) # Restauración exterior df_item["REX_afo"] = df[["RH.1", "RH.2", "RH.6"]].max(axis=1) + df_item["REX_hor"] = df[["RH.1", "RH.2", "RH.5"]].max(axis=1) df_item["REX_otr"] = df[["RH.1", "RH.2", "RH.9", "RH.10"]].max(axis=1) # Distancia social df_item["DS_even"] = df[["MV.1", "CD.12", "CD.13"]].max(axis=1) df_item["DS_dom"] = df[["MV.1", "MV.2"]].max(axis=1) - df_item["DS_reun"] = df[["MV.1", "RS.1", "RS.2", "RS.8"]].max(axis=1) + df_item["DS_reun"] = np.max( + [ + df[["MV.1", "RS.1"]].max(axis=1), + df[["RS.2", "RS.3", "RS.8"]].mean(axis=1) * df["RS.1"].isna(), + ] + ) df_item["DS_tran"] = df[["MV.1", "TP.1"]].max(axis=1) - df_item["DS_alc"] = df[["MV.1", "RS.6"]].max(axis=1) # Movilidad df_item["MOV_qued"] = df[["MV.1", "MV.3"]].max(axis=1) df_item["MOV_per"] = df[["MV.1", "MV.4"]].max(axis=1) df_item["MOV_int"] = df[["MV.1", "MV.7"]].max(axis=1) - # Trabajo - df_item["TRA_1"] = df[["TR.1", "TR.2", "TR.3"]].max(axis=1) - df_item["TRA_2"] = np.max( - [ - df["TR.8"], - df["TR.9"], - ( - 0.3 * np.max([df["TR.4"], df["TR.5"]], axis=0) - + 0.7 * np.max([df["TR.6"], df["TR.7"]], axis=0) - ) - * (df["TR.9"] == 0), - ] - ) - # Truncate up to today df_item = df_item[df_item["fecha"] <= dt.datetime.today()] From eda61164f68fb8584d4a70de367154ea6025bc47 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 11 May 2021 17:34:33 +0200 Subject: [PATCH 03/79] Raise KeyError when column name is missing in taxonomia --- covidnpi/utils/taxonomia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/covidnpi/utils/taxonomia.py b/covidnpi/utils/taxonomia.py index 69b8aed..867298c 100644 --- a/covidnpi/utils/taxonomia.py +++ b/covidnpi/utils/taxonomia.py @@ -111,7 +111,10 @@ def return_item_ponderacion( ) -> pd.DataFrame: taxonomia = read_taxonomia(path_taxonomia) # Fill missing names with "variable" + item count - mask_nan = taxonomia["nombre"].isna() + try: + mask_nan = taxonomia["nombre"].isna() + except KeyError: + raise 
KeyError("La columna 'nombre' falta en la taxonomia") taxonomia.loc[mask_nan, "nombre"] = ( taxonomia.loc[mask_nan, "variable"].str[:3].str.upper() From 87234a0853e1ed3828604a47ef012454392e7ac9 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 18 May 2021 15:37:15 +0200 Subject: [PATCH 04/79] Update score items: COM_cent, CUL_tor, CUL_zoo --- covidnpi/score/score_items.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index 8831000..afe208a 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -40,7 +40,7 @@ def score_items(df: pd.DataFrame): df_item["COM_hor"] = df[["CO.1", "CO.7"]].max(axis=1) df_item["COM_esp"] = df[["CO.1", "CO.2"]].max(axis=1) df_item["COM_fis"] = df[["CO.1", "CO.3"]].max(axis=1) - df_item["COM_cent"] = df[["CO.1", "CO.4", "CO.9"]].max(axis=1) + df_item["COM_cent"] = df[["CO.1", "CO.4", "CO.9", "CO.8"]].max(axis=1) df_item["COM_cczon"] = df[["CO.1", "CO.4", "CO.5"]].max(axis=1) df_item["COM_libre"] = df[["CO.1", "CO.6", "CO.10"]].max(axis=1) @@ -62,8 +62,8 @@ def score_items(df: pd.DataFrame): ] ) df_item["CUL_sal"] = df[["CD.5", "CD.11"]].max(axis=1) - df_item["CUL_tor"] = df[["CD.17", "CD.14"]].max(axis=1) - df_item["CUL_zoo"] = df[["CD.16", "CD.15"]].max(axis=1) + df_item["CUL_tor"] = df[["CD.3", "CD.17", "CD.14"]].max(axis=1) + df_item["CUL_zoo"] = df[["CD.3", "CD.16", "CD.15"]].max(axis=1) # Restauración interior df_item["RIN_afo"] = df[["RH.1", "RH.2", "RH.3", "RH.7"]].max(axis=1) From 4d73e58f31f40160c8a9010fe9454bbab423d91d Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 18 May 2021 16:11:54 +0200 Subject: [PATCH 05/79] Add comment about date parsing --- covidnpi/utils/preprocess.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index ca3c59d..068bea9 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -129,6 
+129,7 @@ def read_npi_data( for sheet in LIST_BASE_SHEET: try: + # Read excel - dates are parsed automatically df = pd.read_excel(path_com, sheet_name=sheet) break except xlrd.biffh.XLRDError: @@ -343,8 +344,8 @@ def format_porcentaje_afectado(df: pd.DataFrame) -> pd.DataFrame: f"Maximo: {df['porcentaje_afectado'].dropna().max()}. Se multiplican por 100" ) df["porcentaje_afectado"] = df["porcentaje_afectado"] * 100 - elif df["porcentaje_afectado"].dropna().min() < 1: - list_idx = df.query("porcentaje_afectado < 1").index + elif 0 < df["porcentaje_afectado"].dropna().min() < 1: + list_idx = df.query("0 < porcentaje_afectado < 1").index raise_value_warning(df, list_idx, "porcentaje_afectado") # Round to one decimal new_col = df["porcentaje_afectado"].astype(float).round(1) From 5200a7001a271403f3e7dd4c96ee57b7ba1fd030 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 18 May 2021 16:29:50 +0200 Subject: [PATCH 06/79] preprocess format_hora can take ranges --- covidnpi/utils/preprocess.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 068bea9..faa1f24 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -282,6 +282,19 @@ def format_hora(df: pd.DataFrame, date_format: str = "%H:%M:%S") -> pd.DataFrame # If "hora" is empty, return original if df["hora"].isnull().all(): return df + # Remove whitespaces from string + df["hora"] = df["hora"].str.replace(" ", "") + # Change ranges HH:MM-HH:MM to last HH:MM + mask_range = ( + df["hora"] + .str.contains( + "^([0-1]?[0-9]|2[0-3]):[0-5][0-9]-([0-1]?[0-9]|2[0-3]):[0-5][0-9]$" + ) + .fillna(False) + ) + df.loc[mask_range, "hora"] = ( + df.loc[mask_range, "hora"].str.split("-").str[-1] + ":00" + ) # Convert to date format try: hora = pd.to_datetime(df["hora"], format=date_format, errors="raise") From 031ee84338052fe81174fc481967063c2ea389a2 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 18 May 2021 16:32:19 +0200 
Subject: [PATCH 07/79] Remove option to choose date_format in format_hora --- covidnpi/utils/preprocess.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index faa1f24..73b2644 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -275,7 +275,7 @@ def rename_unidad(df, rename: dict = None) -> pd.DataFrame: return df -def format_hora(df: pd.DataFrame, date_format: str = "%H:%M:%S") -> pd.DataFrame: +def format_hora(df: pd.DataFrame) -> pd.DataFrame: """Formats the hora column, to datetime""" # We do not want to modify the original dataframe df = df.copy() @@ -297,10 +297,10 @@ def format_hora(df: pd.DataFrame, date_format: str = "%H:%M:%S") -> pd.DataFrame ) # Convert to date format try: - hora = pd.to_datetime(df["hora"], format=date_format, errors="raise") + hora = pd.to_datetime(df["hora"], format="%H:%M:%S", errors="raise") except (TypeError, ValueError) as e: hora = pd.Series( - pd.to_datetime(df["hora"], format=date_format, errors="coerce") + pd.to_datetime(df["hora"], format="%H:%M:%S", errors="coerce") ) list_idx = df.loc[hora.isna(), "hora"].dropna().index.tolist() # Filtramos aquellos warning que no interesan, From c91950c6c6cdbeeb50c060b42077b62ff6b336a0 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 18 May 2021 17:14:33 +0200 Subject: [PATCH 08/79] Update .gitignore --- config.toml | 234 ---------------------------------------------------- 1 file changed, 234 deletions(-) delete mode 100644 config.toml diff --git a/config.toml b/config.toml deleted file mode 100644 index a8d1d63..0000000 --- a/config.toml +++ /dev/null @@ -1,234 +0,0 @@ -[mongo] -url = "mongodb://zappa.uca.es:27038/" -username = "" -password = "" -database = "" -chunks = 10 -date_min = "21-06-2020" - -[npi] -fillna_date_end = "today" - - -[casos] -movavg = 7 -link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv" - - -[postal_to_code] -1 = "VI" -2 
= "AB" -3 = "A" -4 = "AL" -5 = "AV" -6 = "BA" -7 = "PM" -8 = "B" -9 = "BU" -10 = "CC" -11 = "CA" -12 = "CS" -13 = "CR" -14 = "CO" -15 = "C" -16 = "CU" -17 = "GI" -18 = "GR" -19 = "GU" -20 = "SS" -21 = "H" -22 = "HU" -23 = "J" -24 = "LE" -25 = "L" -26 = "LO" -27 = "LU" -28 = "M" -29 = "MA" -30 = "MU" -31 = "NA" -32 = "OR" -33 = "O" -34 = "P" -35 = "GC" -36 = "PO" -37 = "SA" -38 = "TF" -39 = "S" -40 = "SG" -41 = "SE" -42 = "SO" -43 = "T" -44 = "TE" -45 = "TO" -46 = "V" -47 = "VA" -48 = "BI" -49 = "ZA" -50 = "Z" -51 = "CE" -52 = "ML" - - -[code_to_provincia] -A = "Alacant" -AB = "Albacete" -AL = "Almería" -AV = "Ávila" -B = "Barcelona" -BA = "Badajoz" -BI = "Bizkaia" -BU = "Burgos" -C = "A Coruña" -CA = "Cádiz" -CC = "Cáceres" -CE = "Ceuta" -CO = "Córdoba" -CR = "Ciudad Real" -CS = "Castelló" -CU = "Cuenca" -GC = "Las Palmas" -GI = "Girona" -GR = "Granada" -GU = "Guadalajara" -H = "Huelva" -HU = "Huesca" -J = "Jaén" -L = "Lleida" -LE = "León" -LO = "La Rioja" -LU = "Lugo" -M = "Madrid" -MA = "Málaga" -ML = "Melilla" -MU = "Murcia" -NA = "Nafarroa" -OR = "Ourense" -O = "Asturias" -P = "Palencia" -PM = "Illes Balears" -PO = "Pontevedra" -SA = "Salamanca" -TF = "Sta. 
Cruz de Tenerife" -S = "Cantabria" -SG = "Segovia" -SE = "Sevilla" -SO = "Soria" -SS = "Gipuzkoa" -T = "Tarragona" -TE = "Teruel" -TO = "Toledo" -V = "Valéncia" -VA = "Valladolid" -VI = "Álava" -ZA = "Zamora" -Z = "Zaragoza" - -[provincia_to_code] -alava = "VI" -albacete = "AB" -alicante = "A" -almeria = "AL" -avila = "AV" -badajoz = "BA" -mallorca = "PM" -barcelona = "B" -burgos = "BU" -caceres = "CC" -cadiz = "CA" -castellon = "CS" -ceuta = "CE" -ciudad_real = "CR" -cordoba = "CO" -coruna_la = "C" -cuenca = "CU" -girona = "GI" -granada = "GR" -guadalajara = "GU" -guipuzcoa = "SS" -huelva = "H" -huesca = "HU" -jaen = "J" -leon = "LE" -lleida = "L" -rioja_la = "LO" -lugo = "LU" -madrid = "M" -malaga = "MA" -melilla = "ML" -murcia = "MU" -navarra = "NA" -orense = "OR" -asturias = "O" -palencia = "P" -gran_canaria = "GC" -pontevedra = "PO" -salamanca = "SA" -santa_cruz_de_tenerife = "TF" -cantabria = "S" -segovia = "SG" -sevilla = "SE" -soria = "SO" -tarragona = "T" -teruel = "TE" -toledo = "TO" -valencia = "V" -valladolid = "VA" -vizcaya = "BI" -zamora = "ZA" -zaragoza = "Z" - - -[code_to_poblacion] -AB = 388270 -A = 1879888 -AL = 727945 -VI = 333940 -O = 1018784 -AV = 157664 -BA = 672137 -PM = 1171543 -B = 5743402 -BI = 1159443 -BU = 357650 -CC = 391850 -CA = 1244049 -S = 582905 -CS = 585590 -CR = 495045 -CO = 781451 -C = 1121815 -CU = 196139 -SS = 727121 -GI = 781788 -GR = 919168 -GU = 261995 -H = 524278 -HU = 222687 -J = 631381 -LE = 456439 -L = 438517 -LU = 327946 -M = 6779888 -MA = 1685920 -MU = 1511251 -NA = 661197 -OR = 306650 -P = 160321 -GC = 1131065 -PO = 945408 -LO = 319914 -SA = 329245 -TF = 1044887 -SG = 153478 -SE = 1950219 -SO = 88884 -T = 816772 -TE = 134176 -TO = 703772 -V = 2591875 -VA = 520649 -ZA = 170588 -Z = 972528 -CE = 84202 -ML = 87076 \ No newline at end of file From 4d38921e7d8c56dfa6cdc8fa516b79540f2f10b3 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 18 May 2021 17:14:50 +0200 Subject: [PATCH 09/79] Update .gitignore --- .gitignore | 
4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e198bd1..c78ed43 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,6 @@ dmypy.json # Other directories output/* .idea/* -datos_* \ No newline at end of file +datos_* +/config.toml +/log.out From cd5bf3c370326718b263eb188137110a8aa941e8 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Fri, 21 May 2021 11:10:10 +0200 Subject: [PATCH 10/79] format_hora fix AttributeError --- covidnpi/utils/preprocess.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 73b2644..111bea1 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -282,19 +282,23 @@ def format_hora(df: pd.DataFrame) -> pd.DataFrame: # If "hora" is empty, return original if df["hora"].isnull().all(): return df - # Remove whitespaces from string - df["hora"] = df["hora"].str.replace(" ", "") - # Change ranges HH:MM-HH:MM to last HH:MM - mask_range = ( - df["hora"] - .str.contains( - "^([0-1]?[0-9]|2[0-3]):[0-5][0-9]-([0-1]?[0-9]|2[0-3]):[0-5][0-9]$" + # The following will only run when the column "hora" is a string + try: + # Remove whitespaces from string + df["hora"] = df["hora"].str.replace(" ", "").astype(str) + # Change ranges HH:MM-HH:MM to last HH:MM + mask_range = ( + df["hora"] + .str.contains( + "^([0-1]?[0-9]|2[0-3]):[0-5][0-9]-([0-1]?[0-9]|2[0-3]):[0-5][0-9]$" + ) + .fillna(False) ) - .fillna(False) - ) - df.loc[mask_range, "hora"] = ( - df.loc[mask_range, "hora"].str.split("-").str[-1] + ":00" - ) + df.loc[mask_range, "hora"] = ( + df.loc[mask_range, "hora"].str.split("-").str[-1] + ":00" + ) + except AttributeError: + pass # Convert to date format try: hora = pd.to_datetime(df["hora"], format="%H:%M:%S", errors="raise") From c9c9e4336cb2a5311f447124328bf9d0db8ee104 Mon Sep 17 00:00:00 2001 From: daniprec Date: Mon, 24 May 2021 13:02:31 +0200 Subject: [PATCH 
11/79] Script warns when "provincia" column is empty --- covidnpi/utils/preprocess.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 111bea1..48d0819 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -23,8 +23,10 @@ } DICT_FILL_PROVINCIA = { + "CTB": "cantabria", "CEU": "ceuta", "MEL": "melilla", + "MUR": "murcia", "RIO": "rioja_la", } @@ -169,9 +171,14 @@ def read_npi_data( ) # Algunas provincias no rellenan la columna "provincia", la rellenamos nosotros - for key, value in DICT_FILL_PROVINCIA.items(): - if f"Medidas_{key}" in path_com: - df["provincia"] = df["provincia"].fillna(value) + if df["provincia"].isnull().all(): + for key, value in DICT_FILL_PROVINCIA.items(): + if f"Medidas_{key}" in path_com: + df["provincia"] = df["provincia"].fillna(value) + logger.warning(f"La columna 'provincia' se ha rellenado con '{value}'") + break + else: + logger.warning("La columna 'provincia' no ha sido rellenada") return df @@ -303,9 +310,7 @@ def format_hora(df: pd.DataFrame) -> pd.DataFrame: try: hora = pd.to_datetime(df["hora"], format="%H:%M:%S", errors="raise") except (TypeError, ValueError) as e: - hora = pd.Series( - pd.to_datetime(df["hora"], format="%H:%M:%S", errors="coerce") - ) + hora = pd.Series(pd.to_datetime(df["hora"], format="%H:%M:%S", errors="coerce")) list_idx = df.loc[hora.isna(), "hora"].dropna().index.tolist() # Filtramos aquellos warning que no interesan, # porque son medidas que no aplican la columna "hora" From 3f68a740fb6cb404160249044e8630ef7632ead2 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:06:25 +0200 Subject: [PATCH 12/79] Initialize rho computation script --- covidnpi/utils/rho.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 covidnpi/utils/rho.py diff --git a/covidnpi/utils/rho.py b/covidnpi/utils/rho.py new file mode 100644 index 0000000..478ee82 
--- /dev/null +++ b/covidnpi/utils/rho.py @@ -0,0 +1,20 @@ +import numpy as np +import pandas as pd +from covidnpi.utils.series import cumulative_incidence + + +def compute_weight(casos: pd.Series, days: int = 7) -> pd.Series: + acum = cumulative_incidence(casos, days) + weight = days * np.divide(casos, acum) + return weight + + +def compute_normed_incidence( + casos: pd.Series, days: int = 7, num_lag: int = 4 +) -> pd.Series: + weight = compute_weight(casos, days=days) + series_sum = weight + for lag in range(1, num_lag + 1): + series_sum += weight.shift(lag * days) + casos_norm = (num_lag + 1) * np.divide(casos, series_sum) + return casos_norm From 737a34fe51a8aeb3cd356aae0ede9d7876d7b568 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:06:41 +0200 Subject: [PATCH 13/79] Add logging info in casos and mobility scripts --- covidnpi/utils/casos.py | 4 +++- covidnpi/utils/mobility.py | 30 +++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/covidnpi/utils/casos.py b/covidnpi/utils/casos.py index a2877b9..6dbe50c 100644 --- a/covidnpi/utils/casos.py +++ b/covidnpi/utils/casos.py @@ -5,6 +5,7 @@ import pandas as pd from covidnpi.utils.config import load_config +from covidnpi.utils.log import logger warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -31,12 +32,13 @@ def load_casos_df( Number of cases of COVID by day and province """ + logger.debug("Loading incidence data") casos = pd.read_csv(link, parse_dates=["fecha"], date_parser=_dateparse) # Correct some abbreviations casos["provincia_iso"] = casos["provincia_iso"].replace({"ME": "ML", "NC": "NA"}) - + logger.debug("Done loading incidence data") return casos diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 5396ff0..4d2d9bf 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -1,10 +1,12 @@ import os import pandas as pd +import typer from covidnpi.utils.casos import load_casos_df, 
return_casos_of_provincia_normed from covidnpi.utils.config import load_config from covidnpi.utils.log import logger +from covidnpi.utils.rho import compute_normed_incidence from covidnpi.utils.series import ( cumulative_incidence, compute_growth_rate, @@ -14,7 +16,9 @@ def load_mobility_report( - country: str = "ES", path_csv: str = URL_MOBILITY + country: str = "ES", + path_csv: str = URL_MOBILITY, + chunksize: int = 500000, ) -> pd.DataFrame: """Loads the Google mobility report of a certain country. Adds additional columns: - code : province code @@ -25,6 +29,8 @@ def load_mobility_report( Code of the country to load, by default "ES" path_csv : str, optional Link or path to the mobility report csv + chunksize : int, optional + Rows of data read at once, by default 500000 Returns ------- @@ -32,17 +38,25 @@ def load_mobility_report( Mobility report of given country """ + logger.debug("Loading mobility report") # Process in chunks to not saturate the memory df_list = [] - for chunk in pd.read_csv( - path_csv, parse_dates=["date"], dayfirst=False, chunksize=5e5, low_memory=False - ): + for i, chunk in enumerate(pd.read_csv( + path_csv, + parse_dates=["date"], + dayfirst=False, + chunksize=chunksize, + low_memory=False, + )): df_list += [chunk.query(f"country_region_code == '{country}'")] + logger.debug(f" Loaded chunk {i}") mob = pd.concat(df_list) del df_list + logger.debug("Done loading all chunks. 
Merged into single dataframe.") # Codes of each province mob["code"] = mob["iso_3166_2_code"].str.replace(f"{country}-", "") + logger.debug("Done loading mobility report") return mob @@ -73,7 +87,7 @@ def return_reports_of_provincia(mob: pd.DataFrame, code: str) -> dict: def mobility_report_to_csv( - path_config: str = "../config.toml", path_output: str = "../output/mobility" + path_config: str = "config.toml", path_output: str = "output/mobility" ): """Stores the Google mobility reports in csv format""" @@ -97,6 +111,8 @@ def mobility_report_to_csv( series_casos = return_casos_of_provincia_normed( casos, code, path_config=path_config ) + casos_norm = compute_normed_incidence(series_casos) + print(casos_norm) series_ia7 = cumulative_incidence(series_casos, 7) series_growth = compute_growth_rate(series_casos, 7) @@ -106,3 +122,7 @@ def mobility_report_to_csv( ) filename = code_to_filename[code] df_store.to_csv(os.path.join(path_output, f"{filename}.csv")) + + +if __name__ == "__main__": + typer.run(mobility_report_to_csv) From cb4288786f042206d24b645d994dbf641292ebdb Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:08:36 +0200 Subject: [PATCH 14/79] Update path to config in mobility --- covidnpi/utils/mobility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 4d2d9bf..f690ff9 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -87,7 +87,7 @@ def return_reports_of_provincia(mob: pd.DataFrame, code: str) -> dict: def mobility_report_to_csv( - path_config: str = "config.toml", path_output: str = "output/mobility" + path_config: str = "covidnpi/config.toml", path_output: str = "output/mobility" ): """Stores the Google mobility reports in csv format""" From d59d33faffd8dca6a25bc497f5cf62f8d859c460 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:17:07 +0200 Subject: [PATCH 15/79] Rename functions and optimize 
compute_incidence_normed --- covidnpi/utils/rho.py | 57 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/covidnpi/utils/rho.py b/covidnpi/utils/rho.py index 478ee82..8dc6a4c 100644 --- a/covidnpi/utils/rho.py +++ b/covidnpi/utils/rho.py @@ -3,18 +3,51 @@ from covidnpi.utils.series import cumulative_incidence -def compute_weight(casos: pd.Series, days: int = 7) -> pd.Series: - acum = cumulative_incidence(casos, days) - weight = days * np.divide(casos, acum) - return weight +def compute_incidence_weighted(series_casos: pd.Series, days: int = 7) -> pd.Series: + """Computes weighted incidence: incidence per day divided by the average incidence + during the last days + Parameters + ---------- + series_casos : pandas.Series + COVID incidence per date + days : int, optional + Size of the cumulative sum, by default 7 -def compute_normed_incidence( - casos: pd.Series, days: int = 7, num_lag: int = 4 + Returns + ------- + pandas.Series + COVID incidence per date, normalized by the average incidence of the last days + + """ + acum = cumulative_incidence(series_casos, days) + series_casos_peso = days * np.divide(series_casos, acum) + return series_casos_peso + + +def compute_incidence_normed( + series_casos: pd.Series, days: int = 7, num_lag: int = 4 ) -> pd.Series: - weight = compute_weight(casos, days=days) - series_sum = weight - for lag in range(1, num_lag + 1): - series_sum += weight.shift(lag * days) - casos_norm = (num_lag + 1) * np.divide(casos, series_sum) - return casos_norm + """ + + Parameters + ---------- + series_casos : pandas.Series + COVID incidence per date + days : int, optional + Size of the cumulative sum, by default 7 + num_lag : int, optional + Number of lags to use, by default 4 + + Returns + ------- + pandas.Series + + """ + series_casos_peso = compute_incidence_weighted(series_casos, days=days) + list_series_sum = [ + series_casos_peso.shift(lag * days) for lag in range(num_lag + 1) + ] + 
series_sum = pd.concat(list_series_sum, axis=1).sum(axis=1) + series_casos_norm = (num_lag + 1) * np.divide(series_casos, series_sum) + return series_casos_norm From a56bfd5b71a56bad54e9e88ed1bb805c7d07e92e Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:17:40 +0200 Subject: [PATCH 16/79] Rename functions in mobility --- covidnpi/utils/mobility.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index f690ff9..ac1444f 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -6,7 +6,7 @@ from covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed from covidnpi.utils.config import load_config from covidnpi.utils.log import logger -from covidnpi.utils.rho import compute_normed_incidence +from covidnpi.utils.rho import compute_incidence_normed from covidnpi.utils.series import ( cumulative_incidence, compute_growth_rate, @@ -41,13 +41,15 @@ def load_mobility_report( logger.debug("Loading mobility report") # Process in chunks to not saturate the memory df_list = [] - for i, chunk in enumerate(pd.read_csv( - path_csv, - parse_dates=["date"], - dayfirst=False, - chunksize=chunksize, - low_memory=False, - )): + for i, chunk in enumerate( + pd.read_csv( + path_csv, + parse_dates=["date"], + dayfirst=False, + chunksize=chunksize, + low_memory=False, + ) + ): df_list += [chunk.query(f"country_region_code == '{country}'")] logger.debug(f" Loaded chunk {i}") mob = pd.concat(df_list) @@ -111,8 +113,7 @@ def mobility_report_to_csv( series_casos = return_casos_of_provincia_normed( casos, code, path_config=path_config ) - casos_norm = compute_normed_incidence(series_casos) - print(casos_norm) + print(compute_incidence_normed(series_casos)) series_ia7 = cumulative_incidence(series_casos, 7) series_growth = compute_growth_rate(series_casos, 7) From 991ef4a98ca56c70f544a2eec0cb8236ca67634d Mon Sep 17 00:00:00 2001 From: 
Daniel Precioso Date: Thu, 27 May 2021 16:19:05 +0200 Subject: [PATCH 17/79] Replace sum operations with average --- covidnpi/utils/rho.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/covidnpi/utils/rho.py b/covidnpi/utils/rho.py index 8dc6a4c..cb7c818 100644 --- a/covidnpi/utils/rho.py +++ b/covidnpi/utils/rho.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from covidnpi.utils.series import cumulative_incidence +from covidnpi.utils.series import moving_average def compute_incidence_weighted(series_casos: pd.Series, days: int = 7) -> pd.Series: @@ -20,8 +20,8 @@ def compute_incidence_weighted(series_casos: pd.Series, days: int = 7) -> pd.Ser COVID incidence per date, normalized by the average incidence of the last days """ - acum = cumulative_incidence(series_casos, days) - series_casos_peso = days * np.divide(series_casos, acum) + acum = moving_average(series_casos, days) + series_casos_peso = np.divide(series_casos, acum) return series_casos_peso @@ -48,6 +48,6 @@ def compute_incidence_normed( list_series_sum = [ series_casos_peso.shift(lag * days) for lag in range(num_lag + 1) ] - series_sum = pd.concat(list_series_sum, axis=1).sum(axis=1) - series_casos_norm = (num_lag + 1) * np.divide(series_casos, series_sum) + series_sum = pd.concat(list_series_sum, axis=1).mean(axis=1) + series_casos_norm = np.divide(series_casos, series_sum) return series_casos_norm From ba819ab21b705e67e18b8bac699ccd3026e59039 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:20:45 +0200 Subject: [PATCH 18/79] Rename some inner variables --- covidnpi/utils/rho.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/covidnpi/utils/rho.py b/covidnpi/utils/rho.py index cb7c818..68551f5 100644 --- a/covidnpi/utils/rho.py +++ b/covidnpi/utils/rho.py @@ -45,9 +45,9 @@ def compute_incidence_normed( """ series_casos_peso = compute_incidence_weighted(series_casos, days=days) - list_series_sum = [ + 
list_series_peso = [ series_casos_peso.shift(lag * days) for lag in range(num_lag + 1) ] - series_sum = pd.concat(list_series_sum, axis=1).mean(axis=1) - series_casos_norm = np.divide(series_casos, series_sum) + series_mean = pd.concat(list_series_peso, axis=1).mean(axis=1) + series_casos_norm = np.divide(series_casos, series_mean) return series_casos_norm From 867514162a518d034182b4f35b924c7a11e1a808 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:26:35 +0200 Subject: [PATCH 19/79] Compute rho --- covidnpi/utils/mobility.py | 6 +++--- covidnpi/utils/rho.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index ac1444f..620ba06 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -6,7 +6,7 @@ from covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed from covidnpi.utils.config import load_config from covidnpi.utils.log import logger -from covidnpi.utils.rho import compute_incidence_normed +from covidnpi.utils.rho import compute_rho from covidnpi.utils.series import ( cumulative_incidence, compute_growth_rate, @@ -113,13 +113,13 @@ def mobility_report_to_csv( series_casos = return_casos_of_provincia_normed( casos, code, path_config=path_config ) - print(compute_incidence_normed(series_casos)) series_ia7 = cumulative_incidence(series_casos, 7) series_growth = compute_growth_rate(series_casos, 7) + series_rho = compute_rho(series_casos) # Store data df_store = pd.DataFrame(dict_reports).assign( - ia7=series_ia7, growth_rate=series_growth + ia7=series_ia7, growth_rate=series_growth, rho=series_rho ) filename = code_to_filename[code] df_store.to_csv(os.path.join(path_output, f"{filename}.csv")) diff --git a/covidnpi/utils/rho.py b/covidnpi/utils/rho.py index 68551f5..99264db 100644 --- a/covidnpi/utils/rho.py +++ b/covidnpi/utils/rho.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd + from 
covidnpi.utils.series import moving_average @@ -51,3 +52,18 @@ def compute_incidence_normed( series_mean = pd.concat(list_series_peso, axis=1).mean(axis=1) series_casos_norm = np.divide(series_casos, series_mean) return series_casos_norm + + +def compute_rho(series_casos: pd.Series) -> pd.Series: + days = 7 + lag_peso = 4 + lag_norm = 6 + series_casos_norm = compute_incidence_normed( + series_casos, days=days, num_lag=lag_peso + ) + series_norm_movavg = moving_average(series_casos_norm, lag_norm) + list_numerator = [series_norm_movavg.shift(lag) for lag in range(3)] + numerator = pd.concat(list_numerator, axis=1).mean(axis=1) + list_denominator = [series_norm_movavg.shift(lag) for lag in range(5, 8)] + denominator = pd.concat(list_denominator, axis=1).mean(axis=1) + return np.divide(numerator, denominator) From c539d156931800634343db90ece6df7efce6057f Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 16:41:32 +0200 Subject: [PATCH 20/79] Docstring --- covidnpi/utils/rho.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/covidnpi/utils/rho.py b/covidnpi/utils/rho.py index 99264db..b6b5025 100644 --- a/covidnpi/utils/rho.py +++ b/covidnpi/utils/rho.py @@ -54,14 +54,34 @@ def compute_incidence_normed( return series_casos_norm -def compute_rho(series_casos: pd.Series) -> pd.Series: - days = 7 - lag_peso = 4 - lag_norm = 6 +def compute_rho( + series_casos: pd.Series, days: int = 7, lag_peso: int = 4, lag_norm: int = 6 +) -> pd.Series: + """ + + Parameters + ---------- + series_casos : pandas.Series + COVID incidence per date + days : int, optional + Size of the cumulative sum, by default 7 + lag_peso : int, optional + Number of lags to use when computing weighted incidence, by default 4 + lag_norm : int, optional + Number of lags to use when computing movavg normed incidence, by default 7 + + Returns + ------- + pandas.Series + Rho + + """ + # Compute the moving average of normed incidence 
series_casos_norm = compute_incidence_normed( series_casos, days=days, num_lag=lag_peso ) series_norm_movavg = moving_average(series_casos_norm, lag_norm) + # Compute rho list_numerator = [series_norm_movavg.shift(lag) for lag in range(3)] numerator = pd.concat(list_numerator, axis=1).mean(axis=1) list_denominator = [series_norm_movavg.shift(lag) for lag in range(5, 8)] From eea19e3fea7c79558c6372a49ddb961d8158c3a4 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 17:03:25 +0200 Subject: [PATCH 21/79] Add provinces code to config --- covidnpi/config.toml | 3 +++ covidnpi/utils/mobility.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/covidnpi/config.toml b/covidnpi/config.toml index c03494b..275a648 100644 --- a/covidnpi/config.toml +++ b/covidnpi/config.toml @@ -71,6 +71,7 @@ link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv A = "Alacant" AB = "Albacete" AL = "Almería" +AS = "Asturias" AV = "Ávila" B = "Barcelona" BA = "Badajoz" @@ -78,6 +79,7 @@ BI = "Bizkaia" BU = "Burgos" C = "A Coruña" CA = "Cádiz" +CB = "Cantabria" CC = "Cáceres" CE = "Ceuta" CO = "Córdoba" @@ -97,6 +99,7 @@ LO = "La Rioja" LU = "Lugo" M = "Madrid" MA = "Málaga" +MD = "Madrid" ML = "Melilla" MU = "Murcia" NA = "Nafarroa" diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 620ba06..409eff6 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -107,7 +107,7 @@ def mobility_report_to_csv( provincia = code_to_provincia[code] logger.debug(f"{code} - {provincia}") except KeyError: - logger.debug(f"Omitted {code}") + logger.error(f"Omitted {code}") continue dict_reports = return_reports_of_provincia(mob, code) series_casos = return_casos_of_provincia_normed( From 6ac564dcdb75b52e737a9e6af5e99fbf874aea09 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 17:06:58 +0200 Subject: [PATCH 22/79] Reassign province codes to avoid duplicates --- covidnpi/utils/mobility.py
| 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 409eff6..a32ef57 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -107,8 +107,11 @@ def mobility_report_to_csv( provincia = code_to_provincia[code] logger.debug(f"{code} - {provincia}") except KeyError: - logger.error(f"Omitted {code}") + logger.warning(f"Omitted {code}") continue + # Reassign code + code = provincia_to_code[provincia.lower()] + # Get reports and incidence dict_reports = return_reports_of_provincia(mob, code) series_casos = return_casos_of_provincia_normed( casos, code, path_config=path_config From a88f99bb306c552890ade1d2a4a231ad2aa6b161 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 17:10:25 +0200 Subject: [PATCH 23/79] Code reassignment --- covidnpi/config.toml | 8 +++++--- covidnpi/utils/mobility.py | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/covidnpi/config.toml b/covidnpi/config.toml index 275a648..8116736 100644 --- a/covidnpi/config.toml +++ b/covidnpi/config.toml @@ -71,7 +71,6 @@ link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv A = "Alacant" AB = "Albacete" AL = "Almería" -AS = "Asturias" AV = "Ávila" B = "Barcelona" BA = "Badajoz" @@ -79,7 +78,6 @@ BI = "Bizkaia" BU = "Burgos" C = "A Coruña" CA = "Cádiz" -CB = "Cantabria" CC = "Cáceres" CE = "Ceuta" CO = "Córdoba" @@ -99,7 +97,6 @@ LO = "La Rioja" LU = "Lugo" M = "Madrid" MA = "Málaga" -MD = "Madrid" ML = "Melilla" MU = "Murcia" NA = "Nafarroa" @@ -124,6 +121,11 @@ VI = "Álava" ZA = "Zamora" Z = "Zaragoza" +[code_reassign] +AS = "O" +CB = "S" +MD = "M" + [provincia_to_code] alava = "VI" albacete = "AB" diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index a32ef57..46c25ba 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -99,19 +99,19 @@ def mobility_report_to_csv( mob = load_mobility_report() casos =
load_casos_df() code_to_provincia = load_config(path_config, "code_to_provincia") + code_reassign = load_config(path_config, "code_reassign") provincia_to_code = load_config(path_config, "provincia_to_code") code_to_filename = {v: k for k, v in provincia_to_code.items()} for code in mob["code"].unique(): + # Reassing code if needed + code = code_reassign.get(code, code) try: provincia = code_to_provincia[code] logger.debug(f"{code} - {provincia}") except KeyError: logger.warning(f"Omitted {code}") continue - # Reassign code - code = provincia_to_code[provincia.lower()] - # Get reports and incidence dict_reports = return_reports_of_provincia(mob, code) series_casos = return_casos_of_provincia_normed( casos, code, path_config=path_config From 09793edf48978c8f15aa9fa8f339bea635a79e4a Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 17:15:32 +0200 Subject: [PATCH 24/79] Add more codes to reassign --- covidnpi/config.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/covidnpi/config.toml b/covidnpi/config.toml index 8116736..7cd4c3a 100644 --- a/covidnpi/config.toml +++ b/covidnpi/config.toml @@ -124,7 +124,10 @@ Z = "Zaragoza" [code_reassign] AS = "O" CB = "S" +IB = "PM" MD = "M" +MC = "MU" +NC = "NA" [provincia_to_code] alava = "VI" From e48fcc1c8efd8471e4c0c55e9b6e797f5c1898d4 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Thu, 27 May 2021 17:25:30 +0200 Subject: [PATCH 25/79] Ensure dataframe index is named "date" --- covidnpi/utils/mobility.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 46c25ba..aaad039 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -104,7 +104,7 @@ def mobility_report_to_csv( code_to_filename = {v: k for k, v in provincia_to_code.items()} for code in mob["code"].unique(): - # Reassing code if needed + # Reassign code if needed code = code_reassign.get(code, code) try: provincia = 
code_to_provincia[code] @@ -121,8 +121,10 @@ def mobility_report_to_csv( series_rho = compute_rho(series_casos) # Store data - df_store = pd.DataFrame(dict_reports).assign( - ia7=series_ia7, growth_rate=series_growth, rho=series_rho + df_store = ( + pd.DataFrame(dict_reports) + .assign(ia7=series_ia7, growth_rate=series_growth, rho=series_rho) + .rename_axis("date", axis=0) ) filename = code_to_filename[code] df_store.to_csv(os.path.join(path_output, f"{filename}.csv")) From c9aaba28ff2a85e2efe098c44efa5ccf2f590780 Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 28 May 2021 13:19:19 +0200 Subject: [PATCH 26/79] Improve config error messages --- .gitignore | 1 + covidnpi/preprocess_and_score.py | 2 -- covidnpi/utils/config.py | 6 ++++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index c78ed43..6c48e1a 100644 --- a/.gitignore +++ b/.gitignore @@ -138,3 +138,4 @@ output/* datos_* /config.toml /log.out +/config-prod.toml diff --git a/covidnpi/preprocess_and_score.py b/covidnpi/preprocess_and_score.py index ea7de13..204a608 100644 --- a/covidnpi/preprocess_and_score.py +++ b/covidnpi/preprocess_and_score.py @@ -54,8 +54,6 @@ def main( f"Ahora puntuamos cada medida" ) - config = load_config(path_config, "npi") - dict_scores = return_dict_score_medidas(dict_medidas) path_score_medidas = os.path.join(path_output, "score_medidas") store_dict_scores(dict_scores, path_output=path_score_medidas) diff --git a/covidnpi/utils/config.py b/covidnpi/utils/config.py index e88f103..dd661e6 100644 --- a/covidnpi/utils/config.py +++ b/covidnpi/utils/config.py @@ -17,5 +17,7 @@ def load_config(path: str, key: str = None): try: config = toml.load(path) return config if key is None else config[key] - except: - raise ValueError(f"Path to config not found: {path}") + except KeyError: + raise KeyError(f"Missing key in config file ({path}): {key}") + except FileNotFoundError: + raise FileNotFoundError(f"Path to config not found: {path}") From 
106211211b27d21ee36dd2c4440e046d9e94baf7 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 1 Jun 2021 11:07:41 +0200 Subject: [PATCH 27/79] Add missing import in mongo module --- covidnpi/web/mongo.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/covidnpi/web/mongo.py b/covidnpi/web/mongo.py index 843cb51..6bfa6b1 100644 --- a/covidnpi/web/mongo.py +++ b/covidnpi/web/mongo.py @@ -1,4 +1,5 @@ import pymongo +from covidnpi.utils.log import logger class SingletonMeta(type): @@ -43,9 +44,7 @@ def insert_new_dict(self, collection: str, new_dict: dict): logger.debug(x.inserted_id) return x - def update_dict( - self, collection: str, id_key: str, id_value: str, new_dict: dict - ): + def update_dict(self, collection: str, id_key: str, id_value: str, new_dict: dict): mydb = self.client[self.database] mycol = mydb[collection] mycol.update({id_key: id_value}, new_dict) From 7763c7e6bffccb7359e9d198d94ebfff5c322eb4 Mon Sep 17 00:00:00 2001 From: daniprec Date: Tue, 1 Jun 2021 11:16:01 +0200 Subject: [PATCH 28/79] Update combine script: remove some ambitos --- covidnpi/utils/combine.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/covidnpi/utils/combine.py b/covidnpi/utils/combine.py index d9bd7ac..b60a118 100644 --- a/covidnpi/utils/combine.py +++ b/covidnpi/utils/combine.py @@ -3,11 +3,10 @@ import numpy as np import pandas as pd -import typer +import typer from covidnpi.utils.config import load_config from covidnpi.utils.dictionaries import reverse_dictionary -from covidnpi.utils.log import logger COLS_AMBITO = [ "fecha", @@ -15,12 +14,12 @@ "deporte_exterior", "deporte_interior", "cultura", - "colegios", - "educacion_otra", + # "colegios", + # "educacion_otra", "restauracion_exterior", "restauracion_interior", "movilidad", - "trabajo", + # "trabajo", ] From 12c77cbfc8ef52f2d41945cc32ab7026ffdab4ed Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 11:22:36 +0200 Subject: [PATCH 29/79] Store dict of conditions 
--- covidnpi/score/score_medidas.py | 17 +++++++++++------ covidnpi/utils/dictionaries.py | 9 +++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index e740dd7..b006e6b 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -5,7 +5,11 @@ import pandas as pd import typer -from covidnpi.utils.dictionaries import store_dict_scores, load_dict_medidas +from covidnpi.utils.dictionaries import ( + store_dict_scores, + load_dict_medidas, + store_dict_condicion, +) from covidnpi.utils.log import logger from covidnpi.utils.taxonomia import return_taxonomia, return_all_medidas @@ -125,7 +129,7 @@ def expand_nivel_educacion(df): def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: df_score = df.copy() # Asumimos que por defecto es baja - df_score["score_medida"] = 0.3 + df_score["score_medida"] = 0.2 dict_condicion = {} @@ -170,6 +174,9 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: condicion = " | ".join(list_condiciones) dict_condicion.update({nivel: condicion}) + # Store dictionary + store_dict_condicion(dict_condicion) + condicion_alto = dict_condicion["alto"] condicion_medio = dict_condicion["medio"] @@ -179,7 +186,7 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: except TypeError: raise TypeError(f"Column with unproper type:\n{df.dtypes}") - df_score.loc[mask_medio, "score_medida"] = 0.6 + df_score.loc[mask_medio, "score_medida"] = 0.5 df_score.loc[mask_alto, "score_medida"] = 1 df_score = expand_nivel_educacion(df_score) @@ -200,9 +207,7 @@ def pivot_df_score(df_score: pd.DataFrame): return df_medida -def return_dict_score_medidas( - dict_medidas: dict -) -> dict: +def return_dict_score_medidas(dict_medidas: dict) -> dict: """ Parameters diff --git a/covidnpi/utils/dictionaries.py b/covidnpi/utils/dictionaries.py index 42fee7f..af04029 100644 --- 
a/covidnpi/utils/dictionaries.py +++ b/covidnpi/utils/dictionaries.py @@ -1,3 +1,4 @@ +import json import os import pandas as pd @@ -54,3 +55,11 @@ def load_dict_scores(path_scores: str = "output/score_medidas"): def reverse_dictionary(d: dict) -> dict: reversed_dictionary = {value: key for (key, value) in d.items()} return reversed_dictionary + + +def store_dict_condicion( + dict_condicion: dict, path_output: str = "output/dict_condicion.json" +): + """Guarda un json con las condiciones aplicadas por la taxonomia""" + with open(path_output, "w") as f: + json.dump(dict_condicion, f) From f0ecb2f21756b55f1e8ffcd736cdda507d36f102 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 11:22:45 +0200 Subject: [PATCH 30/79] Store processed taxonomia --- covidnpi/utils/taxonomia.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/covidnpi/utils/taxonomia.py b/covidnpi/utils/taxonomia.py index 867298c..b2b491b 100644 --- a/covidnpi/utils/taxonomia.py +++ b/covidnpi/utils/taxonomia.py @@ -99,10 +99,16 @@ def classify_criteria(taxonomia: pd.DataFrame): return classified -def return_taxonomia(path_taxonomia: str = PATH_TAXONOMIA): +def return_taxonomia( + path_taxonomia: str = PATH_TAXONOMIA, path_output: str = "output/taxonomia.csv" +): taxonomia = read_taxonomia(path_taxonomia) criterio = classify_criteria(taxonomia) - taxonomia = pd.merge(taxonomia, criterio, left_index=True, right_index=True) + taxonomia = pd.merge( + taxonomia[["codigo", "item", "ambito"]], criterio, left_index=True, right_index=True + ) + # Store taxonomia + taxonomia.to_csv(path_output, index=False) return taxonomia From 5286facd9c5114328430cfe8d74a2a75eeaf7d0b Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 11:42:08 +0200 Subject: [PATCH 31/79] Sort taxonomia codes --- covidnpi/utils/taxonomia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covidnpi/utils/taxonomia.py b/covidnpi/utils/taxonomia.py index b2b491b..688b88c 100644 --- 
a/covidnpi/utils/taxonomia.py +++ b/covidnpi/utils/taxonomia.py @@ -106,7 +106,7 @@ def return_taxonomia( criterio = classify_criteria(taxonomia) taxonomia = pd.merge( taxonomia[["codigo", "item", "ambito"]], criterio, left_index=True, right_index=True - ) + ).sort_values("codigo").drop_duplicates() # Store taxonomia taxonomia.to_csv(path_output, index=False) return taxonomia From 61067b5e75736a99482c23ea7db428f12851a1b9 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 11:42:23 +0200 Subject: [PATCH 32/79] Improve condicion_personas --- covidnpi/score/score_medidas.py | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index b006e6b..e8a4423 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -67,9 +67,9 @@ def build_condicion_porcentaje(lista_medidas, porcentaje): return condicion_compuesta -def build_condicion_personas(lista_medidas, personas): +def build_condicion_personas(lista_medidas, personas, condition: str = "<="): condicion = build_condicion_existe(lista_medidas) - condicion_compuesta = f"({condicion} & (personas <= {personas}))" + condicion_compuesta = f"({condicion} & (personas {condition} {personas}))" return condicion_compuesta @@ -126,6 +126,18 @@ def expand_nivel_educacion(df): return df_expanded +def list_missing_codigos(taxonomia: pd.DataFrame, dict_condicion: dict): + """Avisa de que faltan ciertos codigos en la lista de condiciones""" + codigos = "-".join([s for s in dict_condicion.values()]) + list_missing = [] + # Filtramos aquellas con bajo == "existe" porque no saldran en la lista + for codigo in taxonomia.query("bajo != 'existe'")["codigo"].unique(): + if codigo not in codigos: + list_missing.append(codigo) + if len(list_missing) > 0: + logger.error(f"Faltan codigos en condicones: {', '.join(list_missing)}") + + def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> 
pd.DataFrame: df_score = df.copy() # Asumimos que por defecto es baja @@ -143,11 +155,19 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: list_condiciones += [build_condicion_existe(existe)] # Personas for pers in [6, 10, 100]: - personas_leq = taxonomia.loc[ - taxonomia[nivel].str.contains(f"<={pers}(?!%)", regex=True), "codigo" - ].unique() - if len(personas_leq) > 0: - list_condiciones += [build_condicion_personas(personas_leq, pers)] + for condition in ["<=", "<"]: + personas_cond = taxonomia.loc[ + taxonomia[nivel].str.contains( + f"{condition}{pers}(?!%)", regex=True + ), + "codigo", + ].unique() + if len(personas_cond) > 0: + list_condiciones += [ + build_condicion_personas( + personas_cond, pers, condition=condition + ) + ] # Personas no especifica no_especifica = taxonomia.loc[ taxonomia[nivel].str.contains("noseespecifica"), "codigo" @@ -176,6 +196,8 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: # Store dictionary store_dict_condicion(dict_condicion) + # List missing codigos + list_missing_codigos(taxonomia, dict_condicion) condicion_alto = dict_condicion["alto"] condicion_medio = dict_condicion["medio"] From b24dddb84b56e95e6be09a1e2238691e94a6e1b7 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 11:50:44 +0200 Subject: [PATCH 33/79] Change += to append --- covidnpi/score/score_medidas.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index e8a4423..bb53e1f 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -152,7 +152,7 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: taxonomia[nivel].str.contains("existe"), "codigo" ].unique() if len(existe) > 0: - list_condiciones += [build_condicion_existe(existe)] + list_condiciones.append(build_condicion_existe(existe)) # Personas for pers in [6, 10, 100]: for condition in 
["<=", "<"]: @@ -163,24 +163,24 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: "codigo", ].unique() if len(personas_cond) > 0: - list_condiciones += [ + list_condiciones.append( build_condicion_personas( personas_cond, pers, condition=condition ) - ] + ) # Personas no especifica no_especifica = taxonomia.loc[ taxonomia[nivel].str.contains("noseespecifica"), "codigo" ].unique() if len(no_especifica) > 0: - list_condiciones += [build_condicion_no_especifica(no_especifica)] + list_condiciones.append(build_condicion_no_especifica(no_especifica)) # Porcentaje for por in [35]: porcentaje_leq = taxonomia.loc[ taxonomia[nivel].str.contains(f"<={por}%"), "codigo" ].unique() if len(porcentaje_leq) > 0: - list_condiciones += [build_condicion_porcentaje(porcentaje_leq, por)] + list_condiciones.append(build_condicion_porcentaje(porcentaje_leq, por)) # Hora for hor in [18]: hora_leq = taxonomia.loc[ @@ -189,9 +189,9 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: "codigo", ].unique() if len(hora_leq) > 0: - list_condiciones += [build_condicion_horario(hora_leq, hor)] + list_condiciones.append(build_condicion_horario(hora_leq, hor)) # All conditions - condicion = " | ".join(list_condiciones) + condicion = " | ".join(set(list_condiciones)) dict_condicion.update({nivel: condicion}) # Store dictionary From 2a400ffd1624a1399c021b728596aac5f259acac Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 12:34:30 +0200 Subject: [PATCH 34/79] Fix hora preprocessing --- covidnpi/utils/log.py | 17 +++++++--- covidnpi/utils/preprocess.py | 62 ++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/covidnpi/utils/log.py b/covidnpi/utils/log.py index 033f18b..cabd2e5 100644 --- a/covidnpi/utils/log.py +++ b/covidnpi/utils/log.py @@ -65,9 +65,7 @@ def raise_type_warning( ) -def raise_value_warning( - df: pd.DataFrame, list_idx: list, col: str -): +def raise_value_warning(df: 
pd.DataFrame, list_idx: list, col: str): """Prints the rows that produces warnings, showing index and the value that fails""" list_msg = [""] * len(list_idx) for j, idx in enumerate(list_idx): @@ -75,4 +73,15 @@ def raise_value_warning( logger.warning( f"La columna '{col}' contiene valores sospechosos:\n" + "\n".join(list_msg) - ) \ No newline at end of file + ) + + +def raise_missing_warning(df: pd.DataFrame, list_idx: list, col: str): + """Prints the rows that produces warnings, showing index and the value that fails""" + if len(list_idx) == 0: + return + list_msg = [""] * len(list_idx) + for j, idx in enumerate(list_idx): + list_msg[j] = f" {idx + 2} ... {df.loc[idx, col]}" + + logger.warning(f"La columna '{col}' ha perdido valores:\n" + "\n".join(list_msg)) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 48d0819..880db62 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -7,7 +7,12 @@ import xlrd from covidnpi.utils.dictionaries import store_dict_provincia_to_medidas -from covidnpi.utils.log import logger, raise_type_warning, raise_value_warning +from covidnpi.utils.log import ( + logger, + raise_type_warning, + raise_value_warning, + raise_missing_warning, +) from covidnpi.utils.taxonomia import return_all_medidas, PATH_TAXONOMIA LIST_BASE_SHEET = ["base", "base-regional-provincias", "BASE"] @@ -284,43 +289,30 @@ def rename_unidad(df, rename: dict = None) -> pd.DataFrame: def format_hora(df: pd.DataFrame) -> pd.DataFrame: """Formats the hora column, to datetime""" - # We do not want to modify the original dataframe - df = df.copy() # If "hora" is empty, return original if df["hora"].isnull().all(): return df - # The following will only run when the column "hora" is a string - try: - # Remove whitespaces from string - df["hora"] = df["hora"].str.replace(" ", "").astype(str) - # Change ranges HH:MM-HH:MM to last HH:MM - mask_range = ( - df["hora"] - .str.contains( - 
"^([0-1]?[0-9]|2[0-3]):[0-5][0-9]-([0-1]?[0-9]|2[0-3]):[0-5][0-9]$" - ) - .fillna(False) - ) - df.loc[mask_range, "hora"] = ( - df.loc[mask_range, "hora"].str.split("-").str[-1] + ":00" - ) - except AttributeError: - pass - # Convert to date format - try: - hora = pd.to_datetime(df["hora"], format="%H:%M:%S", errors="raise") - except (TypeError, ValueError) as e: - hora = pd.Series(pd.to_datetime(df["hora"], format="%H:%M:%S", errors="coerce")) - list_idx = df.loc[hora.isna(), "hora"].dropna().index.tolist() - # Filtramos aquellos warning que no interesan, - # porque son medidas que no aplican la columna "hora" - list_idx = [ - idx for idx in list_idx if df["codigo"][idx] not in LIST_MEDIDAS_NO_HORA - ] - if len(list_idx) > 0: - raise_type_warning(df, list_idx, "hora") - # Take only hour - df["hora"] = hora.dt.hour + hora.dt.minute / 60 + # We do not want to modify the original dataframe + df = df.copy() + # Take the column "hora" as a string series + hora = df["hora"].dropna().astype(str).str.replace(" ", "").copy() + # Change ranges HH:MM-HH:MM to last HH:MM + mask_range = hora.str.contains( + "^([0-1]?[0-9]|2[0-3]):[0-5][0-9]-([0-1]?[0-9]|2[0-3]):[0-5][0-9]$" + ).fillna(False) + hora[mask_range] = hora[mask_range].str.split("-").str[-1] + ":00" + # Take hour + minute = hora.str.split(":").str[1] + hora = hora.str.split(":").str[0] + # Force numeric + hora = pd.to_numeric(hora, errors="coerce") + minute = pd.to_numeric(minute, errors="coerce") + # Sum minutes to hora + hora += minute / 60 + # Check if some original data is missing + list_idx = df[hora.isna() & ~df["hora"].isna()].index + raise_missing_warning(df, list_idx, "hora") + df["hora"] = hora return df From afc3016fa31a192d27d02ff94c54f0bb7e8822a0 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 12:47:51 +0200 Subject: [PATCH 35/79] Set maximum ambito values to 1 --- covidnpi/score/score_ambitos.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/covidnpi/score/score_ambitos.py 
b/covidnpi/score/score_ambitos.py index 3512ff1..944a706 100644 --- a/covidnpi/score/score_ambitos.py +++ b/covidnpi/score/score_ambitos.py @@ -94,6 +94,8 @@ def score_ponderada(df_afectado: pd.DataFrame, path_taxonomia=PATH_TAXONOMIA): pesos = pon_sub["ponderacion"].values items = pon_sub["nombre"] df_afectado[ambito] = (df_afectado[items] * pesos).sum(axis=1).div(pesos.sum()) + # Max value is 1 + df_afectado.loc[df_afectado[ambito] > 1, ambito] = 1 return df_afectado From d3e957d7f75a7f4b65ec0a4e5e32933f460f8e9b Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 3 Jun 2021 15:53:39 +0200 Subject: [PATCH 36/79] Update RIN_afo --- covidnpi/score/score_items.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index afe208a..4adcf76 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -66,7 +66,13 @@ def score_items(df: pd.DataFrame): df_item["CUL_zoo"] = df[["CD.3", "CD.16", "CD.15"]].max(axis=1) # Restauración interior - df_item["RIN_afo"] = df[["RH.1", "RH.2", "RH.3", "RH.7"]].max(axis=1) + df_item["RIN_afo"] = np.max( + [ + df[["RH.1", "RH.2", "RH.3"]].max(axis=1), + df[["RH.4", "RH.7"]].sum(axis=1) + * df[["RH.1", "RH.2", "RH.3"]].isna().all(axis=1), + ] + ) df_item["RIN_hor"] = df[["RH.1", "RH.2", "RH.3", "RH.5"]].max(axis=1) df_item["RIN_mesa"] = df[["RH.1", "RH.2", "RH.3", "RH.9", "RH.11"]].max(axis=1) From 1a6e90fc79bddc556fa1c85b124d0c0151057416 Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Tue, 8 Jun 2021 12:31:40 +0200 Subject: [PATCH 37/79] Update score ceremonias --- covidnpi/score/score_items.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index 4adcf76..f174beb 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -30,10 +30,10 @@ def score_items(df: pd.DataFrame): # Ceremonias df_item["CER_cult"] = df[["CE.1", 
"CE.2"]].max(axis=1) df_item["CER_cor"] = df[["CE.1", "CE.7"]].max(axis=1) - df_item["CER_ent_int"] = df["CE.3"] - df_item["CER_ent_ext"] = df["CE.4"] - df_item["CER_otro_int"] = df["CE.5"] - df_item["CER_otro_ext"] = df["CE.5"] + df_item["CER_ent_int"] = df[["CE.3", "CE.9"]] + df_item["CER_ent_ext"] = df[["CE.4", "CE.9"]] + df_item["CER_otro_int"] = df[["CE.5", "CE.10"]] + df_item["CER_otro_ext"] = df[["CE.6", "CE.10"]] # Comercio df_item["COM_afo"] = df[["CO.1", "CO.8"]].max(axis=1) From cb751017f84df9ac5316e1d7ee082068ed6beb0a Mon Sep 17 00:00:00 2001 From: Daniel Precioso Date: Tue, 8 Jun 2021 12:38:33 +0200 Subject: [PATCH 38/79] Update score ceremonias (fix error) --- covidnpi/score/score_items.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index f174beb..0b11f2f 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -30,10 +30,10 @@ def score_items(df: pd.DataFrame): # Ceremonias df_item["CER_cult"] = df[["CE.1", "CE.2"]].max(axis=1) df_item["CER_cor"] = df[["CE.1", "CE.7"]].max(axis=1) - df_item["CER_ent_int"] = df[["CE.3", "CE.9"]] - df_item["CER_ent_ext"] = df[["CE.4", "CE.9"]] - df_item["CER_otro_int"] = df[["CE.5", "CE.10"]] - df_item["CER_otro_ext"] = df[["CE.6", "CE.10"]] + df_item["CER_ent_int"] = df[["CE.3", "CE.9"]].max(axis=1) + df_item["CER_ent_ext"] = df[["CE.4", "CE.9"]].max(axis=1) + df_item["CER_otro_int"] = df[["CE.5", "CE.10"]].max(axis=1) + df_item["CER_otro_ext"] = df[["CE.6", "CE.10"]].max(axis=1) # Comercio df_item["COM_afo"] = df[["CO.1", "CO.8"]].max(axis=1) From 19292ad82e4e22fc97afbf172655c92f278630aa Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 14:12:50 +0200 Subject: [PATCH 39/79] Data of abreviations and population --- datos/abreviaturas.csv | 53 +++++++++++++++++++++++++++++++++++++++++ datos/poblacion.csv | 54 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create 
mode 100644 datos/abreviaturas.csv create mode 100644 datos/poblacion.csv diff --git a/datos/abreviaturas.csv b/datos/abreviaturas.csv new file mode 100644 index 0000000..21109ed --- /dev/null +++ b/datos/abreviaturas.csv @@ -0,0 +1,53 @@ +"code","postal_code","name","phone_code","iso2" +"VI","01","Álava",945,"ES" +"AB","02","Albacete",967,"ES" +"A","03","Alacant",950,"ES" +"AL","04","Almería",920,"ES" +"AV","05","Ávila",924,"ES" +"BA","06","Badajoz",924,"ES" +"PM","07","Illes Balears",971,"ES" +"B","08","Barcelona",93,"ES" +"BU","09","Burgos",947,"ES" +"CC","10","Cáceres",927,"ES" +"CA","11","Cádiz",956,"ES" +"CS","12","Castelló",964,"ES" +"CR","13","Ciudad Real",926,"ES" +"CO","14","Córdoba",957,"ES" +"C","15","A Coruña",981,"ES" +"CU","16","Cuenca",969,"ES" +"GI","17","Girona",972,"ES" +"GR","18","Granada",958,"ES" +"GU","19","Guadalajara",949,"ES" +"SS","20","Gipuzkoa",943,"ES" +"H","21","Huelva",959,"ES" +"HU","22","Huesca",974,"ES" +"J","23","Jaén",953,"ES" +"LE","24","León",987,"ES" +"L","25","Lleida",973,"ES" +"LO","26","La Rioja",941,"ES" +"LU","27","Lugo",982,"ES" +"M","28","Madrid",91,"ES" +"MA","29","Málaga",95,"ES" +"MU","30","Murcia",968,"ES" +"NA","31","Nafarroa",948,"ES" +"OR","32","Ourense",988,"ES" +"O","33","Asturias",98,"ES" +"P","34","Palencia",979,"ES" +"GC","35","Las Palmas",928,"ES" +"PO","36","Pontevedra",986,"ES" +"SA","37","Salamanca",923,"ES" +"TF","38","Sta. 
Cruz de Tenerife",922,"ES" +"S","39","Cantabria",942,"ES" +"SG","40","Segovia",921,"ES" +"SE","41","Sevilla",95,"ES" +"SO","42","Soria",975,"ES" +"T","43","Tarragona",977,"ES" +"TE","44","Teruel",978,"ES" +"TO","45","Toledo",925,"ES" +"V","46","Valéncia",96,"ES" +"VA","47","Valladolid",983,"ES" +"BI","48","Bizkaia",94,"ES" +"ZA","49","Zamora",980,"ES" +"Z","50","Zaragoza",976,"ES" +"CE","51","Ceuta",956,"ES" +"ML","52","Melilla",95,"ES" \ No newline at end of file diff --git a/datos/poblacion.csv b/datos/poblacion.csv new file mode 100644 index 0000000..22f9ab4 --- /dev/null +++ b/datos/poblacion.csv @@ -0,0 +1,54 @@ +Provincias;Sexo;Periodo;Total +Total;Total;2020;47.450.795 +02 Albacete;Total;2020;388.270 +03 Alicante/Alacant;Total;2020;1.879.888 +04 Almera;Total;2020;727.945 +01 Araba/lava;Total;2020;333.940 +33 Asturias;Total;2020;1.018.784 +05 vila;Total;2020;157.664 +06 Badajoz;Total;2020;672.137 +07 Balears, Illes;Total;2020;1.171.543 +08 Barcelona;Total;2020;5.743.402 +48 Bizkaia;Total;2020;1.159.443 +09 Burgos;Total;2020;357.650 +10 Cceres;Total;2020;391.850 +11 Cdiz;Total;2020;1.244.049 +39 Cantabria;Total;2020;582.905 +12 Castelln/Castell;Total;2020;585.590 +13 Ciudad Real;Total;2020;495.045 +14 Crdoba;Total;2020;781.451 +15 Corua, A;Total;2020;1.121.815 +16 Cuenca;Total;2020;196.139 +20 Gipuzkoa;Total;2020;727.121 +17 Girona;Total;2020;781.788 +18 Granada;Total;2020;919.168 +19 Guadalajara;Total;2020;261.995 +21 Huelva;Total;2020;524.278 +22 Huesca;Total;2020;222.687 +23 Jan;Total;2020;631.381 +24 Len;Total;2020;456.439 +25 Lleida;Total;2020;438.517 +27 Lugo;Total;2020;327.946 +28 Madrid;Total;2020;6.779.888 +29 Mlaga;Total;2020;1.685.920 +30 Murcia;Total;2020;1.511.251 +31 Navarra;Total;2020;661.197 +32 Ourense;Total;2020;306.650 +34 Palencia;Total;2020;160.321 +35 Palmas, Las;Total;2020;1.131.065 +36 Pontevedra;Total;2020;945.408 +26 Rioja, La;Total;2020;319.914 +37 Salamanca;Total;2020;329.245 +38 Santa Cruz de Tenerife;Total;2020;1.044.887 +40 
Segovia;Total;2020;153.478 +41 Sevilla;Total;2020;1.950.219 +42 Soria;Total;2020;88.884 +43 Tarragona;Total;2020;816.772 +44 Teruel;Total;2020;134.176 +45 Toledo;Total;2020;703.772 +46 Valencia/Valncia;Total;2020;2.591.875 +47 Valladolid;Total;2020;520.649 +49 Zamora;Total;2020;170.588 +50 Zaragoza;Total;2020;972.528 +51 Ceuta;Total;2020;84.202 +52 Melilla;Total;2020;87.076 From 6710ff78ab9b1ce974832686c25e18d049133317 Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 14:13:45 +0200 Subject: [PATCH 40/79] Script to run preprocessing and update web --- update.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 update.sh diff --git a/update.sh b/update.sh new file mode 100644 index 0000000..1153b71 --- /dev/null +++ b/update.sh @@ -0,0 +1,4 @@ +python covidnpi/preprocess_and_score.py --path-raw ../modelos-covid/datos_NPI_3 > log.out +python covidnpi/initialize_web.py --path-config config.toml +python covidnpi/initialize_web.py --path-config config-staging.toml +python covidnpi/initialize_web.py --path-config config-live.toml From 7ee3e68c0077dc801a1ea256d2e1b3079c6899e8 Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 14:13:54 +0200 Subject: [PATCH 41/79] Include notebooks in gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6c48e1a..61d32a7 100644 --- a/.gitignore +++ b/.gitignore @@ -133,9 +133,10 @@ dmypy.json *.iml # Other directories +notebooks/* output/* .idea/* datos_* -/config.toml +/config*.toml /log.out /config-prod.toml From 1ed07035817192ee9c506e4c316270503ada160d Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 15:47:08 +0200 Subject: [PATCH 42/79] Move datos to data --- {datos => data}/abreviaturas.csv | 0 {datos => data}/poblacion.csv | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {datos => data}/abreviaturas.csv (100%) rename {datos => data}/poblacion.csv (100%) diff --git a/datos/abreviaturas.csv 
b/data/abreviaturas.csv similarity index 100% rename from datos/abreviaturas.csv rename to data/abreviaturas.csv diff --git a/datos/poblacion.csv b/data/poblacion.csv similarity index 100% rename from datos/poblacion.csv rename to data/poblacion.csv From 6151f42d71eb7410477d7551850ac340c413557f Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 16:20:58 +0200 Subject: [PATCH 43/79] Rename guipuzkoa to guipuzcoa --- covidnpi/utils/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 880db62..0c02918 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -76,7 +76,7 @@ "vizcaya": "pais_vasco", } -DICT_PROVINCE_RENAME = {"a_coruna": "coruna_la", "cyl": ""} +DICT_PROVINCE_RENAME = {"a_coruna": "coruna_la", "cyl": "", "guipuzkoa": "guipuzcoa"} DICT_CCAA_RENAME = {"autonomico": np.nan} LIST_MEDIDAS_NO_HORA = ["MV.3", "MV.4", "MV.7"] From 9d008ffdf94a72073c29ecf3cc0d88fbc5d9eafb Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 17:08:49 +0200 Subject: [PATCH 44/79] Sort taxonomia by ambito and item --- covidnpi/utils/taxonomia.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/covidnpi/utils/taxonomia.py b/covidnpi/utils/taxonomia.py index 688b88c..66c4104 100644 --- a/covidnpi/utils/taxonomia.py +++ b/covidnpi/utils/taxonomia.py @@ -104,9 +104,16 @@ def return_taxonomia( ): taxonomia = read_taxonomia(path_taxonomia) criterio = classify_criteria(taxonomia) - taxonomia = pd.merge( - taxonomia[["codigo", "item", "ambito"]], criterio, left_index=True, right_index=True - ).sort_values("codigo").drop_duplicates() + taxonomia = ( + pd.merge( + taxonomia[["codigo", "item", "ambito"]], + criterio, + left_index=True, + right_index=True, + ) + .sort_values(["ambito", "item", "codigo"]) + .drop_duplicates() + ) # Store taxonomia taxonomia.to_csv(path_output, index=False) return taxonomia From 
ad80782d1ad34e96647afd1b26c85e73c94f9167 Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 17:09:01 +0200 Subject: [PATCH 45/79] Ignore nivel educacion in medidas --- covidnpi/score/score_medidas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index bb53e1f..947cbe8 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -211,7 +211,7 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: df_score.loc[mask_medio, "score_medida"] = 0.5 df_score.loc[mask_alto, "score_medida"] = 1 - df_score = expand_nivel_educacion(df_score) + # df_score = expand_nivel_educacion(df_score) return df_score From 7312814cd8b3f7f44fa7f9dae004156fd79c0b65 Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 11 Jun 2021 17:19:32 +0200 Subject: [PATCH 46/79] Fix max computation in score items --- covidnpi/score/score_items.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index 0b11f2f..bd847fd 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -45,33 +45,36 @@ def score_items(df: pd.DataFrame): df_item["COM_libre"] = df[["CO.1", "CO.6", "CO.10"]].max(axis=1) # Cultura - df_item["CUL_mus"] = np.max( + df_item["CUL_mus"] = np.nanmax( [ df["CD.1"], ( - 0.5 * np.max([df["CD.2"], df["CD.7"], df["CD.6"]], axis=0) - + 0.5 * np.max([df["CD.8"], df["CD.6"]], axis=0) + 0.5 * np.nanmax([df["CD.2"], df["CD.7"], df["CD.6"]], axis=0) + + 0.5 * np.nanmax([df["CD.8"], df["CD.6"]], axis=0) ), - ] + ], + axis=0, ) - df_item["CUL_cin"] = np.max( + df_item["CUL_cin"] = np.nanmax( [ df["CD.3"], - (0.7 * np.max([df["CD.4"], df["CD.9"]], axis=0) + 0.3 * df["CD.10"]) + (0.7 * np.nanmax([df["CD.4"], df["CD.9"]], axis=0) + 0.3 * df["CD.10"]) * (df["CD.3"] == 0), - ] + ], + axis=0, ) df_item["CUL_sal"] = df[["CD.5", 
"CD.11"]].max(axis=1) df_item["CUL_tor"] = df[["CD.3", "CD.17", "CD.14"]].max(axis=1) df_item["CUL_zoo"] = df[["CD.3", "CD.16", "CD.15"]].max(axis=1) # Restauración interior - df_item["RIN_afo"] = np.max( + df_item["RIN_afo"] = np.nanmax( [ df[["RH.1", "RH.2", "RH.3"]].max(axis=1), df[["RH.4", "RH.7"]].sum(axis=1) * df[["RH.1", "RH.2", "RH.3"]].isna().all(axis=1), - ] + ], + axis=0, ) df_item["RIN_hor"] = df[["RH.1", "RH.2", "RH.3", "RH.5"]].max(axis=1) df_item["RIN_mesa"] = df[["RH.1", "RH.2", "RH.3", "RH.9", "RH.11"]].max(axis=1) @@ -84,11 +87,12 @@ def score_items(df: pd.DataFrame): # Distancia social df_item["DS_even"] = df[["MV.1", "CD.12", "CD.13"]].max(axis=1) df_item["DS_dom"] = df[["MV.1", "MV.2"]].max(axis=1) - df_item["DS_reun"] = np.max( + df_item["DS_reun"] = np.nanmax( [ df[["MV.1", "RS.1"]].max(axis=1), df[["RS.2", "RS.3", "RS.8"]].mean(axis=1) * df["RS.1"].isna(), - ] + ], + axis=0, ) df_item["DS_tran"] = df[["MV.1", "TP.1"]].max(axis=1) From 4e058e9da11c57e25844629493ebe2cdc5f5af96 Mon Sep 17 00:00:00 2001 From: daniprec Date: Mon, 14 Jun 2021 17:37:42 +0200 Subject: [PATCH 47/79] Ignore vscode folder --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 61d32a7..395687e 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ datos_* /config*.toml /log.out /config-prod.toml +/.vscode From d6d89df251a4f1d2746dbebc1a8e4292707fea14 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 10:42:55 +0200 Subject: [PATCH 48/79] Update Dist Social item scoring --- covidnpi/score/score_items.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index bd847fd..620fc81 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -85,11 +85,13 @@ def score_items(df: pd.DataFrame): df_item["REX_otr"] = df[["RH.1", "RH.2", "RH.9", "RH.10"]].max(axis=1) # Distancia social - df_item["DS_even"] = df[["MV.1", 
"CD.12", "CD.13"]].max(axis=1) + df_item["DS_even"] = np.nanmax( + [df[["MV.1", "CD.12"]].max(axis=1), df["CD.13"] * df["CD.12"].isna()], axis=0 + ) df_item["DS_dom"] = df[["MV.1", "MV.2"]].max(axis=1) df_item["DS_reun"] = np.nanmax( [ - df[["MV.1", "RS.1"]].max(axis=1), + df["RS.1"], df[["RS.2", "RS.3", "RS.8"]].mean(axis=1) * df["RS.1"].isna(), ], axis=0, From ff247bb25b9d4802bf7c98f7e9a121fb2e2fe078 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 10:49:01 +0200 Subject: [PATCH 49/79] score_medidas is more complete --- covidnpi/score/score_medidas.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index 947cbe8..2438284 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -138,7 +138,7 @@ def list_missing_codigos(taxonomia: pd.DataFrame, dict_condicion: dict): logger.error(f"Faltan codigos en condicones: {', '.join(list_missing)}") -def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: +def add_score_medida(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: df_score = df.copy() # Asumimos que por defecto es baja df_score["score_medida"] = 0.2 @@ -229,6 +229,29 @@ def pivot_df_score(df_score: pd.DataFrame): return df_medida +def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: + """Receives the medidas dataframe and outputs a new dataframe of scores + + Parameters + ---------- + df : pd.DataFrame + Dataframe of medidas + taxonomia : pd.DataFrame + Dataframe with taxonomy data + + Returns + ------- + pd.DataFrame + Dataframe of scores, each row being a date and each column a medida + """ + df_sub = df.copy() + df_sub = process_hora(df_sub) + df_sub_extended = extend_fecha(df_sub) + df_score = add_score_medida(df_sub_extended, taxonomia) + df_score = pivot_df_score(df_score) + return df_score + + def return_dict_score_medidas(dict_medidas: dict) -> 
dict: """ @@ -249,10 +272,7 @@ def return_dict_score_medidas(dict_medidas: dict) -> dict: for provincia, df_sub in dict_medidas.items(): logger.debug(provincia) - df_sub = process_hora(df_sub) - df_sub_extended = extend_fecha(df_sub) - df_score = score_medidas(df_sub_extended, taxonomia) - df_score = pivot_df_score(df_score) + df_score = score_medidas(df_sub, taxonomia) # Nos aseguramos de que todas las medidas estan en el df medidas_missing = list(set(all_medidas) - set(df_score.columns)) for m in medidas_missing: From 07ad93847b5db939cad6f976b505e4414206e651 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 10:49:25 +0200 Subject: [PATCH 50/79] Avoid porcentaje below 0 --- covidnpi/score/score_ambitos.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/covidnpi/score/score_ambitos.py b/covidnpi/score/score_ambitos.py index 944a706..2951f7a 100644 --- a/covidnpi/score/score_ambitos.py +++ b/covidnpi/score/score_ambitos.py @@ -27,6 +27,7 @@ def compute_proportion(df: pd.DataFrame, item: str): .groupby("fecha")["porcentaje_afectado"] .sum() ) + porcentaje_general[porcentaje_general < 0] = 0 # Identificamos las medidas que se han aplicado exclusivamente con caracter general mask_general = df_sub["porcentaje_afectado"] == 100 @@ -95,7 +96,7 @@ def score_ponderada(df_afectado: pd.DataFrame, path_taxonomia=PATH_TAXONOMIA): items = pon_sub["nombre"] df_afectado[ambito] = (df_afectado[items] * pesos).sum(axis=1).div(pesos.sum()) # Max value is 1 - df_afectado.loc[df_afectado[ambito] > 1, ambito] = 1 + # assert df_afectado[ambito].max() <= 1, f"La puntuacion de {ambito} supera 1" return df_afectado From 18a3176d68e64938e1b99a44fd04f4c96584c662 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 10:49:57 +0200 Subject: [PATCH 51/79] Initialize test scripts --- test/data/medidas.csv | 2 + test/data/taxonomia.csv | 80 +++++++++++++++++++++++++++++++++++++ test/data/taxonomia.xlsx | Bin 0 -> 72608 bytes test/test.csv | 3 ++ 
test/test_score_medidas.py | 18 +++++++++ 5 files changed, 103 insertions(+) create mode 100644 test/data/medidas.csv create mode 100644 test/data/taxonomia.csv create mode 100644 test/data/taxonomia.xlsx create mode 100644 test/test.csv create mode 100644 test/test_score_medidas.py diff --git a/test/data/medidas.csv b/test/data/medidas.csv new file mode 100644 index 0000000..7100f62 --- /dev/null +++ b/test/data/medidas.csv @@ -0,0 +1,2 @@ +comunidad_autonoma,provincia,codigo,fecha_inicio,fecha_fin,ambito,porcentaje_afectado,porcentaje,personas,hora,nivel_educacion +test,test,CD.12,1996-09-14,1996-09-14,autonomico,,,100,, \ No newline at end of file diff --git a/test/data/taxonomia.csv b/test/data/taxonomia.csv new file mode 100644 index 0000000..2313f91 --- /dev/null +++ b/test/data/taxonomia.csv @@ -0,0 +1,80 @@ +codigo,item,ambito,alto,medio,bajo +CE.1,1,ceremonias,existe,, +CE.2,1,ceremonias,,<=35%,>35% +CE.7,2,ceremonias,existe,, +CE.3,3,ceremonias,,<=35%o<=10personas,>35%o>10personas +CE.4,4,ceremonias,,<=35%o<=10personas,>35%o>10personas +CE.9,4,ceremonias,existe,, +CE.10,5,ceremonias,existe,, +CE.5,5,ceremonias,,<=35%o<=10personas,>35%o>10personas +CE.6,6,ceremonias,,<=35%o<=10personas,>35%o>10personas +CO.1,1,comercio,existe,, +CO.8,1,comercio,,<=35%,>35% +CO.7,2,comercio,,antesoigualquelas18:00,despuésdelas18:00yantesdelas20:00. 
+CO.2,3,comercio,existe,, +CO.3,4,comercio,existe,, +CO.4,5,comercio,existe,, +CO.9,5,comercio,,<=35%,>35% +CO.5,6,comercio,existe,, +CO.10,7,comercio,,<=35%,>35% +CO.6,7,comercio,existe,, +CD.1,1,cultura,existe,, +CD.2,1,cultura,existe,, +CD.6,1,cultura,,<=35%,>35% +CD.7,1,cultura,,<=35%,>35% +CD.8,1,cultura,,<=35%,>35% +CD.10,2,cultura,,<=35%,>35% +CD.3,2,cultura,existe,, +CD.4,2,cultura,existe,, +CD.9,2,cultura,,<=35%,>35% +CD.11,3,cultura,,<=35%,>35% +CD.5,3,cultura,existe,, +CD.14,4,cultura,,<=35%,>35% +CD.17,4,cultura,existe,, +CD.15,5,cultura,,<=35%,>35% +CD.16,5,cultura,existe,, +AF.1,1,deporte_exterior,existe,, +AF.6,1,deporte_exterior,,<=35%o<=6personas,>35%o>6personas(onoseespecifica) +AF.7,1,deporte_exterior,,<=6personas,>6personas(onoseespecifica) +AF.17,2,deporte_exterior,,<=6personas(onoseespecifica),>6personas +AF.4,2,deporte_exterior,existe,, +AF.13,3,deporte_exterior,existe,, +AF.15,3,deporte_exterior,,<=35%,>35% +AF.3,3,deporte_exterior,existe,, +AF.1,1,deporte_interior,existe,, +AF.12,1,deporte_interior,,<=6personas,>6personas(onoseespecifica) +AF.2,1,deporte_interior,existe,, +AF.5,1,deporte_interior,,<=35%o<=6personas,>35%o>6personas(onoseespecifica) +AF.17,2,deporte_interior,,<=6personas(onoseespecifica),>6personas +AF.4,2,deporte_interior,existe,, +AF.14,3,deporte_interior,existe,, +AF.16,3,deporte_interior,,<=35%,>35% +AF.3,3,deporte_interior,existe,, +CD.12,1,distancia_social,,<=100personas,>100personas +CD.13,1,distancia_social,,<=100personas,>100personas +MV.1,2,distancia_social,existe,, +MV.2,2,distancia_social,,,existe +RS.1,3,distancia_social,<=6personas,>6y<=10,>10 +RS.2,3,distancia_social,<=6personas,>6y<=10,>10 +RS.3,3,distancia_social,<=6personas,>6y<=10,>10 +RS.8,3,distancia_social,existe,, +TP.1,4,distancia_social,existe,, +MV.3,1,movilidad,existe,, +MV.4,2,movilidad,existe,, +MV.4,3,movilidad,existe,, +MV.4,4,movilidad,existe,, +MV.7,4,movilidad,existe,, +RH.1,1,restauracion_exterior,existe,, 
+RH.2,1,restauracion_exterior,existe,, +RH.6,1,restauracion_exterior,,<=35%,>35% +RH.5,2,restauracion_exterior,,antesoigualquelas18:00,despuésdelas18:00 +RH.10,3,restauracion_exterior,,<6personas,>=6personas +RH.9,3,restauracion_exterior,,<6personas,>=6personas +RH.1,1,restauracion_interior,existe,, +RH.2,1,restauracion_interior,existe,, +RH.3,1,restauracion_interior,existe,, +RH.4,1,restauracion_interior,,,existe +RH.7,1,restauracion_interior,,<=35%,>35% +RH.5,2,restauracion_interior,,antesoigualquelas18:00,despuésdelas18:00 +RH.11,3,restauracion_interior,,<6personas,>=6personas +RH.9,3,restauracion_interior,,<6personas,>=6personas diff --git a/test/data/taxonomia.xlsx b/test/data/taxonomia.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0a09131b3241d299b3df198006a01538984cb3d8 GIT binary patch literal 72608 zcmeEvc|6qn{(ng*LYv86hp5B|4YEv8q-<@77+dyz4Pzz=WlbtONvQ0542C4cSdwiR zV;|cXj2X);ev{6<=bU@by}x_U_k8c~`}>~bkB|4~`Cw)~FYou~^Lai$&*zK&)m_Yo zcCfLr?a<`Z_8LmtX&UVs zd3W9ryVs8I#w^o2_svsJPgDcU)=qUfEwtZ<+E9e01ZoV<`WUkgb>1}KZ6<0*W$cRS z7wi0(8~QS0v$kQ$!$P(?_jnF>jf*I7Jb^7(^-Hd_{Z)7U!&&DWuTg@htCagK4Qnv^ z_@uxDm=f#wAuu&^%`3YpG{;Fcsw5cYc&ek~&fLRp0-Rg|#GU?q5khm*y0x^IBTQaP1{#(Ikm+q^OCoB!<2lZ?U9-qv2>=}!HttZd6xIw@H@ zckC!-+Ob3bAO4B?zyFCL9(SEQfBA=&`Uc(VRB>sP=GZ-YaFvs3%~&_==!w&|5>Lu6 zbJkzZa~e3XJC@dp&*YGti@gUGZWvpSGaz2NWz)gwW6Bj;f|JJ0jC#NVRPXO^5^$T+ z=5!Ht6G#vBHNIw|nO*hB{xT11oP%Gv0rldZtO>QMQ%aQ@3=sv{m7}TBXz3C9=?Tj@xL@&0D956UAw?X;kT_#(D4CMTZr> z_I*@#y4q1M3hEV$*chk8zwG4ozkB_b(XKOFyITr1389ZAuSyH?V&sohCE!L3W4RM1 z^-nns?d7nzI)7!#eqP2R{%UIdM==ul-OS~|gPeP(-h!s9)kca^8$+J&dTz-%+V(2r z)N>G2?q+#sexte!UwGWZm$jP#QFQM{KCh%tDcWvDksCVk>&khvPq0@aimvZTJYT+l zc<=g&kxP!}{J_}>61Ohm&oFC1NhAxqG+3E11oho!Ncydz&{oE5K`smLKOU6!;4 zeK_c~bwZo3Vgqng7`o z`qf3WJ&}H85gGY}zWnPVy7JdWl*{?!B3hQDx;{hBMop>U?tXg%=nW9)1uFZkqGNM z1^pdOvu##+5rePKaoFID_-n6=DSSW+uCi{xw%owN`fwwG$xCz>SKAK)gJteP<0S?2+JW{m(D!f|Pbkv`r2tRPXOEp1^3i>EHXTjF{Ds*Uaq8 
zcIzxjp3Ot`TweC>I3ilnbE_~Y;Ju1E7I0wK{gW;GlV3`>-sR6IxSOBI#e;`KXX9Oa zEiGJiIU$k-F5tn!(*oMA;CDdb4;!~ZRI?XaEEiJi^PlFc%G39Ko96+@S<`onmG1NJ zuk`KN{wpipSa*A+vu|H6e_HAP^m6I1sDk|E7%xm?x}xGL!K=EDio0s>z>2ZE}G z_tFOGH=gwnLRth2=ah3KZID$`gulC7E;K|3V`^kZwoi7}7w+7z{=Z%?e>~Y)dsz6| zyV!aHeVm;xejlHaVvM71T+VdUF@{y*DHE zydVp*_1s*sXxZ?4dAIJFnn&3Za?p%0XdsoetP3`*E_{aAZ&-!yEJ$-hCrQ-DfOYf1o9hn-?cat|8V`tQN zH_utc0)bz43+T=1$T?c=!?*Rx?OQ4WeKL;XQ9rH+qVAA)IJY+We&=K5ti4l}k5B4J zT6|qO`~t_VV)F~(vu{Z|&-ugo-=Z!-=50_8F!&C#50PB{9ezszSiTUcYka^}ep$b7!YDx>8} zo&nFk$L=XAv=c*Gar*?r^P2gzv?HiDMBzM7W}QuV`%d7_4n)RuT6R2pu5X8EUyq>- z&Yrau3Xy!cGgqeWS{im*IQxPIWWMLz#Bs}1joA3CcMrkA#u)j5c1vcWPZplu z``Y=`!Q<{w-dk!Ws-pHXbsiu=nG^%#b4x|_H}>2@-?ZUp-3kwGQoDL~*k|wZ-p5bg zBp1BT&kGE>QhptOD);SYLpui}3B%xu7UId*b%)AjEO$v*?>qi6GFNR))%4~on~?K! zN5@zfPo!(GjziI_G4J0V?z!|y7@bhSs%`n@$_~SOw1^#I6Kx7Y-pG_|(McZHwRb?+ zPs^G`X%l<`-^{w^k7LAUMzxAH=jtau>3_JA>5=hDY~P*}In0jFKk8rkBzQYTGWCR9 z?GDqZrDw}^VYRP(1s4nnaqHt^HTN;PQDpyus82e^bFcK79W$b0&Fk+{tuMs$dG-+k zO2GG@C_iM}rXIT?prQ6|@;1-D-G_>7)IU}2UwIrVx8^Z*bAPza*t5|#jPW_qTEOEz z0ppUP)sBZF;Qd-;Z~EniHPaav;8(-eUV9xIuTa^Z8oFxB_=)>*wBBEab4{U& zPl#+b$8NEFyBu7gwY<8`n4#5s{)HRAUzwq@-_M24u07J+^xag}HVRtOUWw(Rj+k7T z>wRW*Xv!TN`d)cpY#{17Oa&)858{3kdjL61RE8~f+%{e72kbPy||8<`}^g+3c3%cmg*@`g*oNgu_&quQpQDXKVJ8qbE>-l!vUb};u{0Aj9k`aZ}>qs+THpI(2e z3{SdVohaC7@eu7kbAgj_E);yk+Hv$tL&-~7=3|O;;iC0Pe6_4PtyfO&>SUY?Z}Hdd z2-X~a?$JILGHb;G#5CWfyCh1Dx?Q(@b(`ODMqe(wASk)~OrJy9kt==`W31EHBXMc9 zr_UeI)6bxSPENfQZ8ui#DQP_)6=Qk*j!Bm9o!8!9DLcGnB)>q^wmSNVdNLZ15#^uK zaDnbtHx|?1$BoZ#1~vHZT4%q0$^ioX#0lebaH&VP+}&`@0oHjE&D7}wvk@_7@<-lP z%AUckJq?43sYXf&NgcHRAiA2i^y2Du%qf10M=E%?Ren{2wz5RAle@|SaJd((lDK-T zEIUz*Ghy!bnb1$N`I0bWo*DnsGvSf%Ed8(Z?8m{jAd}i<>heLFgLUcP0_Uv}d*dSq z)eo6<=}L2s9(Uo7jrq)(ISlK*0|?_yA;-7dkPwwCt_>l2vbQ>~!#6B16vW+VCSr9V_czAaf+OZ(v6464t% z>z^&ze^AQrwExdi{tl(wz8(H^R{kEdQf^y}`g2zP4zu#?pQZf0&Xu4)OZj`1QvT0U z{$8b2__LJ1S1A?$EakslO5T5Pi!S|VEq}LK{>gWKGvD!b^>FmGyL;Em6S)2Ddnqw( zF3HV1S-Qk$l-P!w$A@h>v@Q%Cwd1)IoNeD9dC}xy6Z>Vf2W2(uM56oaBPxrZ#_vu~ 
z=nGFZ?c7R?a0N>dI#k;$5l#NyihByP<4QN?`~ypkYHL%d)EEMCHm(!{tEf)I0|!VVLbmUS-=L-OZOm zhE@)C*vdC+wIPTKseQyLr{W}Kv6_)9pEgm+O(ZIUM50VV%i$b@9RwRnzF4GS17w~f0L zZ>_nT<|J^%$E^+Z&gVqB>bBiWAZ{*g7J^R`RQJ)3l;l&IfSxHa*gU$`i7m+WgtV}w z)2BMk=u*(zrSmtwe6iGxQVzUWAQc@8b}5tsl~xkdrzz?3V3*E9sSdpuVOOf>86-B) zhq5qcwb|xRTO8})l8=v@os^p;7Gs6kY3>0&G*~4BxFrLrsl^D>1;lLSiSnguz7%jF zfnNT85Gy3qR_Jj~g}z3j*}M69Ya`9D_~K|)Xs6|1C*_1>(&*7c8LhajgDG3*r042i?<(ayeXeM( zK6@8}Q$Rsl$NMEHt}O92O4M1*UREnv3$6Xd<|!zyJTVVt?JVXfdpo%mt=(r+@*O(l zoNXt2DfuK?yW1ulbfYZs6>1y0mHZa1-D~p>bfY}+9qOiYwu9_8)MJwb>M2XiM#(v6 z+so=C+oQF&q4LB6l$Ue1qpWH2ELywYrW(}q4dQUNv6EF#=0&%6*@S{*%MvrbLl*CP zAHMZDm~E_^;Gksjj`Z*?nX#jv;Gk~t?r_LS-$U;+>jif&o^aC)QrbIwZ8VzcJ9KIB z(cPoc`J_++Sec0D&4t-Gn1e!&O7CX|J*dCTuo^JQUN9GD+k#D_LG*jmlrr5ryeVo74-@I*zsh5_kfc6Zv-a|mDi7R!BCBccUJ2KH|}!5Fu{=Z zQA5@BEUw?}P5z6Y@gM#AAg_#)x{Yni0!L!%u~J!UpV0P>C%-$m5O8{3jJ>Yyfoti~ zVajPY&AOfkuH~ZvlIu3?|Gn@0FCNVypU@8-`UAXsmDDY4Pb_eRq-GS!{wKk4#JSCi z=(@s8*M%7+C0J8m^l+@};tWD*lN|jY{5a23GfHG{`h!SIrKEn>_RRvv{|s;r zcX|E70oc=r!pFyKwbm^TK-`mp=&8_s?i$Yq!7=m6}m3E9VpXsiSLvmrqIk ze>ylVZn)X~9OjcHY&M%?xVij%fdenWSu`06Y1KE1E_U*$VhBlyE7$-2IdcX;gOT&UF zP5$Ld%H@f*saxI^fe0gf2kbV1OBN{JW7UyN@&{v8hXY#DJ6&{r7pGmx&DY{e@e{cM zu7v*NoGQI4Flvn;Z<{=5Q%AF3PTv?a)#>OPYJ!%htJF}ZtV>50D@&W`79UVPK1icl zm7zMb;XAq&;9=zgDy%lp_YST1yEm{pV0QD|z8yQ%-~6-ZO(NfQfuHfO?_S%VSlG66 zD5~@7-Vf5;ETaeSg-Hj*^@Ti=!)rWr$r3WZoXzDVB)Z%^lpOVH^uttpC$tfAmP@vj zKSV)MB4cv5bjj!0R?Y(*828#;JO_=VN=rp8GT3x#-|5O(eU^7mfm-txU;B{o6wAK9 z_cDu!Z<>>?KY$}!IrPH4n*f2!PT`gkA98k*;#-X+kCjz3_erMFi%qe5;hegr`vzAh zLb`Sms*J?Wa6A)rqoZpZpW5oH*9s@ zfl`(i$qO`SscGJe__!s@bBf8pLPPn8srj~6;1+c(Sygdp(WWhV5bvOfOre6Xx^=ai zJxyV~(|9rpTBkH5Z{z|LclAM#+K5CvDR~3qM6oPS1FjKjjGY&45VRiDJQ43Nn~o6( zUv%l5@G|Zs*hEZC-Uy55pDp$$Ik>BA!MtgUE_K6!4kd}$4j;;_EB18X5Eg+6RJQl6 zd_R&`k!y$m^yebNbSb>wc_`PxIe0=Bro4T|rNWQmLPOO}`GX@MfXWDrYnY$lPz^Gr zX6Q6Tb#;AOA&Qa|Pw9kj&=<*T4Z`Xbe(BJ}2`JTqEUdn{| z%hFA^imJjv`5O2VE|5gGmDKGZ3%|!8H|eJyMiAH5$|+#VSaZ_7yvJclOMW`gYB52_ 
zFZMeA)ndYqEA~3D>TChW&-OZg)!9OhbM`uc)!BlMXnUQ2>TF@hO?w@9wT*z|CwtVr zYB6nL_0+VRO|!DM(j`OOj7jarY|Twe@ov?8;}Z@FksFH^+M9A)fKHv`Dj*e8sWX9t za)Etb$nkXhB;n+=f}8}TQ_@z~YT{i%p?(tuQ@7=+x5@?Y-Kr*ihG-+-D~|$?8Y}pPAA)5MkmfbNxR27NvuaM zG5b4Y8aJDy-EUnj)^j$opw&w!+fn8l)ZS$sDkdwJn9<6iV`C?y9>@C~I+)F;li&cd zcsKU&Z8l>^FTp|8;@!@Ww_=B~`K~2+XavdbO}RE2$z*#-A+w%$cgG30@6cqV5erk# zd2@cY$X!9+N2R%#$wBq!xsBME4qh-9VT)uAdVW;;AhXYddVOxAy-YVQm`kul?hYzH zDh*&JKd3k7HsWB4yI?NM7ReISdsLdA*(bQ(joXNmspo>Z5?dr|5b3D2Ff%#0{yw)6 zH`75ib9J^zwxGkuq(zv09@anMHafs`L(TjuTjZXgv&W>xnaL09Uve8AVv18Uzrhx{ zH|W+eX$fYZkoqESBOaz6HFFcTNcJG_W70roa!7qWx6u)%gX-p1Y>^y6&yPvVGXI;r z^(X$gEKzWNG?L24jAUylR9kA}rk-O)atHkuGW^qv{w6N{`~3Rl-;-HVqgjzt4FtdS z@MIwc5hT8W5U|8AOzmJrk{A{GJvj0|;-CK&-o5>r{|WlaAd!JFyRp^{zPFd0dFm|F zcXzb-8ZYJZKnr(cofsARZzInCB7J4h_W!r^mGSOSLrrX7_nP-=lwhE3bKw9wR(1s? z80f2s9f|!9vg!Y+^p(M=X##t|mp|w|UCI=tp56o29+%LKW1Nzd_ki^u{NB9#|3mu9 zpxM;PE8yxcodv6n613GT;JT-&8G%hZU!=qeTeSW*68o)ut43-W*t7ziPL!5H)*;Yr!}4~QLm95XWFbckK*P6xGJo_d#-BJLwDtl=m+sk}== z#+xXcAx6=MdF@z zZ1P(11p)=$NsvXuPY>AS z!A9&dtQS~HMVkfQ9zVzfc(N)}wdodnGI6p6=pj>)1dp%Yl(FgzB%2}G3d~Gr&CMO0!!@?1?W1F>84sx}^DyVGCrL9hv!L8*LS6sSn1XtUdY2{o# zAVCVaNifXaaFaF~sU?VIP7OMZ==7&6xAVkxJJ6=a-HTFy)!j65c5BZ|XFh&yjR|{r z-4&sn#k%9+us0t;v|=~A1*fSxfTA9z97 z%CpmrMhi<9(}RQ|4_R~uuF7~m!79_@Uu|KpWbr=pD0&tdjol5;>&C#c_L$hDw-EPN zDPyHJh~q0zRsRJT-T>JNH(M3@l#8_i7XtJ2T;Ohgssnn?TPp7Iv=v;vJVEK5Hd$RngmpLP^!6^yf*AmXbXGC08cA8Lsz5TS4(2M8wn!HU9HejfA;R&*%{Thp9ID5k0ODPv^^Y9HY#wL)^ed!{x1#wVhYA81Mpa{NFq+eWPu2Mu^Tw`)5b_nyR zPN0(t-W3kF-Jno6oPFzx1pY_5wpA-^^=idS~1;@Aw<{S}iQW)uL7=!v+ zPL8^nHFa~n`2Lx9xgKvdSea48FZoV9j%~z7`FS0UDHvvaT-3xad;Dx|9=4u8nYX6> zutNWbDtUN!O;P|cY&};o#M6~7=XVz8wuthiIV_^oi8B2kl#LTK*@<=*LXz#7A%11j zF+9`V+hB;_%>>O~*@V?AGfy9r<_hv-YgacHVj^-UY(p8+Tp@lZ+SM(D_7e3I!agV; zn%;(vv^Sayu@I#b!Wxu0rY)qog8jB3PNG{vSfjGQ^q4ePn4dy>qlM5nNV8sW7#E_LO|Bdy9i*}CjfeaU2-J^w4s&@%`mO& zl7ZMI1Is!9XdJubY6xgkO(HNOwXW+r#4ZmeYjnglFE5ZUzm1U>O~N-eTWgCr`(9?TbY0(N_jYI5{~eCl!U%0O(nfZywj*T+$6Le2ck7)>lT)6(C$nVz 
zrB#Im3O`_b(_Q26-)=+|L>hSabX<1Rcp!TqC3ZBD*>)Q`*>TxjBV6`+O6+*#A=}pq zB2B#8P^hd-O6*u9hpn}ONFy(AM~j=rHZ&0_U^}TG(#)&a(c-QVE$fjI`wjXVk>#(t z%khpA?i!C}ucTZXk33-eOd<0lF9WjID9_;S4Yj$+pLEUH?(cV4Y;U@0?Eclw*YU_A z*_#U-hn3WuI`oVGHw}Z$ZMhdX9;99@XkTk5cAytf0F^C?s+oTaPHU&~kbad+DZr2<>-@t(cnvptLgesO(aq>r%pF*S-uo$r48UV~tr)s31H7{C zdiI-TfbTgy^pM97{oiE({S=ecIg;zGr+0XrPpMvRIy}^R=7lkz1M{2vgQAAQ%p8qf zMlcESwC-_&K_Q**4S^RzUBA1jy#7`4Mcl@Mhfzn;*vn*0r?6OQPTQC(a)id+*x+$C zO(ilNSGwr2p6;SZU7iC1(sAE)0KPmuX%jO}Y@q=Vm)X5S50n=Kc1{B^PPESx&@+OC zePF3Hjkfqp@~{B`xj>n?JU1br(4}oTmo2CcM*}iL2I^`H*wZ0nMrHTB~1ab zdS)*mogms5C5)E`G69H-$t|U9c==|N4iCpv1J^Lq55^JfNh5I*Nznjd)jDdC>Qvk2 zDOkF)p261OoqoDpx>T4jpp=MFgbhGG&7Z-|W8rlBO{(|$)%`x0+T_Gc>f#*o5N)gD zX418jnmJG1x6GE>_^^adxw|=o;w1qLJ2fbW>gVpgECZW^%^DzUsWnHGbq$ama5G?D z`taDyoMl-$nmPrETO_WG5`z1~$ZH$a#(6HV${o&RS60D}5$AE!^ey^?$JumxezI~c zq?R7upuVO+U$3c718)Va6!{~~;$+`aNbn9YA&lhYNv3QNJ&0ILYn_n1fVOxkK79h1 z3z2SiUd7ceY!w3KRXT+RvLd=C?|gp^{hd9mhwYqP3MkmFgLWz7Sn7hYmPHHgE}$ z@EwV{wlx|XiHUz$yK$12CZV#qFpW{8vLHWh{7S{H$uVEWTX1W#rEv_&Rd!Xzev zR*7Ej(2EoxC)H#k3j>yY1fa*+^Or-& z>uM3u44t~JQxLm&U|Bl=JE)pcEg*yRArz5>v5?D?b+@}^ouU6n2`n21pcmQm*Fwk?HFj9WK4e!o#I6}wHVHtlu;*`tkf~}BunZk!*E@(^Kd@{8 zfF`r&Q$olzHFkJL6te3x#1026TLqvu+4E^3WV%`eJOhR7nuFM_0n4ZWG@U(vD}=nI zw)3U_&J)je?EMW|!FOe09A4SH<%X2sp|Sx!EvQ>@uVh@SAue2epWeAN4&_8IkC>fs z)yJ~$6f(pAcJp(c;OEl8&vk;IO9wyK34Sge{7fhKxpeUB{*2L|=>$KQ4&*Y?U6Bsz zvhVhx-hh5(Cs_8BLgs^C-8i^w4Dg=v2@Oxpu(W;iziRjg*9!ikll;N8f**B~Ke$%# z7oFsftrh%5C;5YG1%J^={*%P~H^!y@eFfm(exnRIIOo31Z<+yS`=;N%^4?ql0*9L# zh0)zBgj&ZM=FGcQ%s5@gsWzA>&02uV1eH9=-jClXPcuT=ce?84V=Hi#IemqADq1hK zx#Jsv-oU>l%_|^B%w}eWq6?+Wc;W`Dj36sIfzA?F|uWm&HBNHgBAu%3Pj3Cm#{(B3-DWAutW7>huAzDf~?~xc2nP z)&fH|IA8ReWrJPqgNy&4$OgSPTC#q#Z1BA&&7kRxi2{_ThA$J z?IO97;eU7e4Cyljbe8H7A-Ey9>Q;%WpOMp6@n#nP%;IqCU~ki4K<{D%!wipf=B!io ztFnii$F=b(-u!TEAf=k}r3Si6hag;ido~)pVrwxm&%TZt@duE`_n>j!FU2h4%N zv9sorefOkv$>WV7GUEyuSPG{FTt?yr3#NXdKDpys$0=kfaWmKJYt zXE=K6HB_>-@hM$=%A~ceVG&cZrnR3Mt^L%Jv$7Xnt#M^j&yaJmrJ==3+9MeXe7=yy 
z2|)jd3ohK1mJy5`Hh^jR`l@u;@rVPG7K9Tgl z9Xpo$sAZH>Sb}eO>|DE#w45L(rfy$`QxxO3Pwt;Z>PPdCASc%@36~tk+d4KnHEj-} z)2$uh$4pi~h&1-|#x@A58pyqG40Ba!%$RkXF`$2li1}EolS+G|w3+6n@4M=|6jM!} zDXa{x&Qw{rfa&JyWsFk086(z>my0v}G{z_y_mcBR>qC&=p#;re+5BW`_@&R`CsV^O zeGY#$HT;dw;jgBK{lD}%{MFR(OP|9v3!%f)zqUEHkK-a=!Xr891NP=(U+VQ=2fe+# z%PKaM9J0^(o0{Rip?vyvs{o*~QH`;KQ~b8mAHvw-a2d4klq$0apv&2vheF7BHGbcW zR76*wsU1P8%nyLBWp~DgkO^x1_c8=)x_Hd&xPWDk0q7=n=ZO&VvKqf%Mruu$ftei; zSe6Jtqu8BsA!MQ&zkh~cZPyDkJ3V08TL8M3-FZHQOj6?y$Y5A5`poQHfn`+yG=|-I zTQA|?_Ezvf>|%grtpM~CyYsd%!oO{zFo4(<0?P&gXgs^~wgSSx?Ur}}vFiku%>vLQ zcIR!WgMZr=(Fd_30LwN2Xd1iocTMB`n;mS&cVztQwuiqlNl<+Qs9_Y}T}3_Xai4h% zdG5S@|HUn9;&c7;#}X3x(B}K#!jHU%Q?51q?dJc9+u>K-7~dTl_8M6kyw4dn2)?9i z_I7{2!}8st@pI|rN7qKVg2)2LZYA}G4t+)}`+wE&&nN%eZ2k}bX?UzhzG34yTmqFq-U|kYqh2P8F9YghM-W@Z1ixXP@<1!cBW=QQ}rO!3p8)T zuOx$y*4B)@P zL~c?_9>g^m(?8Tpqz}_-kR6qfG`%$2P9mu=Vk^+)1K>FEnGWx zc$fcXNn~3S*p@`{W3M{wI>QmF*QIm>@7VeJ^|J$dpo}NZT3kn+P{EPHMvJ0Vfr7?g zI3IRhdhsPjHSlsSsXNN@)kpOuK8M>uM_nHXLUYEf*2wEj4`lr#td?GnUF4Xbwjk+> zOv^OVxNjeNWVm zXCNH}Lq_gr7M8`ZT0~a^D>38;7D4HU-W^-mI`p1Bm2E38}35uVCbGNRqZkfE5%e?M?wRN3TIvRhr zF3`;W^9w&+Yvd)Vj*6@FviAxBd|)rIZ!RPqX3ViDo(q`<=X2%xF1mTSgZ$IbuqA>P zBXlu7(O}a<2c^RYpet(#q6HEEI=)nX=wAOk7Ivp#x(DeU7q*s*GT~UPrYtu%_fAol z!Ep#i=;DlG>?V{7A$E2)`j?o}eCY3o=?KWeykBF3m&_0iOfR8r5sBfK$hJb@987wT zYI^3>MH@Jf(>;_ zdnh2<5Rs^mFssrsUno0dsOK*+ykI5?#A+l>R6?}YNu<>B+TjtwYpoHfRs&uh@3~dt0@po|%P2AW4b8zJ;Yo z1TYfNn z)(vsBGJYcc&Sq}mjG<-$M6hZQm~XDhI@s+lnm6JS=xeRvk08mIv7V|$`5)F~dTwUf z?|#G%uGyTHd}JLeorFPf3OeLwqBG5#rFOOSqx1 zHeERk@Z`iXbsSH2#16Xxr_KK^wy~Y%_;tB}AtC%Vt#Ry^0gk_>HU6{02H%m(_I4sp zcE_FU#GUL;I@w7$*-1Lt0iEp5IN8a#-uT#dA&En3{(b&Lbu{Ii)WZAXiO;%90U24_ za)L9ktP)^6%>E0OL`1=cN#Xbv1?nb-b>Np)y1P%bEek zlkCnbAqE?22x!LLx-JmJE)`hT2{4{vcU}!K*i=&r%*d+iGJx3S1IzjW#tZDu8_i-n@4@pezA=`zHAQ!n@gIjv-(G$_s?`)1DlVwUlBRub!)Yg6 z`Sy}%$D8rUQ@`B&Nc{g!3-~dv@pBu-H|Q^Mji1{vz9l$*jBEVdhOrI(U7L&_6CB%d zji1{vwi6t`?!frTn=u->x8u#PZ;Vr}MaVMJ@&Bum8{h9=|6)e?BZfA9cI4 
z{ueXCA2GD?vt$1Qhc^CV#9#zg{$fV>BZfA9cI^MQZQ*}*r=8!ZCOu1qmb>{g1|)O7pE^1MQ=st>h9r8ZA9 z8d9EP*c=V1%;{%r(X&~?Ce6Bq(Spc|o_VWg;KZs&Kx(FIWq>=mETP?C3u>%0T{^W{ zp0ljJ7PCBVA!EV`OD^o=!b1m~gvV&mx;dWYPS8mPA;jqkAt=2*4hlvKLQCX{UaCEO z5r#=e^fu=-^sK)%p(<}MLL4o@8qYlAS!gOJTS!rFj zae0YM{i_b4&5=B9Uz@YvGjR;|==9!?rJ<_0^>>Go(*#O5Vxd~-Cj!&gvvn`Yi7#C4 zy;XjF7Pu>k_r^kAXjv&ccbMnvts?ay&d`d68l{zuPkZk^KwP|&kZ|rZ-|HHXOHgTGma`2N3J z22DNrS`UABZ@42Z{Wojgi8?p=zg@U=%4E>V19e3txrF&tJ+L*0izzb8!hoXZ;^a83 z-Gk8!Rsg5d)=A42t5qfiHW3>e?^EO^uE`@!hf&jn&SoTiVmjpogn`pF!3WV`6%zt5e9&@y9(V}`p8&0wymu-U3 z6aATELbjD{O%Coz)hbivMNM@3sLz+cT71R9aKnTCHOFQBQ>4H!|4j?o$sQXXBudC; z9`1_sJ~B<*Snu(GRSQpqHOi%>D`INsv$*Ck*Ct3rk)N=$jqXbC0+NTerEv=N`q_xS zjLHbRYW^(V;Vp=)ca^JD@rY=6W;x&SO21MU$@ z>-Km>pO*FBiWEqDi`N=R?wq0X(o`$Se&wzzRutQXrN~y8OB}|)$#>nMI?XIa5&+R? z!Yfc(F=OC-9(RXOjYssapIGn}WaRSSg&+|{)f}WDSTYJxI z1)6%O=>Jf7)rIBCs=mzNX=>4?4{{SnTtVczRyGbsjD=gfZtOaX9(vVg8ijl}i|_6& z5Hsst1U~Qb9vt_o4r6JJuM7>1(+^)<6Kdh95*sQyx@vdUqX%1P}l$OaXL`-{{4 zAva+-REjd{%YyuQU36t#qTH?JV#4T10YJ|H&BHYm0k} zmg4SEpg<`u#kFX0cPWLS#oY@nP>Q>2ihFS=1osd;K#-Gm&h@Q1*P3gdv)8rHzV`R6 z`A_m*gM=jG9eLro$8-N~ID3M4{*ZY72<{+(11E^*;pUJtxNQVZnIN9OAfAUeP+Y;$ z62$XxcgPLgAOhz}{O=GX+C^_;U_pr6;dc_|k<83x1b=U)kYmOpIqXgP182yWe?gOg z%M*V{lZf~iG>Mfzrb(>-9@+U{GzmDx^PjYcA478g(iQTnCh@PFA#lqGk!A4zJX^>w zK9~P+A>wa1jsHY?#NUO8zmYTk!-a^yku&}S=@GvXA_RWLY5a!^5r4yJ{9g_7{Ws+W zy8mTJ#>aa-QlJ4_3EzG++t2l`AZk?S9PQ3 z2l@^N`glu?w#w{#9cRVor`Fq7Za&_kt~cM5*-fn|#9PUl{K17YQZwxQ+^ye-U61|7 zi7k}}=X`JK3(sshjqDCY6+s>WS!|%u&Ej!~$TN$*k=|i7ebce~m~Myar1VUj%R7npa*o3`17ie^k~-a#%oXt%zXH*+L?i8W$!wi-M zOgLJykwd4H{z7-FBYh5zmb6=t_<@#u{u{I;*Y7t^^WmI6`X4Q#{NU4(?C4AOk$=){xdmN4aEUwCrl*m&ja z&#yhSxp-*dH*sMy#utUt104T!SJictS;E=cN@?IhJh9j7#IMuF!UdrU8Jt>UKaj2N zuAk~Lf+my4lxWX%reSmo{mQp`i5*@op7a8{yxVsjBnbfY^o)NUi zP{|C6Pmk6bY!1G;a}pC=Dmen4b7n&X4O?%IQjUQ2E9KI&E)Yr!+~dWG0M@I!QjK&8 zCSm!0$2*n_Q=Ia8=BPQYh39+@an%WD@#3SSN*2K8gB1JAh5aorh(X^2^t#~2Pw3|Qx?7pQ@MiO@puSpb&r@s9NSrNs;t13QdqL{4U=#&&zv3@9gdk6*i8 
z4(iw`p&Dh+EDre!@u)A2W>TlW0gdWYq!&Rf+3<$@U}?ot+)Y`eC3WqhLFXITy294t z0>SRB#u*MoER`pxIffzoi$fz5IqgG}SN2w0N9?$`J0pU&q0=C-`S5k?6FaZ!5T4MO z5ApR9qOM4FE=Xq`7#AZLEklVZ%ER8xK3Z(hGW}O=l#dtUq!z-nz^$~Y72jO3S zf&q$-2XDj24hl7(^$E>6YKsdaOG$&R%vw$DS@GbN8sVjzx(x=ySvy}&UFX;&UlNR`xOWW?Ayi0Ck4@(_i`|@*Ydt~ zcd0p%)bf8aF5-JH&_B56mbB#7q{;=g(gU?P-~~8SV*k$7He<0}4i^`E`>KlcSX?k@ zxPEPs%=rR4z6Ixa?7@B;&T+?&TW5$ttH*{jWZ=~;9!$rNOl)>++3f%rAk~s)v+_MO zfV4FYc360w(TEq|+Xg@E{RTf2hd68^jrd*kcxBoAGi&m1qCuC))1i-laPwQvLix*9!!G$C>=QUV!*J&g9?qf-aGlQdlNZF2R)x?OQ)f@JjT4LP=EHD^A z1eMJ2$(?E)J215JVy|f>)&*gKApoKaNw_J3ux$+2$SRz@rkhxI4GVk(Ai9*y@XdYI zHpXUTmBwB(Osu@`!wx_emQGXN1xGQ%%-s%^~P$f}XO zW|3GIf(3>Gh^{5!77D`lv1lW!e)gITV%-xg@FjrgMl!=c_f`8?nUU2TdkvUa7m5Xj z0f=rT;m(Sr_OTHos~z^5Gh$sB7Wf80bSIe+kUP~rc3@<6$zF3qtb2zAz5^gV%SBRo zf=KZ10Vm;I4BdAQF?VbJHVEG9pfuYfv+jgh90;#X755b+0m+RPn?AozBlW@<$z0pz2T5*``f{7@x`ZM{tiOa zZV4NQmmA73JYfIus`uH=mazRMuyB06j9;8;G4p&&e6c0-e8TnC%Wu{44&{d)@HkS2 zv>OhKDE;>tlRpdMKh^^X{*9;!te<*-j?_aUOckw+V+DM;9uPAAQxCxYsR!WjnEuoQ zEN|Y!^#EeH9^mt^nPma%-;bK$IRaAs+j@W?o2Q*^*4%l#-H zK*ZTnswX;)onW(3p^*tYB^f1#jX5K|z>VFROQ11lh6k|uOm3}E0&aUC!IMq6wE#>A z>X>>5h&O@D3hGDK?5n=R^?+=+9H%(q0q@}`%Yf2yoBE-xliL$t=O%--&_(l>Y?mhQyNS`Y z!g8QdM8%$1Lz-Zpht^&G_FcW&8`S)LR(c@Nr<2{rvsv%WQ1Tpo)lHawW@*Z`xxW)1 zxtMFa`yj}&eaHv;al>K4NE1yn=lkvQs%=Gv?%iRGDzwV;#07FD1?zRa+Br+ffsDY0 z>l2nnmP*zPN&{dm+Dg3vJ2H4NgERHpi*8WhR^i6R=}yka=8d)8)XW!)ly|UGbN&*> zy(x!6<;b&~h3>ZO8Az0D!Y~NPzSnN#&P+6y?RA~$?xWN!EISx}6}mME7OWC3yn>F5 zgMV5cW)}TjqwPj37X=v>prp3;-B-9qI4Qu$>dVu7exsb`Hq$e+GbLEP@0npacYq-d z>vT%HP?I+0&z^(Mtr_!rT5(EJCdg0614Ong`u!x7c1Bia{ibg??!jUdwkA0U+_t#i zR-14h1g?jtObmO|pO;-1nsWQ3_&25<@pId)`FI#}<3lW7Lx_B{blL~|4IlDnXvw$f zNT#nXl+#^%n_1S?!{AR4ZvH-$I2_ctwAUH87evdS1d-{pUtfN?zyNi3F)P?#+!!2I zc+(EvT<$yTj3XEP9#z}x@zIRm+Y_V?e}t_Cd;1_Tt!`hmO)Dd9zJ+|=2fK`4%C6eK zQE<*D2kwiwJ$QY5fx~fXJ0B+kZO@1v-VYF*TdCGL&jv9I`|=Ggd5oS}6inVZBlEG> zLvkDh{gE?$k*;4j!*br5a>H^E#4kjsDH_EDoY(Gv?yDcc*zTtNJ({2gw(v@Rd1%-+KZkx>w*Q(K@pI^(6C-|B^1m(He@%?|IrQ7I{nx|@y3(Kqyl;}TV=}X2FK5S; 
zX2;ZK$24chbZ5s5XUAU8j+uGBD(qjbie3;Z+iNIa>xw29F59oECf1$DvI7IA&n10) za=Tl{HVv)Z*lU`Jb(eks4W^-zFrQqMwlP#AD*$^9kXUyW%kBs;eIe=Nn=9Qm#$aTX z#9lKYbw2jFaS>?0WOc3ktV%ePmrmrM@{Bpb7#w?7iYT0WRh;!J z7tlV|V`R0#UUNdMdxd3p1DL**^a;rAZXeq;vN~h0ff4K8{(vP+-$}v(auHNMBM`hq z0Q|3pMf{fo;plE*?XCet`ZoK*gF#Q^A6y5Gz_$?Pe=;2W^dJ1saUDGPABlg2@ALl^ z8SyhNC!p=Yx;NfQ2$*sAMH&eOg0cE?j@()viy>fXBWqZwpym(!7 zmLa9Ezg^+X-dd1DikA~#MhwGX`*8Bd9kWVrJ3im-TcJ$x_Y9Fs)Ds)l)mt|ft!J0MF~Ha-tkT54yXZs7)RDBpu}MRyWy?6j#q)mPk!t+rvWQ>s znBz9lg;8T#VCj_J@D(4p5`fQALTBYC_T0j*gGjJ@|DFMj_2} zS4n%9tM*m80Zk30>=5F`b!(ULMQ{T=E)t|zP(8kei-uLzdwu2YJ(o4YE)H_5o?2sw zJaDhl1>GJ`ocdq-)i*l?fe#8&A}^i&Tj$#|L3SRxpw5j3nERZa{S(8*u|@jEkInt7 zF{4oNlKK`SP&Z^^VP|)A*iY2%WW(yzi&J>^;0QQjC!+6C;eQvz?okhlY$r5uh#!9iO&vkJh`5 ztqp5`s5*b^WsSFIWNYz(_wl8>8&~_y=&AMDn^p#xTF&$x@7#pNSy5g2?Ex?RFb7h4P=9&A zTi4o?4Jj4jeGR2=Yo*Hp70fNoUsU>Js3PkZ)$IA^nbYr_5OSb zYqDy*jF>4NJljoKo2Q6w7qM+-y{!27sfA5&PW4+Go8Y3Vno&w7_(|Lh{g&{{Q@8%! z-uNq$PT+uYP*P+H^O{?8w)eEyc}eZfc~(Jf#vr)cpr+f$VBr;hYn|xyMREH68MuS z`A(Y>8nb0k(#gwo6*NVX68*{g^hXN}ea)7#?!JIl_1m8h&FMSbuW4OYHt(I=$(pE! z@Zy!1ESYO|c_CIY)5!?T=;>|C7PePw06LGsk@Q}EYPf`8%@Uw70WpTdPv@lXOE zm6`Zf6aO90W2yjKzH3R^GeR!DiIi+t$Ve~_B__ELA)hOY z?Y92xSZ76Ue~U5+JJuU6Pwn0bA*z|9mS{A0k;=8WtdV^HE*PK$21J7aBVYjT0YK>h z5Pbj`IRM~707?)*Gz2gL0pK12lnw#Whk%ho0PYb$=?D;g1QEY1Q2}!7&!spo&uCk0nw*`ky8Nf89?a_5Pb$1IRoIH1C-7I(dU4Xa{w+Bpaca( zLjfaD0PY1q=>iaa0T{Ue;9dfhE&E|CDO^D<(nhA5$%V#U%Vw zs^zbkgnvx6{1ub%#8D=Y`w!A8{vp-!S8KwzKTE0j;YavaXaqPjp_|m?e^@}p-x4?e zbMq{JgGMm^56-jvog?8tHqY`mXatl0+&s(QITHSJ^DKXZMliN~v5y^|_1{29`2YER ze!J%7d7qNgWbY=VqZo|`Y3f$=__nyj24&mIvD}NNXw_cSbDfd>s4grp9LnsoXX)-( zXLIvORG>(BT6rO8!eGx5+&NcvhA${EZUF1++$lTb78F>h^?!}Vm;s{@4`}QSg+}h! 
zo{hq$T*dUtjGA-4yWXX;*cVi9EWv{+n%>BMw3z7HyY!u6r4sY8vg2>`zFMzqHnjU9 z;}hA7>L6w~45^>7hu-At`L%)I&-REkD!2nvrMK`KDKyVnA0M8}eq89^xAtoEzgdYW z?q1(>b+4`$NHwI4A|loT19$B_xErreYVPI>9M*nRzZn`acW6IU3>S%3CvUhf0ipWO zR1CU*D&UQr&24{I#{U5tVbtuyKO!1OJ~jFo{1(x;HyoPrRUy20nRoIRG{Ti35O#ZZ zM0(Hil&El74wy#52L@h6F$|f9gmP9rxj#x4lDQVCW{c|F$MyV}en-+dRX)v|aGauo zvG&Us#-v-W4+b3FBi1j3^@3(|Xm3I(o9uaNa5lGpMKm(6c72^xpMJEFKiAiV>}Fu< zri_zusBzF-aj|32)zp+f@m<~0ed0WM++nHjazT!D3FQBijo_IQ8!-!?2bn)*H2y28 zgY&1k_^0n@#R2P8u3O>$(J-q|w?#@zTK+^5?`Z$I4 zz}~r<=KZ@RRv5_WW@FW>?xMzbyXlofYp1K%T!41z=2lRr^XZsk$YAwcOGcSdV+Jb& zaO!HI`^H$%$Q8H%&g)`CYA7@@ngT^(axLOMeo7O zD>i>}@o24f2gqK<@Ydk%#B1+1&{{Ll&UQMp9CUYmvVOF8m_oDP1`-*fSTAn=foNPP z;t5iVu8Ut;u$dK}r9HQ3dqurD_CU^v0J_jzj@!2HL!X6W!?71ey?52-i@sC;FRrgy}j>U z#e9o2q^_AU+*@_7_@tE!(gx zp-<|RFDx3!3=LQ;Mq9hx!RO&q)n~$#;>Tw`b{4lyRJ!&K0e)_cZC96H_=tl>Gwh(W z;+Z5&<+5eU?`z6s?Umk7&d^dTz3HeHTpza(6lEi^Q~aO>#2y)C<+(>JOR_a8Ddlf9o9pno#{1lI;*d+WcJ;!n~L<)gN03bkUrsKLOx$ zfr{LuirlP<-295%(u&;Lirm(U+-`W4fA~e|uMA2xWUHe8?-0aaGAIK$-cH=PY;2fC zGBRfqh!lB#JN&fP-;p54_yaSL@E2y_zbS{{FU-I`_PZ>Jzc2$M@a&GiFa!J8@a&Gi zFasm-?2f-M1N+$U?2f-M10(S4j-QwTZ86uy-Jkz$D>QZcc`U-2hdv+YCR{~nZiuyf zA`cF#0t`mW&8v?unzD@+!uR;S{atpi&_wSlTE#BT>$HeZx;>k&dVP7b6;t+RHi57M z2(bIcvuXc^Pp{PBwhCk$R6k{r9W&pOxqzHywoiw&{S1?r3}=BPwsXYh!5C z7ih$b$6ZYh5D;X}T}R=l8rbQ_eK@k{)QU`muNzfeO=pseuGsoTiN|z+-zn4AjI>g< zD>WX3^Jht&%q}c7aHUgr^2E#S7br*UVjoh-WGNx~z|Dn(j@rX5UsCfRrO} zl?-s^PS*fqh>%8Y22SimI>+1jR$$fo9Ez&eYO(3fy&tmx!l9Y^(R9F!_-C{50R9Lw z2U8Vi2S*onQwL}B9~HP=97qZHh#)kl!^5h`C#xq+JxdOE)SkGjRK~4$&5%rDWr03H39loUQ$zAYCT>y zn$hHdh*pR>Upvtr5~}kEB?OTtm4t)!$*l>kCuJGYP(Qm`v8SseVQVm!AG$SSuM^XVNG`@Xs2X+W_X39VV=na!XhU$|^Z5Nw=Mw*SbLr;d>R|U{Hd`ka z?cZ^bpe{4dipFM}VnswCzJF(dHdysF=L9z*c08x1JBp4dn{dQ*-88N-z~b^DF^2BSdf+p&_1>bLps@J^NiQOUnHN$lF*@n zj#=7`Ety0V{F;|zaHwQmU#&!u2`_fbD=j{_ke6OTia3{3!6sj@wp)v0``AMEW})wW zA$Vufe*ry#7-j3QXP}6!`hE+eVJ02bv8~5OZ(UGkJ{a#NJgNY_QJ&ur7CD7h@w|Bs zg$OwL-S_d=HXlZOugU6{El*i#Ek&M51tWQ;VH&gFj#Hhx4;6KkT%wAn6HQ^?VTtUT 
zoK%s=PghM^wophf8VoO6XU@1l3rwq?&P7IG4dK%5jaiNqKj}@PVZ72X#G!^>NbMbb zO=i?iAn!<^TKd#LQyJcbD0Cx<#j+4{!i{fu>vM|npL?w!u!QBmq zyY2Oqrp?nS_Q1i$6kuZq{S1EqH|DJ|+l!IuvBs*$9V|n%UGcovm!@}47=f{2(qWs} zi+}Mf2_{=pSMTy-5RnI>Uz0lu4v9M+t@e290d;B#*z+1(H+q$t% zw4UoZNy*Kn{Pl?d!{fF=pn6DJ$va1dIE_i^mzao%QfaH6FF2Et>__OtRrS5YzTH>z zN9R331n-4)bwVn=djpkh-)8OS+A_(54gM&x2 z7aFAg;!-6;ngiP8iSv@l1o|<8|0Q3nKw(Z!X#JDHS-~Xn1a;UGsdkq(ErcxnT2a#2 z7?fz$&yK>+`{NRvk!rfEtmz2WXf@e=Wo9!*rb6n6q-`*&%8)>(N0XRegTDp&e!7Sp z`}$l-u0~5{aa<{CS^7)*nJcA!SDZa^_Mp-8f=~iNYcko<*f1einrCrXN&Et&dh#v< zZJVm*ZkZ>f8n`d&+}+ql-044+l$=QW7-t+drJyIgEe~qT4Cd5H`Dz>d_4PT|!)QMy zqE7V4{1XDK!M+F~9)u*quh6G5&w|5NKM$#I1dJSXbBoejc+4I29V0th8NrHR8TI(rEt3Lo`_5SIgT%{$%gGBS^@F+X#T6S{YK=G^xSOtRd3*5dFU5 zq!fK(?bw}5HGzJgFx@xIg3vE#md{_~KItEzvG^2Nm7KB0whxtOs{rLVYl`;^L_0lU zWUXNd0w%{yn#+h)#dfE$*BI#TV$ax;X}hP5<P z88su1UZ^PqzEP*k>NI9Yq36cMl5>^re{t;KohgME*?6`qUI!74kGEB;%98uGh>Kb) zrMkWbyUjDY@y%kerEz9`#izXot(#8md(E3TXH{{&5IS?`UD@N>S^Zd9g8|isU$Xqh z_aa71ml)e{_|zEu_)ldDK=hM?m z76ayWJ+EI`bQ{}Q-p!FQb6++R^jes~=5FU#m|5|R2t(UX^pBrBDCdg=YGnvm^q4Hk*ITs%n0zb!#^6*&u7QpD z&6Q$p`lptcPsmDI{adB)Rw~+fxnX@&hfoGqRf=o|mdzb)&JW}h?+l!hBekQ}RVq10 z1w|;~eM`QsU+uqV=7!x{Bi;8JbYb9u?YI<0q@WJ?K6vZw_9&(>0B7Sxr3N;lY=@Ih zX5_n3J6H$0_qJ3U061l-=bS?b<*?)+AbM0vVO6xIA8jgl!)%HXW;%jT5-dfY37bVp z>`*Av`N})ykAB^OC2i!Gs4cCJ+#dNpe}a8ptu);@t*;}N^P1W$i11q1{Arf|hNyeu zdj!7WXQbm@l@v7M7MSB7H}1b3!ml>R%*!DB0(GQ$_w@K#j2Ct~zUv2~%jYGOR3EkV ziL;}22y?DrMq^DW-RB_m~-?Ip%NgSK*95XgWtKkZf^nToF>JUF0 zN(HHPJH=RbR}R@H3SToW(TKQbB6MpySa+$G?*8#h#{8MSnD$wyTlN~<)>^bCMUzZk z4=PE5W;&>bo28td9CPtS6Im~#t;Gwnv%guscNEN(5hqvjeA3&J;9zB&F$~o3z?6)3 zuLuzx>8g45J-$kIPxsXk_BpEcq$EzK(udNE0G|7-YM6#H(*!M*W7fz9w}V{V@gH6# zVhM?0bU&x37hqbS>IP&QzZkm655xK-^o55;Jnb%$nCp|ePQ>|{_!KMBwo zzhEqbELrW67PHL_g-Z26;w=Uw%>B85oL>~5Y4wU%bnuJ!`SxEJxM%u1J>t%1w&y=q zKjov+U7}vl=GSLX`GQx}`!FTMJ2U7(is1AK%6+?$Q)d7B5(X^J6X>c`6BJwV58YxI z!qPI)F*TV>K2%JFBZUu*G$e@59yFsbl^X|Gd3?|&e6r^yz>9zxA+??Q@%9N~$h)Ar z0!Os0(NLb(L@ly02V7huy3sF#f`U}6-!lSIq-C}S$=_cvI_x*B&sFSTeBj3Gk|Lpw 
zczTP3t$a76pnww7@xj_8JJvApSWS=_q&gGD6mcLQ4BXuC$^Cfc71}7 zRhDv-)oMj^#SVUM2@L8;(ZSVyt_H9=7>D4-oT!rqtwzvrlMv`RC-DejO`kWs( zIq$_Zu3X_1cyVzmrJb~YHhS@wdlN4zm#u4; z9+}cw7(bjLIO0k(aISbMM?Gqwv`m7@fgbu;v%5OB5h?ZUv-;^YRP{&C8ZAk%)RHhg zDJxO&VWW-_A+`B0#*@&}NH!DeM^P-STpsZyAqohzSJZmWn>xtkyFuQmU<0Tg65RMb zW$&9UGk`QHA|Z*D6h3Ev+lGLMj_r)PO^6V9swOo$>yjt_jwC!mErH~9li3w3vY~T+ zfodL2cB%&VX2)CBiLfo&htRP2!7%TaZ|Da$Zd4+Xr!7qJP3KwBhn`DMlpCv>supk9 znw|34p}!;uMwv|Ru?p9)&|P}APv`tGwny7KQS>~}^afhDX_xRCxYm(K@CfNDDEbTV z{=FD=Cgv-FJL+@~B zh>M*~L}K0PTbn@n^!&9sCw}GF_luD9S-?9vfPx}dU<>Au*~=8t%IJuPEO-d3STs@w zA9&e_t6N8XuKp zq2Vd&f7&zy-PF`o=sI5>d`0J`J8ax<75hF)W%YXXtbd1^`qCWXO|8GP9R0&9Wb(U5 zFP>W_D6~D!vwWoMLTYXhjpK*y(_!O-K;tU{4Q|)P_RW6F98x|KbMv(G-qh>2XsY`l zOGq@^Ec`bqje;_>atv9*Ke{CgZLL zJ94f#+0Xe5a>0tCZqzd9PTaNL-|~0MrU)? zgdCRcTv05D>Sa$VSW3YP-Z*8Z__lDqej}UdjOGt2y=+}|CqTwK>FgwR{s8q2tsW9A zt$4|IlZ=_b@ABNr{3~jfZr`NLv?kJl>%;rQ*Y~(lxScKZpWtkIirf&cqc)*$J(gV_9| ztYhMkK!U~Ag9!~S+hwN05A%XvC%93)plV9ZWJgyYLgC|~hmOK;IUQ_Y*?7tOLNc*Z z8?y86GUU*8J2<)Is>sr^haUSflN9qq+NvutX`Jb?)YhJUr59lD6T_pOY{m+nccI(P zhY{_%CkC4|6sy!jTztQ62*^An(gcK6KFmngRiTLeQlc=-2ew~3a%lGt=GUaB>{6G% z9n{e&7vc!ZMBDEdBB58uur1BVn>O=4gX)L$ z4bOH0eI9WzY{v1#(PXz=sufF141C|pMcL%zBplNie3t!mbZh%tZVF-nv3%zwD$or& z&1;ogy0sXRjGH1PPS3QXfgXKDJz*@RZWjot95OZk2=g7{ymh-6cl;2Hk0ZD#_rz97 zL&1LohkiKk-gd+rnvaTakGL@}eP3;J&H7iQG)Ub;x3jdoVEvsw+3*x!S*iay9rRVO} z3+!$DILAV7lykE6My}56=AEx>g&?j68+pSp3<+%c6}!SqS!#+Ns3o$4wPAIP$TMV8 z1&+c<_0%Z2&Kku1DYjOR9{Et~de1;okIu+)eI>Fhx7sg9~*>tBi+1R`=y@2+>;Y6^ex zqxT?lL^vRvSz76De0&cfMroPFx>l(8^vmDWW zkFd1DRBy{%IjO{JeY$zYj$2ok`Hg-C;rqUm;7xT?mZin_Stxk;UEdSRC7D zfy!`2R+$f;9n~){PsO!kw!^6mHh^r17vBM`-|UPy5cn$$5#GL%TD6$_ghBFJuD~^o zMTNgJ3{Oyiv($T!uOsg(>Vqj);xr|!yjykg5dgRTOCz#TUD^&w!p-;$`DH!?+|uX? zO>^pmZH+UP_Q&eY5^3`R;jI`C!v!>tB%Mwa4Cw6xO^3~;hmM-NxjI-3;!^vE)Im6$ zUQ>M0Z&|{(NQiVEj5l{MO7MCSsT2}cprcX`+zV`;(>Qm)j>PYwAL?{xfxH&&BJm$9 zkYSF146A3pJ!i3UtjM#$-zDr2yUU-7&{k_UB;DVBL8k! 
zCbEsRGbg88GY5ZC+<(ff?#v(Z6wn*4A-3tSqqvt|_GL0(u=SfZc|)LIvpkCnIWI0U z)>S*kxGf0=fHdq@(*av~e5JpuIda!yg#T@~WG~*)p7lZCN+k}^B!Z+wm3o7l9XC8u zH1@8Jd;Fc&C_`rV#%vy{exOabT`Zf@S4@ScJeHJgB(Kr7z9$Gk$anUjKFYrLVv>?o zX@njuKE#B319*wi@FWx7IdMpMS}M4|k`Q|OXscjlPvXqtl~=yz*u9|4mvP?HTs)fL zOokjQ^}cZ%f!vGIb=Z~lT1ij8<04gTs9uoye z-PI_Lk*q@nsp^*x%=R=A`{LvjQP5G;wes;->M|5Ky4R}4W*eW?i<3_i%@bA(+sW;d zyNks^4*)SC1&wH6{OuakVPTAIl*F`8r4_}OE=)&a5kqTS)$P>X(@eO$oD95EDrH4! zB}md3_Q8DM;j$CcpfN3FBs;b;7{O(oegVdqEWPO6*B;-$m`*Zas{(sM!nhv)mymhZY!{2t12an1~3FAv}XoX6lKVx>Z-wF9_8G%pQ}qH`YXhr_Q;OhuE@Qojj-; z(j&^=tl9=%iWWc0MUsx2drfmR9;goV#GK@FMH$InxaQ=b#=)Ll@Uq_Vu%XZ@Kjz`` zpgS5#e&8HVd2fO}jUzd)H=Bf=+5wuhBk)*#*udngepm^@Eco8DlyMR4lcoA_?V?N6 zf>lD`K*bvGC10l^+wxHNCe!J znMzLZjhWFYreZ!6xD^nCu3ybZuNjr&A1tcprx75MW#h%lO4DsAuppi8xqF{uc+KDE zWYotb`+zgGEp)wXQ@wI(Ay;&SAhC_W8YNfGF+Cp7@H}TQ5%WoS{>!jH`PvSlSBXQl zc8QK6WY}V>!5o1UT~poy4C7dt!O}0SNi$AfFlhI1MsCZ08ol6VP>oJ(31-SoxbIf~ zauk7&K4h50(juW!GM8H3q=vXZi34$fThd6}6B-<(r1^y6DsElP%bF8w%vGpQj888% zxG4g^YfgbPBb+iA=7xjgh^!boH9zDSbk7jQ#qm|gp^00BjJ=+D09iWch{w~IxIuKk z2Fj49oB%9`jJCy3Zy$G{P%3CblIAm6lQ%PHGIkYf)@|**SQwORNao4`X<+6Klp#|e zqoITvo5l{{mE37=K;-q*~9n9GjGM0qb9%Uzz?Q2KvZY|NwTYbb6Dvaq+&fepW}c znGD`3)$tz3cL!S3z@BEgP`qwVj1Ya+wqc2O@>u-c{a1A2zDd`g<&NLZCJ+ZY}Etk)zqdOKvoHA}1!WLrx+YE4cy1NAW}ini3s-o!4G zQsKI%Ha3fw77fZ1Dy4GOC%lHx`L;!c$w7MSOVlHRt?S5<>=(S|3MLQ6&fRmVZdzn= z_(a^0yg`@zo@l7~L_Ei*&GbDRhM*vH*H1Gw_7bLsMs4e*;IqPZXRl)X1gd+9)-k$5 z-W78N=c4E=KDE6I>RVlN(1Zkt!2zueOjqfx|15F3z_A5$4-S6Wet#~X@yJZf)?C@? 
z(OK!?&plH~ScLH#xV($Z@MquAA1%$+YZ0ZsVodQ7J>7`-bedw zK^iYvHzkfD(R>$zG8na^NQB}PmZ0KPG#7n8L!G3-P#$sS$xD=Z@FP^`gTB_qzAh@O zFG5{ROxU{m&Dqat8RL*2NYa1cLEN}cxTMUs4(THI~dEU`oT%JFPbh!$xe+yCJ!fGCU(F6xLdY`t+ zYVCX~IMNGP5A(@VlI{Hdl37cz;+cyy(7|}`y^7~vjJ?_bAC>~kPUrVev_!WC{*lpo z$|8qf*VvpeQOMBnR0KI5M)i_2YK`>9#J@xswv zP&pu5d>3mLHIuSmF!jRceml>;08E>r$F200`OtS;c{_MdcdFfnMV~||%*uXc2{or} zep)K!HcWX}DSSq2U5;%Evvvnnk=C&?rwcw$7jSi$ew#6UW;cD*rB*o7muJChULYtP zYu$3a`o$&brj3+6U76@dd)f<>JALEmP(F0dC6`Xqe|<2Y_>cRjD4`3R}*A^j0a>zS2tbso6flaXwRe)swEqB>E>Nrnb+Tg>86V#$qXU zl0W9pLs*iJf55-RJA0jHQ8UDCk?fUTWi`cj_`*rQoEfA1Nj)ad;&x}in5`|wv$D?| zC@MYCOoJXNa{FJn{ACmFO}}qV_cvBd_Y`dH$Sl&Dpbz=3DQGFa&Y}kwc8GZHMlD!W;Z^ zg28g|-PIcOKRY2<@DuXO+4#lX5Wo2H9w)X8ym{bBBWPzIlcp-G^??cGc@J8c-DYSS zU;gm!!Ga#qb2K8tQ?C9LAck49@Rd%h+5jyY^4=Fz>CP&0DTn1oKkdbL1MfOlh61rO zp8(4m=f%d~tGOR@^ps%InIQ+gXqgaN2o%|jpapR42zVu`6&T9a;Gciuck}x=p@qtW zRWL7%{01`pVP5@Hdu1)7MCiMD=e$=6=ED8jN~){{*{lzD0bWQYBA#~0a>+BDw{t`Tz5>z3#_&69s@=0xesoCi21Paur z`%1M7k)jfaL5ywkAoDM&*+|jm8Zv0#kgwSR^<{&av6t*v9UMy!nD2ccMVMaBaC#nj zF4v4N8a1G=0qj?ox-;SL&;6F$MB0NX0DC5lS2Mv_ZWp$GsoV82%jkkoU(NjqJ}HxC zQ+yfV`FKvna0c>X(SWn*mjR@tA`iD*t?_!Q(R^DvnL&ySf&9{Jj9J&Vv=aeN>sY;q zRpo=zPXt)Fzp_ae*HU&=R$!MJVsvp$?jx)v3O~;#$}OkyCzYTo#lhl{!!M%Mc^?`2 zp_1ty?{>|{m2gX(<9lrNG~6v0!NHQU#qm#p1ec~L^@BJ%GFM)B(|zEuFb+QUy3*>6 zCs*~cV~%q=Go0g$geK#ebXwFjKzlqb2`0dIN!Hhik2bp8#MahutS0#!-@ockql*n< z+9o+2g470V(kZXjZ)s8s%RZXjL9UG6NGvaS`GBY3NeLw$%{LIzA#r1ZpJ#E@s_r%n zAd@P|*sg1){#Hq-fyH%NXhOv-!Y*2G&OW)T31hIQN2XKxozADBk3rQFuu~5;$ARZc zxEr;h^7UJGwRY1eh;!40nqQu2(Mdb9_dA0+ek>z;_A5eM_!7eS(`9u3mt|z?U}tA; z@9OenIdLKuVQQZJTuUa&w8d4AU%?PR&fhnit$iI|kb2)Oj(kfv@oQ(Rf$5s85uRRb z{R zK1~Q%f0h#>E3JFC*cAM#iMUw!ZqwIwIm zpc`V~#(?BMcChtDmjlD-BDHMHyy0}8n3Sf(BpF8;HdedJ&x(cfZMW|j3swRuO89bR z@SbaVEhCjr!m2;Cc%dUu;lggB#A~c&lry3C5ZT&(^&xrzHV@8ulF6XjAq)Cy-@`TK zxD-St-V3K*eCxea>VOIu5w~aYi0t62ThY)}n}juL^{_-2Uz$NJne68}LZajX>YHt~ z^t`TDj^;F;&u7??b^8bOUAl+zrzMCr;!^#y9RM@XE?#0ge}DWKbf#htAH^*}rC}Q` zZ0>9Gb=wfx1Z5L`eehS_`%!cf{*+w8Ff#DI;v0!hD=wG 
zJ~Qv?sYu`vOD7VH;;V@UpR#Y-)QmS>x+dL)&Me z%M)QT5lG#j{q0#VNw1L^d#5L^$eS~i-KAQ?)V(+8G~#A6jk)}$sKrUruxBfXTis}} zd-#u&-@QN$`&wX>#>E7UqG0Tj744Nw5FV2mIISS472k7_AwheUmr%8&!kMggJ3use z^YP@6&BfQHni9u7Zr2>2@XeFbwCN9e1(6z^OX41rlFXW{nk{IK=M43TI21Kas+Sa} zrYWINl&6nXH-*6>2|Pq3C!W|2djt*7^ByjhJI|xGiK9L7loHYc84JB8jH$aH9?4m9 zrhE4`OzUl|de107Ub#Q_Fa;u_D~}>8?x%h-(uR@`E4TTGU($RxS)=&L_%!V`Ad+JE zJb4o-*rwPZ@8VFxw)O#LSt=0ez#(NWQo5h2V#YOtsDyycY+oPdYQ*KYQgi!@Zt8cD zh65+qlUB+tV6S)#2jTA2r*!v4@-Y(KdQ};sXGQ;?%FY5R%BJn(OLuo8&63h3At5Co zqDYI<0@B^xU5bRX(j5|lbST{dN|$ukxA1(=>$0xj`QG8|Irmx4_1ph_&&;(mJJ(#J z#Me(-E*xy;Mr6XmcK0=@9AR0I&k(+6l!JJkezSYO<_s-IW{|KxmsLjnQbUR$tr4(` zsIQk5-u`@&VvsXdbBO(}$G+A6t1T{-*D+-{(_`tYTJzK{Sr@X04K{W*hBOKF_JibF zp)_;0xKck0lpapLH4u4xmL@Kaf>mhBFtt5}Z@$W~R&kK{L3Q$d@W&W5=8+ip9PU>= zLsXb++}xd>V_o|3b(fD}W%Axe5r>{fWV=*sZ)VLj=Xjib)zK+x56((arNRb89duGZ zFYKVUN3=Eg@a!vT|D?Gr(+9Z@*t^fPj*3Xe+dNDqY}%xV5`{FGFN_F^D?T|g0l7o0 z530n;4-#aNwn+5(jv0IXaZmvf8ze!Jn;f1E!j3*Ffk7@~bYDk!DJb6M=gV-C4-VfU z*`by&E0X%0_62B(V0w(v%a$+1CzdX%`5>_4JFFxB`Ta~5BqY(!&?rDqK#;QA6w!vqTZ$u&#axnWaoaYUYBP32Ao>F zVk$$xw$Z&iWW||(@c7+C6$zJes@@g1t92(Qhl*Nxz6{>prOBu zMbC|HZPM<{*7NmJPw*SG`W0KM;+%m7%C{(|L=sjX(}vL}Qmr>65)+%>v*tWYYy1_^%JF@A5kT9~XZ4^W;Z9SY zQXmrQaFqXHRn-UIHwON5G*vP0v|p#)E269MRrK@ioX0BL*xM3KJ6@<2G(Th}81?-z zsrMY^ok$UD$l6gR2eBUPNesP78%6L=1S_|aW0XtFNpf#|k72z?yjZZ1@I>M+A?wIQnbZ|nm7ttd3PjQGGQ`&s?b~VkpS-x_5%!Zd( zZn{!7jX0Fq^m+n+Z8i}ypt*l1(?^%C1HS0s4(q6~}Z;AdI~Yg-hSpNG<<|I@Dwkq_R~)~_3Gw9nwi zd-c49eZWKZ=$om8A%J}A5a5H-K_b()`R#x=S4r%&*A;vIog#)&J=!|?9I7mLHdowL zJ72RV(?OHGQiZ*v0YTg`DSi=?EbmA`8TBX1Vs>jZl`C=0^JVg80&6x?x0A`Ba7#vK zGS5B7mR0YofW5ON@_3&|J6`yY9I1g-iBoBFt1nOHmXO`n z$?}g4_W92MT(^s257aGGSz3l^-1bGwwglq)-{WQSGUM4ZiPTgSZN4M}d2;TOhS{)j zwLU(ic=Os;N>`)l;J6~$kbXcbN0kdkKZiWN2X4RbYqho}?DQ9hLOP?Mb0Ehp zg)*_s95EE4-W9{7@~h}a_bJ_qU9z1a{VY2$&72>KlG)}EHmxm#y4gW$sm zZ*FbL^0(I1Is<%MhEF6fM>Y2O9D)Q;qn1e49k=AIyOPzt8+FRa08Wqn| zM3t<5cGZvaRBUZ|tqZUWAFxSgus$Vu8p8PwJFp`TPT~r&+JA0x^6*O;yJO!*{(G!5 
zi@_jQ^yz*+hqER%S!YEwShZTzNdCAk`V59hd)yyv6X*+RI1$LoVoUnHBCIl3U5mtz zvc(Q53Nu@MF!MrWwyYT0Q?n#bIJ0)b8g03*$GRVYw4aMm0e}yn*WZH(p^SAoVNl=> z&u_-M?@QlZwCjcu^y#4##V`Nph3lqjKvPL&JgPI~(0-Jjh&h^aCWwMNphk@kL=5pz zeg=r4BgvEt_B^P@e`qT?QJB3?D3$7)|9Rb|RX3$7j1KvywYCuED(P@TN%)k- zb=Hp7Y;Qh4xn^p`kxwvZghS`>6tlVvHU%&PIU8qzgJecoo`rMtiGz0K zte%FMQXa05`qsno_BS0_n;6YK(VM$qUn%5OMAsreQI0qI0#|jU-TTudkjBB4=jGP3kt&cvr0+D-8M53Iy{Wu`@l#!Y%d5j9pGqS=?>gt{@J zy;wgQQD;1AL(R=|$heW^+O&Q!J16l@}cIg!8l%zGE?Pv%NM zZei|p=G>$;+1EeUW8|JI3Ca=G1H)42CE@6al2 z?0QSJ*az(1ObW79M*D&HhN0!^!Eu6L-|ro$2hSyCOSr;+TBB7E)ZoVQsvjG9Ka}c> zn;lG=(8pD{jjDX6hlg_aDoL=UsZguk3KSj4Fe9!q%c7lFSAYY{LX`8U9ZsEqPPh?f zsqBS7{5n4x^4Nu%T0yDIqffK!K?bDefTBcd8d^KG&M0JpU$1mS0x`QhZJoovZMnFo zNk|iYuT5>B2*A<5pGQi#fJ@ap-$g`QS0+u|1MJ2`@dI5(Ua491$UfaR5YrV~#4Z(PR^%U`gj!e~x4Q<{iZRI|WM_mP&wqR--Fn;k#vM!GZX zn}z5!f>P_yrhu>>%Io^dKVS4`m%7^E3Nf$-o}Q)+jk9!?58jIy!{SZbHy*q+SnZDu zo=ZCKo)~&93$!+#my64bn^W5>z&pP6rqM@38#h6@toyV z@HaY3MD`D2NQ$0$oEk}e!gY)e1BXrF%73B&IzKYR;pCN<8b^Ts?pzbHc7`vYMJMEi zW=GEZVWTw`bo|G(!&2&Yb5Xv=BV85L!pN&?2@cA2?%91FX?Jtfeuk-qUvfKAS8$=9 zieO*J$F>)JF3o|pchZ&6K@7P>`l=r!VCOECc0WJlg`_ABCiMw$M0!GC( zv?;{Jt+Wh{_7c0Qxuki57c2B^R`i>JQuE3U)4Dr|2bqy`ovI%7i>|rRH0lhw!NYPI z=4fy(%Zy&}?rZ^i8)$2LK>;}OWFv}!8@u!cp|Ne|64b_2IIkl=a5G1L0+h3L=9xOZ zNpmtyk+(j_c|o2-FF060EU-d-kh!q1ZBuC4iK|6E#IO5U-vO>xI$L1s6eT9(TZYrC zPY<+r@?QfTIF5gWBA8UFAGd6JJ~qD?zfVd=dn_#vEEX-HdvM@tYG2#jf_vypD+~kA zff(`Y^COi(L*M@OjQ9kKxXG_7+&a&XKggOLkw;H4^*nvh*xV$2K0+8kT#}y=k@`pv zu~FxM`IPwOei%wUf0R)xD2bvurOEbQcK8mdC(`E^9BwNLVM7|&sP#2UCb`{lm6%N# zNq*u!PE8t8T$XO3Uas2H-M)-To(-H>Fds{JlI~0n*NK@*eK*d=P}H^iF>Gmow;R*$ zYA&@dHXRe|mJ~m&Ylue?5n*}yxI0XEhk@-9I1)|mBVy2a#7L3e<44M*i!S2zO?fa# z3sEKXg>mJGJc$n^F z!LTKZ#M$h>}n+ctHt|)fSFXI{H7a_uXt>J-&QvA&obmsam z*$JZ~hhuc;g=(bkz;fLuwWxmOX2-PP%`qM%_RMd!*uLcT{IuYSBHCLX9W2$M6ImPq zX4qo8{-_InL9|Np;Qa5j$`~^=1}{#fBGc6KbmK~ymP6G`y|WS=2*YNB+CnX@HtR7z z351tA6i5q~>52L!v71{tJwng5$y+)sNL5J>9Gzk5^oAd(1sX7{eC8{4zYs*#F!LIK 
zqnc90fTQ(%5jon;LNnr7VdltSf)@`Ij`)$7JKKL}j;$Y~Q0#m7{>v2Pmt?NTUdgbI z^&w3w?OhHs_4&;(3nQzT?ZhR7Ou$C3Wth@JdmU9Z`ea-0ZbG@NGRw!mSX@8t;@+Dp zq9qk}p|S1R%yTvfGfd9-b=GD9@JzU{(`~Wj83-8UYi}8G3nJ#&HLxaT-Q%7hK>9iC zlt{THGgz^C^f+w!_2ks;i;ZWr(t*zkvbH9k)NKQS1H=?#Z%Q#ai%xe&G8k1_ASY#%vMUffj5{;Q1kcyI>6lxuT3ruC-NCH*=Tx1S5awM^HZh*30>M}r1skt6 z>c$3kEkh(zRq@DvDdY{C2zo7OQ9MSG>?{ewI>Sb(Ukc+-cE|BM?RE){7)dqT zPAkJ`^T2V->VennkL(~9xo9vu|GGpp@EpU=D$8VC;-drSoMks-!Y@a*(!edl>#>P~ z@Jo1I(4vvX|G98Ceed1Wb1id9fLI4RM)d_8wBdo4-2Sd?p&m`WmY&?9e2yA85EI zppOH5Uw+?0_3(V()!_?GSvnU}WlJ1I^!Vo5(C+H9JJOaV8F7oFPsIcnqch<{?ESl? zM<-=Ve&tVWqqY+AbrQ+h^!Yy}j8B=+^es$qtO?LD%}m?e(-bmPd183yvyh%!aT)i( z8{J|w?>B_Ps{yU98Z#tL_C*@|>vL+8Cp8^mSO9?^j`Nss9S6k1eLQ^6_ypsK2;4=H zYM4g+kgE4{I=5qLLn$=ajN`3M+XJhTUs+f8i{N2{sP zUOZ`@e1=7cNq#Drq#LEfE;!sJvHxbjI_&^7uBbVR|u9@oOa;sfvj$jHb5IkHaG zotF>aWrO}#4q7X@o+aKcpk=my*GF#lcS2gP@W;s4f=qBh1hQME^PqP?*8i+x-eTM= zX+naC9k6*3KoV9#yH0P*SOmQbCWGs`JwsX-p&xKSm%)SpMaB&SstnhrRnP_w0Knl7 z89>kqI+zS@Xfg~Ww7u$}J`G$HC^BvsP-T#2%xvBP${3luAOV6yE=ia{aKO+!5b@M^ON!8H7UN|L-UXpnwx7l$&!UAzSd@pUV$W zgN8!6Iosm@qFiT6{9RZnpi=%hBI5s#aiWBdf!ccOTvtzpK*5=kd60demFfob$AJ~l zo^N5^`onV2jQ~NtiT3vmfv%+3ASvKkGp|`tz0R`OY_}6g7Rv`nS+eYqz?*Y4pmr*M zm;-|ABpH9oLKK0dfXVv1PY!pog|7!~k&Y~el!bN=5_lsEYA?+&=#PUg3qui-0w(M4 zKKfBi)YS$^mUs!IEZm2Xz#CamyZy-H&TQziq_iO^V6tvlP%ANJNcE}{dQ>~;=gph}bmx%}I0OWgvJ0w(K*1yxr2sDUCWNR~(`q%2QMNZ^eusE#Su zJs1zVtXX$R3Ye@L7F1a~a%iylAX%+{$U^dj1m4Jk>gg^{M=zjPqJ9J<1x(fr3#u&e zLwaw2Ng72$0&iqNb)Dd++}^H4>r_Yzn5^H&`$D=u@RNsb%W_DA1m4Jk>WjgT?zt_C zy9km3ChPZ^MUb+Jk_;Q#)3(AD|K4d9P-8IT0<=xoq;9$4XD`G@eIT@#R| z;6b$@-vL&p*8fu<|Ax>)8iNObUdP@3E_%OijQ^>@Kca9UIpCp^*8!2g%l6i-;893J z@Zh-HhL}4y0{)Gwgfs+SX}mo}d$M<9`0p(5CXx=)8hmZ#wzb6NAJ&|Z*5JQ>+_nyf yfw>+9f`2w>UG(-G7kvJEySXug_6kAyy+lO;0W{qM0QfL Date: Thu, 17 Jun 2021 10:52:26 +0200 Subject: [PATCH 52/79] Update test function --- test/{test.csv => data/score_medidas.csv} | 1 - test/test_score_medidas.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) rename test/{test.csv => 
data/score_medidas.csv} (71%) diff --git a/test/test.csv b/test/data/score_medidas.csv similarity index 71% rename from test/test.csv rename to test/data/score_medidas.csv index 25fcf36..103225b 100644 --- a/test/test.csv +++ b/test/data/score_medidas.csv @@ -1,3 +1,2 @@ fecha,porcentaje_afectado,CD.12 1996-09-14,100.0,0.5 -1996-09-15,100.0,0.5 diff --git a/test/test_score_medidas.py b/test/test_score_medidas.py index d3911dc..ff3d9d2 100644 --- a/test/test_score_medidas.py +++ b/test/test_score_medidas.py @@ -1,6 +1,6 @@ import pandas as pd -from covidnpi.score.score_medidas import medidas_to_score +from covidnpi.score.score_medidas import score_medidas from covidnpi.utils.taxonomia import return_taxonomia @@ -14,5 +14,5 @@ def test_score_medidas( path_taxonomia=path_taxonomia, path_output=path_tax_out ) med = pd.read_csv(path_medidas) - sc_med = medidas_to_score(med, taxonomia) + sc_med = score_medidas(med, taxonomia) sc_med.to_csv(path_score) From 5eb8597d01854deef2e99cb35091b416e0038f95 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 10:54:29 +0200 Subject: [PATCH 53/79] Test do not store additional files --- test/data/score_medidas.csv | 2 - test/data/taxonomia.csv | 80 ------------------------------------- test/test_score_medidas.py | 7 +--- 3 files changed, 1 insertion(+), 88 deletions(-) delete mode 100644 test/data/score_medidas.csv delete mode 100644 test/data/taxonomia.csv diff --git a/test/data/score_medidas.csv b/test/data/score_medidas.csv deleted file mode 100644 index 103225b..0000000 --- a/test/data/score_medidas.csv +++ /dev/null @@ -1,2 +0,0 @@ -fecha,porcentaje_afectado,CD.12 -1996-09-14,100.0,0.5 diff --git a/test/data/taxonomia.csv b/test/data/taxonomia.csv deleted file mode 100644 index 2313f91..0000000 --- a/test/data/taxonomia.csv +++ /dev/null @@ -1,80 +0,0 @@ -codigo,item,ambito,alto,medio,bajo -CE.1,1,ceremonias,existe,, -CE.2,1,ceremonias,,<=35%,>35% -CE.7,2,ceremonias,existe,, 
-CE.3,3,ceremonias,,<=35%o<=10personas,>35%o>10personas -CE.4,4,ceremonias,,<=35%o<=10personas,>35%o>10personas -CE.9,4,ceremonias,existe,, -CE.10,5,ceremonias,existe,, -CE.5,5,ceremonias,,<=35%o<=10personas,>35%o>10personas -CE.6,6,ceremonias,,<=35%o<=10personas,>35%o>10personas -CO.1,1,comercio,existe,, -CO.8,1,comercio,,<=35%,>35% -CO.7,2,comercio,,antesoigualquelas18:00,despuésdelas18:00yantesdelas20:00. -CO.2,3,comercio,existe,, -CO.3,4,comercio,existe,, -CO.4,5,comercio,existe,, -CO.9,5,comercio,,<=35%,>35% -CO.5,6,comercio,existe,, -CO.10,7,comercio,,<=35%,>35% -CO.6,7,comercio,existe,, -CD.1,1,cultura,existe,, -CD.2,1,cultura,existe,, -CD.6,1,cultura,,<=35%,>35% -CD.7,1,cultura,,<=35%,>35% -CD.8,1,cultura,,<=35%,>35% -CD.10,2,cultura,,<=35%,>35% -CD.3,2,cultura,existe,, -CD.4,2,cultura,existe,, -CD.9,2,cultura,,<=35%,>35% -CD.11,3,cultura,,<=35%,>35% -CD.5,3,cultura,existe,, -CD.14,4,cultura,,<=35%,>35% -CD.17,4,cultura,existe,, -CD.15,5,cultura,,<=35%,>35% -CD.16,5,cultura,existe,, -AF.1,1,deporte_exterior,existe,, -AF.6,1,deporte_exterior,,<=35%o<=6personas,>35%o>6personas(onoseespecifica) -AF.7,1,deporte_exterior,,<=6personas,>6personas(onoseespecifica) -AF.17,2,deporte_exterior,,<=6personas(onoseespecifica),>6personas -AF.4,2,deporte_exterior,existe,, -AF.13,3,deporte_exterior,existe,, -AF.15,3,deporte_exterior,,<=35%,>35% -AF.3,3,deporte_exterior,existe,, -AF.1,1,deporte_interior,existe,, -AF.12,1,deporte_interior,,<=6personas,>6personas(onoseespecifica) -AF.2,1,deporte_interior,existe,, -AF.5,1,deporte_interior,,<=35%o<=6personas,>35%o>6personas(onoseespecifica) -AF.17,2,deporte_interior,,<=6personas(onoseespecifica),>6personas -AF.4,2,deporte_interior,existe,, -AF.14,3,deporte_interior,existe,, -AF.16,3,deporte_interior,,<=35%,>35% -AF.3,3,deporte_interior,existe,, -CD.12,1,distancia_social,,<=100personas,>100personas -CD.13,1,distancia_social,,<=100personas,>100personas -MV.1,2,distancia_social,existe,, -MV.2,2,distancia_social,,,existe 
-RS.1,3,distancia_social,<=6personas,>6y<=10,>10 -RS.2,3,distancia_social,<=6personas,>6y<=10,>10 -RS.3,3,distancia_social,<=6personas,>6y<=10,>10 -RS.8,3,distancia_social,existe,, -TP.1,4,distancia_social,existe,, -MV.3,1,movilidad,existe,, -MV.4,2,movilidad,existe,, -MV.4,3,movilidad,existe,, -MV.4,4,movilidad,existe,, -MV.7,4,movilidad,existe,, -RH.1,1,restauracion_exterior,existe,, -RH.2,1,restauracion_exterior,existe,, -RH.6,1,restauracion_exterior,,<=35%,>35% -RH.5,2,restauracion_exterior,,antesoigualquelas18:00,despuésdelas18:00 -RH.10,3,restauracion_exterior,,<6personas,>=6personas -RH.9,3,restauracion_exterior,,<6personas,>=6personas -RH.1,1,restauracion_interior,existe,, -RH.2,1,restauracion_interior,existe,, -RH.3,1,restauracion_interior,existe,, -RH.4,1,restauracion_interior,,,existe -RH.7,1,restauracion_interior,,<=35%,>35% -RH.5,2,restauracion_interior,,antesoigualquelas18:00,despuésdelas18:00 -RH.11,3,restauracion_interior,,<6personas,>=6personas -RH.9,3,restauracion_interior,,<6personas,>=6personas diff --git a/test/test_score_medidas.py b/test/test_score_medidas.py index ff3d9d2..72fc568 100644 --- a/test/test_score_medidas.py +++ b/test/test_score_medidas.py @@ -7,12 +7,7 @@ def test_score_medidas( path_medidas: str = "test/data/medidas.csv", path_taxonomia: str = "test/data/taxonomia.xlsx", - path_tax_out: str = "test/data/taxonomia.csv", - path_score: str = "test/data/score_medidas.csv", ): - taxonomia = return_taxonomia( - path_taxonomia=path_taxonomia, path_output=path_tax_out - ) + taxonomia = return_taxonomia(path_taxonomia=path_taxonomia, path_output=None) med = pd.read_csv(path_medidas) sc_med = score_medidas(med, taxonomia) - sc_med.to_csv(path_score) From b87ec1799571c0e1e8f83375640d3cd968dd81e6 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:03:38 +0200 Subject: [PATCH 54/79] Integrate pytest in Github --- .github/workflows/main.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 
.github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..637a09b --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,24 @@ +name: Train model +on: + pull_request: + branches: [master] +jobs: + train_model: + name: Train model + runs-on: self-hosted + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Test with pytest + run: | + pip install pytest + pip install pytest-cov + pytest tests.py --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html From be15ff46ee264d0fa5d8c791bd426d70dc388023 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:04:55 +0200 Subject: [PATCH 55/79] Update pytest integration --- .github/workflows/main.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 637a09b..6eafb8a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,10 +1,10 @@ -name: Train model -on: - pull_request: - branches: [master] +name: pytest + +on: [push] + jobs: - train_model: - name: Train model + build: + runs-on: self-hosted steps: @@ -12,11 +12,11 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: '3.7' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install .[dev] - name: Test with pytest run: | pip install pytest From 70cbb4d914f9a101634a043e251bf53b78b78ba6 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:06:19 +0200 Subject: [PATCH 56/79] runs on to ubuntu-latest --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/main.yml b/.github/workflows/main.yml index 6eafb8a..aeafac1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,7 +5,7 @@ on: [push] jobs: build: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 From 0d94ba70c098cc3c7eab48c971b653c5964a58f1 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:11:09 +0200 Subject: [PATCH 57/79] Add pytest to setup --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 363899b..9946338 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ "pandas==1.0.3", "pip==19.3", "pymongo==3.11.0", + "pytest==6.2.4", "toml==0.10.2", "typer==0.3.2", "xlrd==1.1.0", From fcf9b84ee6a39fd43bcbb9d07e84be03a7b9484f Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:11:17 +0200 Subject: [PATCH 58/79] Simplify pytest integration --- .github/workflows/main.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index aeafac1..a9d03ae 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,6 +19,4 @@ jobs: pip install .[dev] - name: Test with pytest run: | - pip install pytest - pip install pytest-cov - pytest tests.py --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html + pytest From 2a2c82eced26c25183fa7d3c003eca849ca8c4a2 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:17:58 +0200 Subject: [PATCH 59/79] Storing dictionary of conditions is optional --- covidnpi/score/score_medidas.py | 20 ++++++++++++++++---- test/test_score_medidas.py | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index 2438284..c6813e5 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -138,7 +138,11 @@ def list_missing_codigos(taxonomia: pd.DataFrame, 
dict_condicion: dict): logger.error(f"Faltan codigos en condicones: {', '.join(list_missing)}") -def add_score_medida(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: +def add_score_medida( + df: pd.DataFrame, + taxonomia: pd.DataFrame, + path_out_conditions: str = "output/dict_condicion.json", +) -> pd.DataFrame: df_score = df.copy() # Asumimos que por defecto es baja df_score["score_medida"] = 0.2 @@ -195,7 +199,7 @@ def add_score_medida(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: dict_condicion.update({nivel: condicion}) # Store dictionary - store_dict_condicion(dict_condicion) + store_dict_condicion(dict_condicion, path_output=path_out_conditions) # List missing codigos list_missing_codigos(taxonomia, dict_condicion) @@ -229,7 +233,11 @@ def pivot_df_score(df_score: pd.DataFrame): return df_medida -def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: +def score_medidas( + df: pd.DataFrame, + taxonomia: pd.DataFrame, + path_out_conditions: str = "output/dict_condicion.json", +) -> pd.DataFrame: """Receives the medidas dataframe and outputs a new dataframe of scores Parameters @@ -238,6 +246,8 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: Dataframe of medidas taxonomia : pd.DataFrame Dataframe with taxonomy data + path_out_conditions: str, optional + Path where the extracted conditions are stored, by default "output/dict_condicion.json" Returns ------- @@ -247,7 +257,9 @@ def score_medidas(df: pd.DataFrame, taxonomia: pd.DataFrame) -> pd.DataFrame: df_sub = df.copy() df_sub = process_hora(df_sub) df_sub_extended = extend_fecha(df_sub) - df_score = add_score_medida(df_sub_extended, taxonomia) + df_score = add_score_medida( + df_sub_extended, taxonomia, path_out_conditions="output/dict_condicion.json" + ) df_score = pivot_df_score(df_score) return df_score diff --git a/test/test_score_medidas.py b/test/test_score_medidas.py index 72fc568..e880792 100644 --- 
a/test/test_score_medidas.py +++ b/test/test_score_medidas.py @@ -10,4 +10,4 @@ def test_score_medidas( ): taxonomia = return_taxonomia(path_taxonomia=path_taxonomia, path_output=None) med = pd.read_csv(path_medidas) - sc_med = score_medidas(med, taxonomia) + sc_med = score_medidas(med, taxonomia, path_out_conditions=None) From 0da28ea010d0a4386c6d46d62302b90f4b794771 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 11:21:33 +0200 Subject: [PATCH 60/79] Use param path_out_condition --- covidnpi/score/score_medidas.py | 2 +- covidnpi/utils/dictionaries.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/covidnpi/score/score_medidas.py b/covidnpi/score/score_medidas.py index c6813e5..189fdc8 100644 --- a/covidnpi/score/score_medidas.py +++ b/covidnpi/score/score_medidas.py @@ -258,7 +258,7 @@ def score_medidas( df_sub = process_hora(df_sub) df_sub_extended = extend_fecha(df_sub) df_score = add_score_medida( - df_sub_extended, taxonomia, path_out_conditions="output/dict_condicion.json" + df_sub_extended, taxonomia, path_out_conditions=path_out_conditions ) df_score = pivot_df_score(df_score) return df_score diff --git a/covidnpi/utils/dictionaries.py b/covidnpi/utils/dictionaries.py index af04029..b44496c 100644 --- a/covidnpi/utils/dictionaries.py +++ b/covidnpi/utils/dictionaries.py @@ -61,5 +61,7 @@ def store_dict_condicion( dict_condicion: dict, path_output: str = "output/dict_condicion.json" ): """Guarda un json con las condiciones aplicadas por la taxonomia""" + if path_output is None: + return with open(path_output, "w") as f: json.dump(dict_condicion, f) From 9a5e7d7f1273ccd3c576a8dd5ed8c67a0095acbe Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 17:49:15 +0200 Subject: [PATCH 61/79] Remove files before storing new ones --- covidnpi/utils/dictionaries.py | 4 ++++ update.sh | 1 + 2 files changed, 5 insertions(+) diff --git a/covidnpi/utils/dictionaries.py b/covidnpi/utils/dictionaries.py index b44496c..c1e9fa2 
100644 --- a/covidnpi/utils/dictionaries.py +++ b/covidnpi/utils/dictionaries.py @@ -18,6 +18,10 @@ def store_dict_provincia_to_medidas( for provincia, df_medida in dict_medidas.items(): path_file = os.path.join(path_output, provincia.split("/")[0] + ".csv") + # Remove file if it exist + if os.path.exists(path_file): + os.remove(path_file) + # Store new file df_medida.to_csv(path_file, index=False) diff --git a/update.sh b/update.sh index 1153b71..c7d52da 100644 --- a/update.sh +++ b/update.sh @@ -1,3 +1,4 @@ +rm -r output/ python covidnpi/preprocess_and_score.py --path-raw ../modelos-covid/datos_NPI_3 > log.out python covidnpi/initialize_web.py --path-config config.toml python covidnpi/initialize_web.py --path-config config-staging.toml From b876fe05dfd4024eb67c2a95d89a6e8563138986 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 18:14:49 +0200 Subject: [PATCH 62/79] Raise error when province is missing --- covidnpi/utils/preprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 0c02918..71d870e 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -32,6 +32,7 @@ "CEU": "ceuta", "MEL": "melilla", "MUR": "murcia", + "NAV": "navarra", "RIO": "rioja_la", } @@ -183,7 +184,7 @@ def read_npi_data( logger.warning(f"La columna 'provincia' se ha rellenado con '{value}'") break else: - logger.warning("La columna 'provincia' no ha sido rellenada") + raise ValueError("La columna 'provincia' no puede ser rellenada") return df From 2aa86263d465c1315ce8326f60c1e4986c711191 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 18:15:00 +0200 Subject: [PATCH 63/79] Fix path in update script --- update.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update.sh b/update.sh index c7d52da..9583f05 100644 --- a/update.sh +++ b/update.sh @@ -1,5 +1,5 @@ rm -r output/ -python covidnpi/preprocess_and_score.py --path-raw 
../modelos-covid/datos_NPI_3 > log.out +python covidnpi/preprocess_and_score.py --path-raw datos_NPI python covidnpi/initialize_web.py --path-config config.toml python covidnpi/initialize_web.py --path-config config-staging.toml python covidnpi/initialize_web.py --path-config config-live.toml From 5d313bc4d38f460f1bc983528e3b4efa3a53baf9 Mon Sep 17 00:00:00 2001 From: daniprec Date: Thu, 17 Jun 2021 18:44:09 +0200 Subject: [PATCH 64/79] Raise warning when percentage over 100 is found --- covidnpi/score/score_ambitos.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/covidnpi/score/score_ambitos.py b/covidnpi/score/score_ambitos.py index 2951f7a..7060b14 100644 --- a/covidnpi/score/score_ambitos.py +++ b/covidnpi/score/score_ambitos.py @@ -27,7 +27,15 @@ def compute_proportion(df: pd.DataFrame, item: str): .groupby("fecha")["porcentaje_afectado"] .sum() ) - porcentaje_general[porcentaje_general < 0] = 0 + # Avisamos si hay sumas de porcentajes que superan el 100 + list_dates = [ + d.strftime("%d-%m-%Y") for d in porcentaje_general[porcentaje_general < 0].index + ] + if len(list_dates) > 0: + logger.warning( + f"La suma de porcentajes para {item} supera 100 en: {', '.join(list_dates)}" + ) + porcentaje_general[porcentaje_general < 0] = 0 # Identificamos las medidas que se han aplicado exclusivamente con caracter general mask_general = df_sub["porcentaje_afectado"] == 100 @@ -52,11 +60,14 @@ def compute_proportion(df: pd.DataFrame, item: str): ignore_index=True, ) - # Se pondera la score de cada item = score * porcentaje que afecta / 100 - df_sub["ponderado"] = df_sub["porcentaje_afectado"] * df_sub[item] / 100 + # Se pondera la score de cada item = score * porcentaje que afecta + df_sub["ponderado"] = df_sub["porcentaje_afectado"] * df_sub[item] # Agrupamos por dia, sumando las score ponderadas - score = df_sub.groupby("fecha")["ponderado"].sum() + score = ( + df_sub.groupby("fecha")["ponderado"].sum() + / 
df_sub.groupby("fecha")["porcentaje_afectado"].sum() + ) return score From 5edd64001fe1e90d3136287e0565a892e789bd79 Mon Sep 17 00:00:00 2001 From: daniprec Date: Fri, 18 Jun 2021 11:46:40 +0200 Subject: [PATCH 65/79] pd.sum() do not replaces NaN values --- covidnpi/score/score_items.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covidnpi/score/score_items.py b/covidnpi/score/score_items.py index 620fc81..8082881 100644 --- a/covidnpi/score/score_items.py +++ b/covidnpi/score/score_items.py @@ -71,7 +71,7 @@ def score_items(df: pd.DataFrame): df_item["RIN_afo"] = np.nanmax( [ df[["RH.1", "RH.2", "RH.3"]].max(axis=1), - df[["RH.4", "RH.7"]].sum(axis=1) + df[["RH.4", "RH.7"]].sum(axis=1, skipna=False) * df[["RH.1", "RH.2", "RH.3"]].isna().all(axis=1), ], axis=0, From e49bbd69cf48daeffcc557e2845f2d829302a6c6 Mon Sep 17 00:00:00 2001 From: daniprec Date: Sat, 19 Jun 2021 11:07:17 +0200 Subject: [PATCH 66/79] Move regions dict to script --- covidnpi/config.toml | 237 +----------------------------- covidnpi/preprocess_and_score.py | 5 +- covidnpi/utils/casos.py | 8 +- covidnpi/utils/mobility.py | 17 +-- covidnpi/utils/preprocess.py | 11 +- covidnpi/utils/regions.py | 239 +++++++++++++++++++++++++++++++ covidnpi/web/datastore.py | 20 ++- covidnpi/web/generate_json.py | 4 +- 8 files changed, 266 insertions(+), 275 deletions(-) create mode 100644 covidnpi/utils/regions.py diff --git a/covidnpi/config.toml b/covidnpi/config.toml index 7cd4c3a..59fb589 100644 --- a/covidnpi/config.toml +++ b/covidnpi/config.toml @@ -11,239 +11,4 @@ fillna_date_end = "today" # "today" or "start" [casos] movavg = 7 -link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv" - -[postal_to_code] -01 = "VI" -02 = "AB" -03 = "A" -04 = "AL" -05 = "AV" -06 = "BA" -07 = "PM" -08 = "B" -09 = "BU" -10 = "CC" -11 = "CA" -12 = "CS" -13 = "CR" -14 = "CO" -15 = "C" -16 = "CU" -17 = "GI" -18 = "GR" -19 = "GU" -20 = "SS" -21 = "H" -22 = "HU" -23 = "J" -24 = "LE" -25 
= "L" -26 = "LO" -27 = "LU" -28 = "M" -29 = "MA" -30 = "MU" -31 = "NA" -32 = "OR" -33 = "O" -34 = "P" -35 = "GC" -36 = "PO" -37 = "SA" -38 = "TF" -39 = "S" -40 = "SG" -41 = "SE" -42 = "SO" -43 = "T" -44 = "TE" -45 = "TO" -46 = "V" -47 = "VA" -48 = "BI" -49 = "ZA" -50 = "Z" -51 = "CE" -52 = "ML" - -[code_to_provincia] -A = "Alacant" -AB = "Albacete" -AL = "Almería" -AV = "Ávila" -B = "Barcelona" -BA = "Badajoz" -BI = "Bizkaia" -BU = "Burgos" -C = "A Coruña" -CA = "Cádiz" -CC = "Cáceres" -CE = "Ceuta" -CO = "Córdoba" -CR = "Ciudad Real" -CS = "Castelló" -CU = "Cuenca" -GC = "Las Palmas" -GI = "Girona" -GR = "Granada" -GU = "Guadalajara" -H = "Huelva" -HU = "Huesca" -J = "Jaén" -L = "Lleida" -LE = "León" -LO = "La Rioja" -LU = "Lugo" -M = "Madrid" -MA = "Málaga" -ML = "Melilla" -MU = "Murcia" -NA = "Nafarroa" -OR = "Ourense" -O = "Asturias" -P = "Palencia" -PM = "Illes Balears" -PO = "Pontevedra" -SA = "Salamanca" -TF = "Sta. Cruz de Tenerife" -S = "Cantabria" -SG = "Segovia" -SE = "Sevilla" -SO = "Soria" -SS = "Gipuzkoa" -T = "Tarragona" -TE = "Teruel" -TO = "Toledo" -V = "Valéncia" -VA = "Valladolid" -VI = "Álava" -ZA = "Zamora" -Z = "Zaragoza" - -[code_reassign] -AS = "O" -CB = "S" -IB = "PM" -MD = "M" -MC = "MU" -NC = "NA" - -[provincia_to_code] -alava = "VI" -albacete = "AB" -alicante = "A" -almeria = "AL" -avila = "AV" -badajoz = "BA" -mallorca = "PM" -barcelona = "B" -burgos = "BU" -caceres = "CC" -cadiz = "CA" -castellon = "CS" -ceuta = "CE" -ciudad_real = "CR" -cordoba = "CO" -coruna_la = "C" -cuenca = "CU" -girona = "GI" -granada = "GR" -guadalajara = "GU" -guipuzcoa = "SS" -huelva = "H" -huesca = "HU" -jaen = "J" -leon = "LE" -lleida = "L" -rioja_la = "LO" -lugo = "LU" -madrid = "M" -malaga = "MA" -melilla = "ML" -murcia = "MU" -navarra = "NA" -orense = "OR" -asturias = "O" -palencia = "P" -grancanaria = "GC" -gran_canaria = "GC" -pontevedra = "PO" -salamanca = "SA" -santa_cruz_de_tenerife = "TF" -cantabria = "S" -segovia = "SG" -sevilla = "SE" -soria = 
"SO" -tarragona = "T" -tenerife = "TF" -teruel = "TE" -toledo = "TO" -valencia = "V" -valladolid = "VA" -vizcaya = "BI" -zamora = "ZA" -zaragoza = "Z" - -[isla_to_provincia] -elhierro = "grancanaria" -formentera = "tenerife" -fuerteventura = "grancanaria" -ibiza = "tenerife" -lagomera = "grancanaria" -lanzarote = "grancanaria" -menorca = "tenerife" - -[code_to_poblacion] -AB = 388270 -A = 1879888 -AL = 727945 -VI = 333940 -O = 1018784 -AV = 157664 -BA = 672137 -PM = 1171543 -B = 5743402 -BI = 1159443 -BU = 357650 -CC = 391850 -CA = 1244049 -S = 582905 -CS = 585590 -CR = 495045 -CO = 781451 -C = 1121815 -CU = 196139 -SS = 727121 -GI = 781788 -GR = 919168 -GU = 261995 -H = 524278 -HU = 222687 -J = 631381 -LE = 456439 -L = 438517 -LU = 327946 -M = 6779888 -MA = 1685920 -MU = 1511251 -NA = 661197 -OR = 306650 -P = 160321 -GC = 1131065 -PO = 945408 -LO = 319914 -SA = 329245 -TF = 1044887 -SG = 153478 -SE = 1950219 -SO = 88884 -T = 816772 -TE = 134176 -TO = 703772 -V = 2591875 -VA = 520649 -ZA = 170588 -Z = 972528 -CE = 84202 -ML = 87076 \ No newline at end of file +link = "https://cnecovid.isciii.es/covid19/resources/casos_tecnica_provincia.csv" \ No newline at end of file diff --git a/covidnpi/preprocess_and_score.py b/covidnpi/preprocess_and_score.py index 204a608..9c041c4 100644 --- a/covidnpi/preprocess_and_score.py +++ b/covidnpi/preprocess_and_score.py @@ -19,7 +19,6 @@ def main( path_raw: str = "datos_NPI", path_taxonomia: str = PATH_TAXONOMIA, - path_config: str = "config.toml", path_output: str = "output", ): """Reads the raw data stored in path_raw, preprocess and scores it, while storing @@ -32,8 +31,6 @@ def main( Path to raw data, by default "datos_NPI_2" path_taxonomia : str, optional Path to taxonomia xlsx file, by default `PATH_TAXONOMIA` - path_config : str, optional - Path to config file, by default 'config.toml' path_output : str, optional Output folder, by default "output" @@ -80,7 +77,7 @@ def main( ) path_mobility = os.path.join(path_output, 
"mobility") - mobility_report_to_csv(path_config=path_config, path_output=path_mobility) + mobility_report_to_csv(path_output=path_mobility) logger.debug(f"La informacion de movilidad ha sido guardada en {path_mobility}\n") diff --git a/covidnpi/utils/casos.py b/covidnpi/utils/casos.py index 6dbe50c..150252b 100644 --- a/covidnpi/utils/casos.py +++ b/covidnpi/utils/casos.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from covidnpi.utils.config import load_config +from covidnpi.utils.regions import CODE_TO_POBLACION from covidnpi.utils.log import logger warnings.filterwarnings("ignore", category=RuntimeWarning) @@ -75,7 +75,6 @@ def return_casos_of_provincia_normed( casos: pd.DataFrame, code: str, per_inhabitants: int = 100000, - path_config: str = "covidnpi/config.toml", ) -> pd.Series: """Return the series of cases of COVID per date, per N inhabitants, for a province @@ -88,8 +87,6 @@ def return_casos_of_provincia_normed( Code of the province (example: "M" for "Madrid") per_inhabitants : int, optional Normalization value, N, by default 100,000 - path_config : str, optional - Path to the config file, by default "covidnpi/config.toml" Returns ------- @@ -98,6 +95,5 @@ def return_casos_of_provincia_normed( """ series = return_casos_of_provincia(casos, code) - code_to_poblacion = load_config(path_config, "code_to_poblacion") - pob = code_to_poblacion[code] + pob = CODE_TO_POBLACION[code] return per_inhabitants * series / pob diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index aaad039..15ab806 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -4,7 +4,7 @@ import typer from covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed -from covidnpi.utils.config import load_config +from covidnpi.utils.regions import CODE_REASSIGN, PROVINCIA_TO_CODE, CODE_TO_PROVINCIA from covidnpi.utils.log import logger from covidnpi.utils.rho import compute_rho from covidnpi.utils.series import ( @@ -89,7 +89,7 
@@ def return_reports_of_provincia(mob: pd.DataFrame, code: str) -> dict: def mobility_report_to_csv( - path_config: str = "covidnpi/config.toml", path_output: str = "output/mobility" + path_output: str = "output/mobility", ): """Stores the Google mobility reports in csv format""" @@ -98,24 +98,19 @@ def mobility_report_to_csv( mob = load_mobility_report() casos = load_casos_df() - code_to_provincia = load_config(path_config, "code_to_provincia") - code_reassign = load_config(path_config, "code_reassign") - provincia_to_code = load_config(path_config, "provincia_to_code") - code_to_filename = {v: k for k, v in provincia_to_code.items()} + code_to_filename = {v: k for k, v in PROVINCIA_TO_CODE.items()} for code in mob["code"].unique(): # Reassign code if needed - code = code_reassign.get(code, code) + code = CODE_REASSIGN.get(code, code) try: - provincia = code_to_provincia[code] + provincia = CODE_TO_PROVINCIA[code] logger.debug(f"{code} - {provincia}") except KeyError: logger.warning(f"Omitted {code}") continue dict_reports = return_reports_of_provincia(mob, code) - series_casos = return_casos_of_provincia_normed( - casos, code, path_config=path_config - ) + series_casos = return_casos_of_provincia_normed(casos, code) series_ia7 = cumulative_incidence(series_casos, 7) series_growth = compute_growth_rate(series_casos, 7) series_rho = compute_rho(series_casos) diff --git a/covidnpi/utils/preprocess.py b/covidnpi/utils/preprocess.py index 71d870e..a592d26 100644 --- a/covidnpi/utils/preprocess.py +++ b/covidnpi/utils/preprocess.py @@ -14,6 +14,7 @@ raise_missing_warning, ) from covidnpi.utils.taxonomia import return_all_medidas, PATH_TAXONOMIA +from covidnpi.utils.regions import DICT_PROVINCE_RENAME, DICT_FILL_PROVINCIA LIST_BASE_SHEET = ["base", "base-regional-provincias", "BASE"] @@ -27,15 +28,6 @@ "pesqueradeduero": 0.1, } -DICT_FILL_PROVINCIA = { - "CTB": "cantabria", - "CEU": "ceuta", - "MEL": "melilla", - "MUR": "murcia", - "NAV": "navarra", - "RIO": 
"rioja_la", -} - DICT_COL_RENAME = { "cod_con": "codigo", "unidad_de_medida": "unidad", @@ -77,7 +69,6 @@ "vizcaya": "pais_vasco", } -DICT_PROVINCE_RENAME = {"a_coruna": "coruna_la", "cyl": "", "guipuzkoa": "guipuzcoa"} DICT_CCAA_RENAME = {"autonomico": np.nan} LIST_MEDIDAS_NO_HORA = ["MV.3", "MV.4", "MV.7"] diff --git a/covidnpi/utils/regions.py b/covidnpi/utils/regions.py new file mode 100644 index 0000000..ad5b702 --- /dev/null +++ b/covidnpi/utils/regions.py @@ -0,0 +1,239 @@ +POSTAL_TO_CODE = { + "01": "VI", + "02": "AB", + "03": "A", + "04": "AL", + "05": "AV", + "06": "BA", + "07": "PM", + "08": "B", + "09": "BU", + "10": "CC", + "11": "CA", + "12": "CS", + "13": "CR", + "14": "CO", + "15": "C", + "16": "CU", + "17": "GI", + "18": "GR", + "19": "GU", + "20": "SS", + "21": "H", + "22": "HU", + "23": "J", + "24": "LE", + "25": "L", + "26": "LO", + "27": "LU", + "28": "M", + "29": "MA", + "30": "MU", + "31": "NA", + "32": "OR", + "33": "O", + "34": "P", + "35": "GC", + "36": "PO", + "37": "SA", + "38": "TF", + "39": "S", + "40": "SG", + "41": "SE", + "42": "SO", + "43": "T", + "44": "TE", + "45": "TO", + "46": "V", + "47": "VA", + "48": "BI", + "49": "ZA", + "50": "Z", + "51": "CE", + "52": "ML", +} + +CODE_TO_PROVINCIA = { + "A": "Alacant", + "AB": "Albacete", + "AL": "Almería", + "AV": "Ávila", + "B": "Barcelona", + "BA": "Badajoz", + "BI": "Bizkaia", + "BU": "Burgos", + "C": "A Coruña", + "CA": "Cádiz", + "CC": "Cáceres", + "CE": "Ceuta", + "CO": "Córdoba", + "CR": "Ciudad Real", + "CS": "Castelló", + "CU": "Cuenca", + "GC": "Las Palmas", + "GI": "Girona", + "GR": "Granada", + "GU": "Guadalajara", + "H": "Huelva", + "HU": "Huesca", + "J": "Jaén", + "L": "Lleida", + "LE": "León", + "LO": "La Rioja", + "LU": "Lugo", + "M": "Madrid", + "MA": "Málaga", + "ML": "Melilla", + "MU": "Murcia", + "NA": "Nafarroa", + "OR": "Ourense", + "O": "Asturias", + "P": "Palencia", + "PM": "Illes Balears", + "PO": "Pontevedra", + "SA": "Salamanca", + "TF": "Sta. 
Cruz de Tenerife", + "S": "Cantabria", + "SG": "Segovia", + "SE": "Sevilla", + "SO": "Soria", + "SS": "Gipuzkoa", + "T": "Tarragona", + "TE": "Teruel", + "TO": "Toledo", + "V": "Valéncia", + "VA": "Valladolid", + "VI": "Álava", + "ZA": "Zamora", + "Z": "Zaragoza", +} + +CODE_REASSIGN = {"AS": "O", "CB": "S", "IB": "PM", "MD": "M", "MC": "MU", "NC": "NA"} + +PROVINCIA_TO_CODE = { + "alava": "VI", + "albacete": "AB", + "alicante": "A", + "almeria": "AL", + "avila": "AV", + "badajoz": "BA", + "mallorca": "PM", + "barcelona": "B", + "burgos": "BU", + "caceres": "CC", + "cadiz": "CA", + "castellon": "CS", + "ceuta": "CE", + "ciudad_real": "CR", + "cordoba": "CO", + "coruna_la": "C", + "cuenca": "CU", + "girona": "GI", + "granada": "GR", + "guadalajara": "GU", + "guipuzcoa": "SS", + "huelva": "H", + "huesca": "HU", + "jaen": "J", + "leon": "LE", + "lleida": "L", + "rioja_la": "LO", + "lugo": "LU", + "madrid": "M", + "malaga": "MA", + "melilla": "ML", + "murcia": "MU", + "navarra": "NA", + "orense": "OR", + "asturias": "O", + "palencia": "P", + "gran_canaria": "GC", + "pontevedra": "PO", + "salamanca": "SA", + "santa_cruz_de_tenerife": "TF", + "cantabria": "S", + "segovia": "SG", + "sevilla": "SE", + "soria": "SO", + "tarragona": "T", + "tenerife": "TF", + "teruel": "TE", + "toledo": "TO", + "valencia": "V", + "valladolid": "VA", + "vizcaya": "BI", + "zamora": "ZA", + "zaragoza": "Z", +} + +DICT_PROVINCE_RENAME = { + "a_coruna": "coruna_la", + "cyl": "", + "guipuzkoa": "guipuzcoa", + "grancanaria": "gran_canaria", +} + +ISLA_TO_PROVINCIA = { + "elhierro": "gran_canaria", + "formentera": "tenerife", + "fuerteventura": "gran_canaria", + "ibiza": "tenerife", + "lagomera": "gran_canaria", + "lanzarote": "gran_canaria", + "menorca": "tenerife", +} + +CODE_TO_POBLACION = { + "AB": 388270, + "A": 1879888, + "AL": 727945, + "VI": 333940, + "O": 1018784, + "AV": 157664, + "BA": 672137, + "PM": 1171543, + "B": 5743402, + "BI": 1159443, + "BU": 357650, + "CC": 391850, + "CA": 
1244049, + "S": 582905, + "CS": 585590, + "CR": 495045, + "CO": 781451, + "C": 1121815, + "CU": 196139, + "SS": 727121, + "GI": 781788, + "GR": 919168, + "GU": 261995, + "H": 524278, + "HU": 222687, + "J": 631381, + "LE": 456439, + "L": 438517, + "LU": 327946, + "M": 6779888, + "MA": 1685920, + "MU": 1511251, + "NA": 661197, + "OR": 306650, + "P": 160321, + "GC": 1131065, + "PO": 945408, + "LO": 319914, + "SA": 329245, + "TF": 1044887, + "SG": 153478, + "SE": 1950219, + "SO": 88884, + "T": 816772, + "TE": 134176, + "TO": 703772, + "V": 2591875, + "VA": 520649, + "ZA": 170588, + "Z": 972528, + "CE": 84202, + "ML": 87076, +} diff --git a/covidnpi/web/datastore.py b/covidnpi/web/datastore.py index 78eff7d..2b2d477 100644 --- a/covidnpi/web/datastore.py +++ b/covidnpi/web/datastore.py @@ -9,6 +9,7 @@ return_casos_of_provincia_normed, ) from covidnpi.utils.config import load_config +from covidnpi.utils.regions import PROVINCIA_TO_CODE from covidnpi.utils.log import logger from covidnpi.utils.series import cumulative_incidence, compute_growth_rate from covidnpi.utils.taxonomia import return_taxonomia, PATH_TAXONOMIA @@ -36,8 +37,6 @@ def store_scores_in_mongo( cfg_mongo = load_config(path_config, key="mongo") mongo = load_mongo(cfg_mongo) - provincia_to_code = load_config(path_config, key="provincia_to_code") - taxonomia = return_taxonomia(path_taxonomia=path_taxonomia) list_ambito = taxonomia["ambito"].unique().tolist() # Get the minimum date in datetime format @@ -53,7 +52,7 @@ def store_scores_in_mongo( try: dict_provincia = { "provincia": provincia, - "code": provincia_to_code[provincia], + "code": PROVINCIA_TO_CODE[provincia], "fechas": df.index.tolist(), } except KeyError: @@ -74,13 +73,17 @@ def store_scores_in_mongo( _ = mongo.insert_new_dict("scores", dict_provincia) -def store_casos_in_mongo(path_config: str = "covidnpi/config.toml"): +def store_casos_in_mongo( + path_config: str = "covidnpi/config.toml", +): """Store incidence and growth rate in mongo 
Parameters ---------- path_config : str, optional Config file contains the route and credentials of mongo server + path_regions : str, optional + Regions file contains info about provinces and AC """ cfg_mongo = load_config(path_config, key="mongo") @@ -97,7 +100,8 @@ def store_casos_in_mongo(path_config: str = "covidnpi/config.toml"): dict_provincia = {} try: series = return_casos_of_provincia_normed( - casos, code, path_config=path_config + casos, + code, ) # Filter dates previous to the minimum date mask_date = series.index >= date_min @@ -142,11 +146,15 @@ def datastore( Path to taxonomia xlsx file path_config : str, optional Path to the config toml file + path_regions : str, optional + Path to the regions file """ logger.debug("\n-----\nStoring scores in mongo\n-----\n") store_scores_in_mongo( - path_output=path_output, path_taxonomia=path_taxonomia, path_config=path_config + path_output=path_output, + path_taxonomia=path_taxonomia, + path_config=path_config, ) logger.debug("\n-----\nStoring number of cases in mongo\n-----\n") store_casos_in_mongo(path_config=path_config) diff --git a/covidnpi/web/generate_json.py b/covidnpi/web/generate_json.py index 2c95ab7..c84cecc 100644 --- a/covidnpi/web/generate_json.py +++ b/covidnpi/web/generate_json.py @@ -5,6 +5,7 @@ from covidnpi.utils.config import load_config from covidnpi.utils.log import logger from covidnpi.web.mongo import load_mongo +from covidnpi.utils.regions import CODE_TO_PROVINCIA def json_code_to_provincia(code_to_provincia: dict, path_json: str): @@ -74,8 +75,7 @@ def generate_json( """ logger.debug(f"\n-----\nStoring provinces json in {path_json_provincia}\n-----") - code_to_provincia = load_config(path_config, key="code_to_provincia") - json_code_to_provincia(code_to_provincia, path_json=path_json_provincia) + json_code_to_provincia(CODE_TO_PROVINCIA, path_json=path_json_provincia) logger.debug(f"\n-----\nStoring ambits json in {path_json_ambitos}\n-----") json_ambitos(path_config, 
path_json_ambitos) From 26176d71c7a0e95f93977e65de70c93c8fcee363 Mon Sep 17 00:00:00 2001 From: daniprec Date: Sat, 19 Jun 2021 11:09:11 +0200 Subject: [PATCH 67/79] Add DICT_FILL_PROVINCIA to regions script --- covidnpi/utils/regions.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/covidnpi/utils/regions.py b/covidnpi/utils/regions.py index ad5b702..9408796 100644 --- a/covidnpi/utils/regions.py +++ b/covidnpi/utils/regions.py @@ -173,6 +173,15 @@ "grancanaria": "gran_canaria", } +DICT_FILL_PROVINCIA = { + "CTB": "cantabria", + "CEU": "ceuta", + "MEL": "melilla", + "MUR": "murcia", + "NAV": "navarra", + "RIO": "rioja_la", +} + ISLA_TO_PROVINCIA = { "elhierro": "gran_canaria", "formentera": "tenerife", From a10984bc5756d48d3f8427af551df815444e06cd Mon Sep 17 00:00:00 2001 From: daniprec Date: Sat, 19 Jun 2021 11:42:58 +0200 Subject: [PATCH 68/79] Store isles as whole unit --- covidnpi/preprocess_and_score.py | 8 +++++--- covidnpi/score/score_ambitos.py | 5 ++--- covidnpi/score/score_islas.py | 29 +++++++++++++++++++++++++++++ covidnpi/utils/dictionaries.py | 6 +++++- covidnpi/utils/regions.py | 18 ++++++++++++++++++ 5 files changed, 59 insertions(+), 7 deletions(-) create mode 100644 covidnpi/score/score_islas.py diff --git a/covidnpi/preprocess_and_score.py b/covidnpi/preprocess_and_score.py index 9c041c4..72af76e 100644 --- a/covidnpi/preprocess_and_score.py +++ b/covidnpi/preprocess_and_score.py @@ -2,13 +2,13 @@ import typer +from covidnpi.score.score_ambitos import return_dict_score_ambitos from covidnpi.score.score_items import return_dict_score_items from covidnpi.score.score_medidas import return_dict_score_medidas -from covidnpi.score.score_ambitos import return_dict_score_ambitos -from covidnpi.utils.config import load_config +from covidnpi.score.score_islas import return_dict_score_islas from covidnpi.utils.dictionaries import ( - store_dict_scores, store_dict_provincia_to_medidas, + store_dict_scores, ) from covidnpi.utils.log import logger 
from covidnpi.utils.mobility import mobility_report_to_csv @@ -68,6 +68,8 @@ def main( ) dict_ambito = return_dict_score_ambitos(dict_items, path_taxonomia=path_taxonomia) + dict_islas = return_dict_score_islas(dict_ambito) + dict_ambito.update(dict_islas) path_score_ambito = os.path.join(path_output, "score_ambito") store_dict_scores(dict_ambito, path_output=path_score_ambito) diff --git a/covidnpi/score/score_ambitos.py b/covidnpi/score/score_ambitos.py index 7060b14..4d5d0f8 100644 --- a/covidnpi/score/score_ambitos.py +++ b/covidnpi/score/score_ambitos.py @@ -1,9 +1,8 @@ import pandas as pd import typer - -from covidnpi.utils.dictionaries import store_dict_scores, load_dict_scores +from covidnpi.utils.dictionaries import load_dict_scores, store_dict_scores from covidnpi.utils.log import logger -from covidnpi.utils.taxonomia import return_item_ponderacion, PATH_TAXONOMIA +from covidnpi.utils.taxonomia import PATH_TAXONOMIA, return_item_ponderacion def compute_proportion(df: pd.DataFrame, item: str): diff --git a/covidnpi/score/score_islas.py b/covidnpi/score/score_islas.py new file mode 100644 index 0000000..0ae7d75 --- /dev/null +++ b/covidnpi/score/score_islas.py @@ -0,0 +1,29 @@ +from functools import reduce + +from covidnpi.utils.log import logger +from covidnpi.utils.regions import ISLA_TO_PERCENTAGE + + +def store_score_isle(dict_islas: dict, dict_ambito: dict): + list_df = [] + for isle, percentage in dict_islas.items(): + try: + list_df.append(dict_ambito[isle].copy() * percentage) + except KeyError: + raise KeyError(f"Falta la isla: {isle}") + return reduce(lambda x, y: x.add(y, fill_value=0), list_df) + + +def return_dict_score_islas(dict_ambito: dict): + dict_ccaa = {} + # Ensure the isles of each group are present + # If so, perform grouping operation + for ccaa, dict_islas in ISLA_TO_PERCENTAGE.items(): + logger.debug(ccaa) + try: + df = store_score_isle(dict_islas, dict_ambito) + dict_ccaa.update({ccaa: df}) + except KeyError as er: + 
logger.error(f"No se pudo calcular {ccaa}. {er}") + continue + return dict_ccaa diff --git a/covidnpi/utils/dictionaries.py b/covidnpi/utils/dictionaries.py index c1e9fa2..ac3da31 100644 --- a/covidnpi/utils/dictionaries.py +++ b/covidnpi/utils/dictionaries.py @@ -2,6 +2,7 @@ import os import pandas as pd +from covidnpi.utils.log import logger def extract_codes_to_dict(df: pd.DataFrame, category: str): @@ -42,7 +43,10 @@ def store_dict_scores(dict_scores, path_output: str = "output/score_medidas"): for provincia, df_score in dict_scores.items(): path_file = os.path.join(path_output, provincia.split("/")[0] + ".csv") - df_score.to_csv(path_file, float_format="%.3f") + try: + df_score.to_csv(path_file, float_format="%.3f") + except AttributeError as er: + logger.error(f"Provincia {provincia} no puede guardarse: {er}") def load_dict_scores(path_scores: str = "output/score_medidas"): diff --git a/covidnpi/utils/regions.py b/covidnpi/utils/regions.py index 9408796..dc36400 100644 --- a/covidnpi/utils/regions.py +++ b/covidnpi/utils/regions.py @@ -246,3 +246,21 @@ "CE": 84202, "ML": 87076, } + +ISLA_TO_PERCENTAGE = { + "islas_baleares": { + "mallorca": 0.778, + "menorca": 0.081, + "ibiza": 0.130, + "formentera": 0.011, + }, + "islas_canarias": { + "tenerife": 0.427, + "gran_canaria": 0.393, + "lanzarote": 0.071, + "fuerteventura": 0.055, + "lapalma": 0.038, + "lagomera": 0.010, + "elhierro": 0.005, + }, +} From 8ddc92dbde0ea9801d51efb287d97ee9c44f7b5a Mon Sep 17 00:00:00 2001 From: daniprec Date: Sat, 19 Jun 2021 11:44:03 +0200 Subject: [PATCH 69/79] Function names --- covidnpi/score/score_islas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/covidnpi/score/score_islas.py b/covidnpi/score/score_islas.py index 0ae7d75..a806253 100644 --- a/covidnpi/score/score_islas.py +++ b/covidnpi/score/score_islas.py @@ -1,10 +1,11 @@ from functools import reduce +import pandas as pd from covidnpi.utils.log import logger from covidnpi.utils.regions import 
ISLA_TO_PERCENTAGE -def store_score_isle(dict_islas: dict, dict_ambito: dict): +def aggregate_score_isles(dict_islas: dict, dict_ambito: dict) -> pd.DataFrame: list_df = [] for isle, percentage in dict_islas.items(): try: @@ -14,14 +15,14 @@ def store_score_isle(dict_islas: dict, dict_ambito: dict): return reduce(lambda x, y: x.add(y, fill_value=0), list_df) -def return_dict_score_islas(dict_ambito: dict): +def return_dict_score_islas(dict_ambito: dict) -> dict: dict_ccaa = {} # Ensure the isles of each group are present # If so, perform grouping operation for ccaa, dict_islas in ISLA_TO_PERCENTAGE.items(): logger.debug(ccaa) try: - df = store_score_isle(dict_islas, dict_ambito) + df = aggregate_score_isles(dict_islas, dict_ambito) dict_ccaa.update({ccaa: df}) except KeyError as er: logger.error(f"No se pudo calcular {ccaa}. {er}") From cdd0311800b2fb860b3157658a03ed9eb444950a Mon Sep 17 00:00:00 2001 From: daniprec Date: Sun, 20 Jun 2021 12:18:06 +0200 Subject: [PATCH 70/79] Split islas canarias --- covidnpi/utils/regions.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/covidnpi/utils/regions.py b/covidnpi/utils/regions.py index dc36400..2051cf7 100644 --- a/covidnpi/utils/regions.py +++ b/covidnpi/utils/regions.py @@ -254,13 +254,11 @@ "ibiza": 0.130, "formentera": 0.011, }, - "islas_canarias": { - "tenerife": 0.427, - "gran_canaria": 0.393, - "lanzarote": 0.071, - "fuerteventura": 0.055, - "lapalma": 0.038, - "lagomera": 0.010, - "elhierro": 0.005, + "tenerife": {"tenerife": 0.77, "lanzarote": 0.13, "fuerteventura": 0.10}, + "gran_canaria": { + "gran_canaria": 0.88, + "lapalma": 0.09, + "lagomera": 0.02, + "elhierro": 0.01, }, } From e1bf46b51a8e4dab86dfdfa0ac381cf65f6d2e5d Mon Sep 17 00:00:00 2001 From: daniprec Date: Sun, 20 Jun 2021 12:20:51 +0200 Subject: [PATCH 71/79] Docstring --- covidnpi/score/score_islas.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git 
a/covidnpi/score/score_islas.py b/covidnpi/score/score_islas.py index a806253..a77e505 100644 --- a/covidnpi/score/score_islas.py +++ b/covidnpi/score/score_islas.py @@ -6,6 +6,25 @@ def aggregate_score_isles(dict_islas: dict, dict_ambito: dict) -> pd.DataFrame: + """Aggregates the scores of isles, normalizing by population percentage + + Parameters + ---------- + dict_islas : dict + Contains couples of {island: population percentage} + dict_ambito : dict + Contains couples of {island: pd.DataFrame of scores} + + Returns + ------- + pd.DataFrame + Scores, aggregated + + Raises + ------ + KeyError + If an island is missing + """ list_df = [] for isle, percentage in dict_islas.items(): try: @@ -16,6 +35,20 @@ def aggregate_score_isles(dict_islas: dict, dict_ambito: dict) -> pd.DataFrame: def return_dict_score_islas(dict_ambito: dict) -> dict: + """Given a dictionary of scores by island, returns a dictionary + of scores by group of islands + + Parameters + ---------- + dict_ambito : dict + Contains couples of {island: pd.DataFrame of scores} + + Returns + ------- + dict + Contains couples of {group of islands: pd.DataFrame of scores} + """ + # Initialize output dictionary dict_ccaa = {} # Ensure the isles of each group are present # If so, perform grouping operation From 6e2d340916c0d39a383e7a9a20f72b8fa47aab44 Mon Sep 17 00:00:00 2001 From: daniprec Date: Sun, 20 Jun 2021 12:27:51 +0200 Subject: [PATCH 72/79] Organize imports --- covidnpi/utils/combine.py | 11 ++++------- covidnpi/utils/mobility.py | 8 ++------ covidnpi/web/datastore.py | 12 ++++-------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/covidnpi/utils/combine.py b/covidnpi/utils/combine.py index b60a118..9cbef67 100644 --- a/covidnpi/utils/combine.py +++ b/covidnpi/utils/combine.py @@ -3,10 +3,10 @@ import numpy as np import pandas as pd - import typer from covidnpi.utils.config import load_config from covidnpi.utils.dictionaries import reverse_dictionary +from covidnpi.utils.regions 
import CODE_TO_PROVINCIA, POSTAL_TO_CODE, PROVINCIA_TO_CODE COLS_AMBITO = [ "fecha", @@ -62,14 +62,11 @@ def add_province_code( df: pd.DataFrame, path_config: str = "covidnpi/config.toml" ) -> pd.DataFrame: # Load all conversion dictionaries - province_to_code = load_config(path_config, "provincia_to_code") - code_to_province = load_config(path_config, "code_to_provincia") - postal_to_code = load_config(path_config, "postal_to_code") - code_to_postal = reverse_dictionary(postal_to_code) + code_to_postal = reverse_dictionary(POSTAL_TO_CODE) # Get codes - code = df["provincia"].map(province_to_code) + code = df["provincia"].map(PROVINCIA_TO_CODE) # Replace province name and add code - df["provincia"] = code.map(code_to_province) + df["provincia"] = code.map(CODE_TO_PROVINCIA) df.insert(loc=1, column="cod_prov", value=code.map(code_to_postal)) return df diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 15ab806..3959c92 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -2,15 +2,11 @@ import pandas as pd import typer - from covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed -from covidnpi.utils.regions import CODE_REASSIGN, PROVINCIA_TO_CODE, CODE_TO_PROVINCIA from covidnpi.utils.log import logger +from covidnpi.utils.regions import CODE_REASSIGN, CODE_TO_PROVINCIA, PROVINCIA_TO_CODE from covidnpi.utils.rho import compute_rho -from covidnpi.utils.series import ( - cumulative_incidence, - compute_growth_rate, -) +from covidnpi.utils.series import compute_growth_rate, cumulative_incidence URL_MOBILITY = "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv" diff --git a/covidnpi/web/datastore.py b/covidnpi/web/datastore.py index 2b2d477..82c52e6 100644 --- a/covidnpi/web/datastore.py +++ b/covidnpi/web/datastore.py @@ -3,16 +3,12 @@ import pandas as pd import typer - -from covidnpi.utils.casos import ( - load_casos_df, - return_casos_of_provincia_normed, -) +from 
covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed from covidnpi.utils.config import load_config -from covidnpi.utils.regions import PROVINCIA_TO_CODE from covidnpi.utils.log import logger -from covidnpi.utils.series import cumulative_incidence, compute_growth_rate -from covidnpi.utils.taxonomia import return_taxonomia, PATH_TAXONOMIA +from covidnpi.utils.regions import PROVINCIA_TO_CODE +from covidnpi.utils.series import compute_growth_rate, cumulative_incidence +from covidnpi.utils.taxonomia import PATH_TAXONOMIA, return_taxonomia from covidnpi.web.mongo import load_mongo From 387a7c217a4b1fa7c2e748e6900117478ff7e6a4 Mon Sep 17 00:00:00 2001 From: daniprec Date: Sun, 20 Jun 2021 12:29:14 +0200 Subject: [PATCH 73/79] Optimize imports --- covidnpi/utils/combine.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/covidnpi/utils/combine.py b/covidnpi/utils/combine.py index 9cbef67..2bc48f1 100644 --- a/covidnpi/utils/combine.py +++ b/covidnpi/utils/combine.py @@ -4,9 +4,13 @@ import numpy as np import pandas as pd import typer -from covidnpi.utils.config import load_config from covidnpi.utils.dictionaries import reverse_dictionary -from covidnpi.utils.regions import CODE_TO_PROVINCIA, POSTAL_TO_CODE, PROVINCIA_TO_CODE +from covidnpi.utils.regions import ( + CODE_TO_PROVINCIA, + ISLA_TO_PROVINCIA, + POSTAL_TO_CODE, + PROVINCIA_TO_CODE, +) COLS_AMBITO = [ "fecha", @@ -43,14 +47,10 @@ def combine_csv(path: Union[Path, str], colname: str) -> pd.DataFrame: return pd.concat(df_dict, names=[colname]).reset_index().drop(columns="level_1") -def add_unidad_territorial( - df: pd.DataFrame, path_config: str = "covidnpi/config.toml" -) -> pd.DataFrame: - # Load all conversion dictionaries - isle_to_province = load_config(path_config, "isla_to_provincia") +def add_unidad_territorial(df: pd.DataFrame) -> pd.DataFrame: # Check for islands unidad = df["provincia"].copy() - province = 
df["provincia"].replace(isle_to_province) + province = df["provincia"].replace(ISLA_TO_PROVINCIA) # Create unidad_territorial column, that contains the islands df.insert(loc=2, column="unidad_territorial", value=unidad) df.loc[unidad == province, "unidad_territorial"] = np.nan @@ -58,9 +58,7 @@ def add_unidad_territorial( return df -def add_province_code( - df: pd.DataFrame, path_config: str = "covidnpi/config.toml" -) -> pd.DataFrame: +def add_province_code(df: pd.DataFrame) -> pd.DataFrame: # Load all conversion dictionaries code_to_postal = reverse_dictionary(POSTAL_TO_CODE) # Get codes From 8779cb10e8a7bc3e3b3fac217165fae1a6e81f71 Mon Sep 17 00:00:00 2001 From: daniprec Date: Sun, 20 Jun 2021 12:41:46 +0200 Subject: [PATCH 74/79] Improve debug msg in datastore --- covidnpi/web/datastore.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/covidnpi/web/datastore.py b/covidnpi/web/datastore.py index 82c52e6..7c157bd 100644 --- a/covidnpi/web/datastore.py +++ b/covidnpi/web/datastore.py @@ -52,7 +52,9 @@ def store_scores_in_mongo( "fechas": df.index.tolist(), } except KeyError: - logger.debug(f"\nProvincia '{provincia}' code not found\n") + logger.debug( + f"\nProvincia '{provincia}' code not found. 
Not stored in mongo.\n" + ) continue logger.debug(f"\n{provincia}") for ambito in list_ambito: From da17555d4c378a0c4050802ed12c15abb3b34a1d Mon Sep 17 00:00:00 2001 From: daniprec Date: Mon, 21 Jun 2021 10:47:22 +0200 Subject: [PATCH 75/79] Update dictionaries --- covidnpi/utils/combine.py | 7 ++----- covidnpi/utils/mobility.py | 5 ++--- covidnpi/utils/regions.py | 9 ++++++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/covidnpi/utils/combine.py b/covidnpi/utils/combine.py index 2bc48f1..8db44ea 100644 --- a/covidnpi/utils/combine.py +++ b/covidnpi/utils/combine.py @@ -4,11 +4,10 @@ import numpy as np import pandas as pd import typer -from covidnpi.utils.dictionaries import reverse_dictionary from covidnpi.utils.regions import ( + CODE_TO_POSTAL, CODE_TO_PROVINCIA, ISLA_TO_PROVINCIA, - POSTAL_TO_CODE, PROVINCIA_TO_CODE, ) @@ -59,13 +58,11 @@ def add_unidad_territorial(df: pd.DataFrame) -> pd.DataFrame: def add_province_code(df: pd.DataFrame) -> pd.DataFrame: - # Load all conversion dictionaries - code_to_postal = reverse_dictionary(POSTAL_TO_CODE) # Get codes code = df["provincia"].map(PROVINCIA_TO_CODE) # Replace province name and add code df["provincia"] = code.map(CODE_TO_PROVINCIA) - df.insert(loc=1, column="cod_prov", value=code.map(code_to_postal)) + df.insert(loc=1, column="cod_prov", value=code.map(CODE_TO_POSTAL)) return df diff --git a/covidnpi/utils/mobility.py b/covidnpi/utils/mobility.py index 3959c92..d6abe98 100644 --- a/covidnpi/utils/mobility.py +++ b/covidnpi/utils/mobility.py @@ -4,7 +4,7 @@ import typer from covidnpi.utils.casos import load_casos_df, return_casos_of_provincia_normed from covidnpi.utils.log import logger -from covidnpi.utils.regions import CODE_REASSIGN, CODE_TO_PROVINCIA, PROVINCIA_TO_CODE +from covidnpi.utils.regions import CODE_REASSIGN, CODE_TO_FILENAME, CODE_TO_PROVINCIA from covidnpi.utils.rho import compute_rho from covidnpi.utils.series import compute_growth_rate, cumulative_incidence @@ -94,7 +94,6 @@ 
def mobility_report_to_csv( mob = load_mobility_report() casos = load_casos_df() - code_to_filename = {v: k for k, v in PROVINCIA_TO_CODE.items()} for code in mob["code"].unique(): # Reassign code if needed @@ -117,7 +116,7 @@ def mobility_report_to_csv( .assign(ia7=series_ia7, growth_rate=series_growth, rho=series_rho) .rename_axis("date", axis=0) ) - filename = code_to_filename[code] + filename = CODE_TO_FILENAME[code] df_store.to_csv(os.path.join(path_output, f"{filename}.csv")) diff --git a/covidnpi/utils/regions.py b/covidnpi/utils/regions.py index 2051cf7..2054bc8 100644 --- a/covidnpi/utils/regions.py +++ b/covidnpi/utils/regions.py @@ -1,3 +1,5 @@ +from covidnpi.utils.dictionaries import reverse_dictionary + POSTAL_TO_CODE = { "01": "VI", "02": "AB", @@ -53,6 +55,8 @@ "52": "ML", } +CODE_TO_POSTAL = reverse_dictionary(POSTAL_TO_CODE) + CODE_TO_PROVINCIA = { "A": "Alacant", "AB": "Albacete", @@ -117,7 +121,8 @@ "almeria": "AL", "avila": "AV", "badajoz": "BA", - "mallorca": "PM", + # "mallorca": "PM", + "islas_baleares": "PM", "barcelona": "B", "burgos": "BU", "caceres": "CC", @@ -166,6 +171,8 @@ "zaragoza": "Z", } +CODE_TO_FILENAME = reverse_dictionary(PROVINCIA_TO_CODE) + DICT_PROVINCE_RENAME = { "a_coruna": "coruna_la", "cyl": "", From ee50a2dcd82aa6ccc9ee857f1b996dce5c8432a6 Mon Sep 17 00:00:00 2001 From: daniprec Date: Mon, 21 Jun 2021 10:47:50 +0200 Subject: [PATCH 76/79] Assert percentages sum 1 --- covidnpi/score/score_islas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/covidnpi/score/score_islas.py b/covidnpi/score/score_islas.py index a77e505..be5fc3e 100644 --- a/covidnpi/score/score_islas.py +++ b/covidnpi/score/score_islas.py @@ -26,6 +26,7 @@ def aggregate_score_isles(dict_islas: dict, dict_ambito: dict) -> pd.DataFrame: If an island is missing """ list_df = [] + assert sum(dict_islas.values()) == 1, "The percentage does not sum 1" for isle, percentage in dict_islas.items(): try: list_df.append(dict_ambito[isle].copy() * percentage) From 
1822b47bcee99d6e379ab888fee7a6aecf6bc2e8 Mon Sep 17 00:00:00 2001 From: daniprec Date: Mon, 21 Jun 2021 10:48:10 +0200 Subject: [PATCH 77/79] Keep isles apart from provinces --- covidnpi/preprocess_and_score.py | 5 +++-- covidnpi/utils/dictionaries.py | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/covidnpi/preprocess_and_score.py b/covidnpi/preprocess_and_score.py index 72af76e..a98fcd6 100644 --- a/covidnpi/preprocess_and_score.py +++ b/covidnpi/preprocess_and_score.py @@ -3,12 +3,13 @@ import typer from covidnpi.score.score_ambitos import return_dict_score_ambitos +from covidnpi.score.score_islas import return_dict_score_islas from covidnpi.score.score_items import return_dict_score_items from covidnpi.score.score_medidas import return_dict_score_medidas -from covidnpi.score.score_islas import return_dict_score_islas from covidnpi.utils.dictionaries import ( store_dict_provincia_to_medidas, store_dict_scores, + update_keep_old_keys, ) from covidnpi.utils.log import logger from covidnpi.utils.mobility import mobility_report_to_csv @@ -69,7 +70,7 @@ def main( dict_ambito = return_dict_score_ambitos(dict_items, path_taxonomia=path_taxonomia) dict_islas = return_dict_score_islas(dict_ambito) - dict_ambito.update(dict_islas) + dict_ambito = update_keep_old_keys(dict_ambito, dict_islas) path_score_ambito = os.path.join(path_output, "score_ambito") store_dict_scores(dict_ambito, path_output=path_score_ambito) diff --git a/covidnpi/utils/dictionaries.py b/covidnpi/utils/dictionaries.py index ac3da31..bfd1a20 100644 --- a/covidnpi/utils/dictionaries.py +++ b/covidnpi/utils/dictionaries.py @@ -5,6 +5,31 @@ from covidnpi.utils.log import logger +def update_keep_old_keys(dict_old: dict, dict_add: dict, label: str = "_isla") -> dict: + """Updates a dictionary, but if the same keys are found, keep old ones with a label + + Parameters + ---------- + dict_old : dict + Dictionary to update + dict_add : dict + Dictionary with new 
information + label : str, optional + Label of old keys, by default "_isla" + + Returns + ------- + dict + Updated dictionary + """ + dict_new = dict_old.copy() + for key, _ in dict_add.items(): + if key in dict_old.keys(): + dict_new[key + label] = dict_new.pop(key) + dict_new.update(dict_add) + return dict_new + + def extract_codes_to_dict(df: pd.DataFrame, category: str): df_sub = df[df["Nombre TM"] == category] d = pd.Series(df_sub["Literal"].values, index=df_sub["Código"]).to_dict() From 3edd742e82a7eed280033bb29d68069632649f77 Mon Sep 17 00:00:00 2001 From: daniprec Date: Wed, 23 Jun 2021 17:26:06 +0200 Subject: [PATCH 78/79] Rename Tenerife as Santa Cruz de Tenerife --- covidnpi/utils/regions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/covidnpi/utils/regions.py b/covidnpi/utils/regions.py index 2054bc8..e9f6ce9 100644 --- a/covidnpi/utils/regions.py +++ b/covidnpi/utils/regions.py @@ -261,7 +261,11 @@ "ibiza": 0.130, "formentera": 0.011, }, - "tenerife": {"tenerife": 0.77, "lanzarote": 0.13, "fuerteventura": 0.10}, + "santa_cruz_de_tenerife": { + "tenerife": 0.77, + "lanzarote": 0.13, + "fuerteventura": 0.10, + }, "gran_canaria": { "gran_canaria": 0.88, "lapalma": 0.09, From 96a5914a7656ddb1c8fdeda361b297ab4cb37526 Mon Sep 17 00:00:00 2001 From: daniprec Date: Wed, 23 Jun 2021 17:57:30 +0200 Subject: [PATCH 79/79] Improve test score medidas --- test/data/score_medidas.csv | 2 ++ test/test_score_medidas.py | 28 ++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 test/data/score_medidas.csv diff --git a/test/data/score_medidas.csv b/test/data/score_medidas.csv new file mode 100644 index 0000000..103225b --- /dev/null +++ b/test/data/score_medidas.csv @@ -0,0 +1,2 @@ +fecha,porcentaje_afectado,CD.12 +1996-09-14,100.0,0.5 diff --git a/test/test_score_medidas.py b/test/test_score_medidas.py index e880792..2dfbc58 100644 --- a/test/test_score_medidas.py +++ b/test/test_score_medidas.py @@ 
-1,13 +1,29 @@ import pandas as pd - +import pytest from covidnpi.score.score_medidas import score_medidas from covidnpi.utils.taxonomia import return_taxonomia +@pytest.fixture +def medidas() -> pd.DataFrame: + path_medidas = "test/data/medidas.csv" + yield pd.read_csv(path_medidas) + + +@pytest.fixture +def taxonomia() -> pd.DataFrame: + path_taxonomia = "test/data/taxonomia.xlsx" + yield return_taxonomia(path_taxonomia=path_taxonomia, path_output=None) + + +@pytest.fixture +def sc_medidas() -> pd.DataFrame: + path_score = "test/data/score_medidas.csv" + yield pd.read_csv(path_score, parse_dates=["fecha"]) + + def test_score_medidas( - path_medidas: str = "test/data/medidas.csv", - path_taxonomia: str = "test/data/taxonomia.xlsx", + medidas: pd.DataFrame, taxonomia: pd.DataFrame, sc_medidas: pd.DataFrame ): - taxonomia = return_taxonomia(path_taxonomia=path_taxonomia, path_output=None) - med = pd.read_csv(path_medidas) - sc_med = score_medidas(med, taxonomia, path_out_conditions=None) + sc_med = score_medidas(medidas, taxonomia, path_out_conditions=None).reset_index() + pd.testing.assert_frame_equal(sc_med, sc_medidas, check_names=False)