From 6fe87f0a9b2e17d51b6655274c00b34f20befd2e Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 16:44:47 -0400 Subject: [PATCH 1/8] remove demographics models from OG-IND --- ogind/demographics.py | 703 ------------------------------------------ 1 file changed, 703 deletions(-) delete mode 100755 ogind/demographics.py diff --git a/ogind/demographics.py b/ogind/demographics.py deleted file mode 100755 index b8c57b1..0000000 --- a/ogind/demographics.py +++ /dev/null @@ -1,703 +0,0 @@ -""" ------------------------------------------------------------------------- -Functions for generating demographic objects necessary for the OG-IND -model ------------------------------------------------------------------------- -""" -# Import packages -import os -import requests -import numpy as np -import scipy.optimize as opt -import pandas as pd -import matplotlib.pyplot as plt -from ogcore import parameter_plots as pp -from ogind.utils import get_legacy_session -from io import StringIO - - -UN_COUNTRY_CODE = "356" # UN code for IND -# create output director for figures -CUR_PATH = os.path.split(os.path.abspath(__file__))[0] -OUTPUT_DIR = os.path.join(CUR_PATH, "OUTPUT", "Demographics") -if os.access(OUTPUT_DIR, os.F_OK) is False: - os.makedirs(OUTPUT_DIR) - - -""" ------------------------------------------------------------------------- -Define functions ------------------------------------------------------------------------- -""" - - -def get_un_data( - variable_code, country_id=UN_COUNTRY_CODE, start_year=2022, end_year=2022 -): - """ - This function retrieves data from the United Nations Data Portal API - for UN population data (see - https://population.un.org/dataportal/about/dataapi) - - Args: - variable_code (str): variable code for UN data - country_id (str): country id for UN data - start_year (int): start year for UN data - end_year (int): end year for UN data - - Returns: - df (Pandas DataFrame): DataFrame of UN data - """ - target = ( - "https://population.un.org/dataportalapi/api/v1/data/indicators/" - + variable_code - + "/locations/" - + country_id - + "/start/" - + str(start_year) - + "/end/" - + str(end_year) - ) - - # get data from url - response = get_legacy_session().get(target) - # Check if the request was successful before processing - if response.status_code == 200: - # Converts call into JSON - j = response.json() - else: - print( - f"Failed to retrieve population data. HTTP status code: {response.status_code}" - ) - # Convert JSON into a pandas DataFrame. - # pd.json_normalize flattens the JSON to accommodate nested lists - # within the JSON structure - df = pd.json_normalize(j["data"]) - # Loop until there are new pages with data - while j["nextPage"] is not None: - # Reset the target to the next page - target = j["nextPage"] - # call the API for the next page - response = get_legacy_session().get(target) - # Convert response to JSON format - if response.status_code == 200: - # Converts call into JSON - j = response.json() - else: - print( - f"Failed to retrieve population data. HTTP status code: {response.status_code}" - ) - # Store the next page in a data frame - df_temp = pd.json_normalize(j["data"]) - # Append next page to the data frame - df = pd.concat([df, df_temp]) - # keep just what is needed from data - df = df[df.variant == "Median"] - df = df[df.sex == "Both sexes"][["timeLabel", "ageLabel", "value"]] - df.rename({"timeLabel": "year", "ageLabel": "age"}, axis=1, inplace=True) - df.loc[df.age == "100+", "age"] = 100 - df.age = df.age.astype(int) - df.year = df.year.astype(int) - df = df[df.age <= 100] - - return df - - -def get_fert(totpers=100, min_age=0, max_age=100, graph=False): - """ - This function generates a vector of fertility rates by model period - age that corresponds to the fertility rate data by age in years. - - Args: - totpers (int): total number of agent life periods (E+S), >= 3 - min_age (int): age in years at which agents are born, >= 0 - max_age (int): age in years at which agents die with certainty, - >= 4 - graph (bool): =True if want graphical output - - Returns: - fert_rates (Numpy array): fertility rates for each model period - of life - - """ - # Read UN data - df = get_un_data("68") - # put in vector - fert_rates = df.value.values - # fill in with zeros for ages < 15 and > 49 - # NOTE: this assumes min_year < 15 and max_age > 49 - fert_rates = np.append(fert_rates, np.zeros(max_age - 49)) - fert_rates = np.append(np.zeros(15 - min_age), fert_rates) - # divide by 1000 because fertility rates are number of births per - # 1000 woman and we want births per person (might update to account - # from fraction men more correctly - below assumes 50/50 men and women) - fert_rates = fert_rates / 2000 - # Rebin data in the case that model period not equal to one calendar - # year - fert_rates = pop_rebin(fert_rates, totpers) - - # if graph: # need to fix plot function for new data output - # pp.plot_fert_rates(fert_rates, age_midp, totpers, min_age, max_age, - # fert_rates, fert_rates, output_dir=OUTPUT_DIR) - - output_dir = OUTPUT_DIR - # Using pyplot here until update to OG-Core mort rates plotting function - plt.plot( - np.arange(totpers), - fert_rates, - ) - plt.xlabel(r"Age $s$") - plt.ylabel(r"Fertility rate") - plt.legend(loc="upper left") - plt.text( - -5, - -0.2, - "Source: United Nations Population Prospects.", - fontsize=9, - ) - plt.tight_layout(rect=(0, 0.03, 1, 1)) - output_path = os.path.join(output_dir, "fert_rates") - plt.savefig(output_path) - plt.close() - - return fert_rates - - -def get_mort(totpers=100, min_age=0, max_age=100, graph=True): - """ - This function generates a vector of mortality rates by model period - age. - - Args: - totpers (int): total number of agent life periods (E+S), >= 3 - min_age (int): age in years at which agents are born, >= 0 - max_age (int): age in years at which agents die with certainty, - >= 4 - graph (bool): =True if want graphical output - - Returns: - mort_rates (Numpy array) mortality rates that correspond to each - period of life - infmort_rate (scalar): infant mortality rate - - """ - # Read UN data - df = get_un_data("80") - # put in vector - mort_rates_data = df.value.values - # In UN data, mortality rates for 0 year olds are the infant - # mortality rates - infmort_rate = mort_rates_data[0] - # Rebin data in the case that model period not equal to one calendar - # year - mort_rates = pop_rebin(mort_rates_data[1:], totpers) - - # Mortality rate in last period is set to 1 - mort_rates[-1] = 1 - - if graph: - output_dir = OUTPUT_DIR - # Using pyplot here until update to OG-Core mort rates plotting function - plt.plot( - df.age.values, - mort_rates_data, - ) - plt.xlabel(r"Age $s$") - plt.ylabel(r"Mortality rate $\rho_{s}$") - plt.legend(loc="upper left") - plt.text( - -5, - -0.2, - "Source: United Nations Population Prospects.", - fontsize=9, - ) - plt.tight_layout(rect=(0, 0.03, 1, 1)) - # Save or return figure - if output_dir: - output_path = os.path.join(output_dir, "mort_rates") - plt.savefig(output_path) - plt.close() - else: - plt.show() - - return mort_rates, infmort_rate - - -def pop_rebin(curr_pop_dist, totpers_new): - """ - For cases in which totpers (E+S) is less than the number of periods - in the population distribution data, this function calculates a new - population distribution vector with totpers (E+S) elements. - - Args: - curr_pop_dist (Numpy array): population distribution over N - periods - totpers_new (int): number of periods to which we are - transforming the population distribution, >= 3 - - Returns: - curr_pop_new (Numpy array): new population distribution over - totpers (E+S) periods that approximates curr_pop_dist - - """ - # Number of periods in original data - assert totpers_new >= 3 - # Number of periods in original data - totpers_orig = len(curr_pop_dist) - if int(totpers_new) == totpers_orig: - curr_pop_new = curr_pop_dist - elif int(totpers_new) < totpers_orig: - num_sub_bins = float(10000) - curr_pop_sub = np.repeat( - np.float64(curr_pop_dist) / num_sub_bins, num_sub_bins - ) - len_subbins = (np.float64(totpers_orig * num_sub_bins)) / totpers_new - curr_pop_new = np.zeros(totpers_new, dtype=np.float64) - end_sub_bin = 0 - for i in range(totpers_new): - beg_sub_bin = int(end_sub_bin) - end_sub_bin = int(np.rint((i + 1) * len_subbins)) - curr_pop_new[i] = curr_pop_sub[beg_sub_bin:end_sub_bin].sum() - # Return curr_pop_new to single precision float (float32) - # datatype - curr_pop_new = np.float32(curr_pop_new) - - return curr_pop_new - - -def get_imm_rates(totpers=100, min_age=0, max_age=100): - """ - Calculate immigration rates by age as a residual given population - levels in different periods, then output average calculated - immigration rate. We have to replace the first mortality rate in - this function in order to adjust the first implied immigration rate - - Args: - totpers (int): total number of agent life periods (E+S), >= 3 - min_age (int): age in years at which agents are born, >= 0 - max_age (int): age in years at which agents die with certainty, - >= 4 - graph (bool): =True if want graphical output - - Returns: - imm_rates (Numpy array):immigration rates that correspond to - each period of life, length E+S - - """ - # Read UN data - start_year = 2016 - num_years = 4 - end_year = start_year + num_years - 1 - df = get_un_data("47", start_year=start_year, end_year=end_year) - - # separate pop dist by year and put into dictionary of arrays - pop_dict = {} - for t in range(num_years): - pop_dist = df[ - (df.year == start_year + t) & (df.age <= 100) & (df.age > 0) - ].value.values - pop_dict[t] = pop_rebin(pop_dist, totpers) - pop_dict[t] = pop_dist - - # Create num_years - 1 years of estimated immigration rates for youngest age - # individuals - imm_mat = np.zeros((num_years - 1, totpers)) - pop_list = [] - for t in range(num_years): - pop_list.append(pop_dict[t][0]) - pop11vec = np.array(pop_list[:-1]) - pop21vec = np.array(pop_list[1:]) - fert_rates = get_fert(totpers, min_age, max_age, False) - mort_rates, infmort_rate = get_mort(totpers, min_age, max_age, False) - newbornvec = np.dot( - fert_rates, np.vstack((pop_dict[0], pop_dict[1], pop_dict[2])).T - ) - imm_mat[:, 0] = (pop21vec - (1 - infmort_rate) * newbornvec) / pop11vec - # Estimate num_years - 1 years of immigration rates for all other-aged - # individuals - pop_mat_dict = {} - pop_mat_dict[0] = np.vstack( - (pop_dict[0][:-1], pop_dict[1][:-1], pop_dict[2][:-1]) - ) - pop_mat_dict[1] = np.vstack( - (pop_dict[0][1:], pop_dict[1][1:], pop_dict[2][1:]) - ) - pop_mat_dict[2] = np.vstack( - (pop_dict[1][1:], pop_dict[2][1:], pop_dict[3][1:]) - ) - mort_mat = np.tile(mort_rates[:-1], (num_years - 1, 1)) - imm_mat[:, 1:] = ( - pop_mat_dict[2] - (1 - mort_mat) * pop_mat_dict[0] - ) / pop_mat_dict[1] - # Final estimated immigration rates are the averages over 3 years - imm_rates = imm_mat.mean(axis=0) - # replace highest agg imm rate bc it doesn't make sense - # imm_rates[-1] = imm_rates[-2] - - output_dir = OUTPUT_DIR - # Using pyplot here until update to OG-Core mort rates plotting function - plt.plot( - np.arange(totpers), - imm_rates, - label="Estimated", - ) - plt.xlabel(r"Age $s$") - plt.ylabel(r"Immigration Rates") - plt.legend(loc="upper left") - plt.text( - -5, - -0.2, - "Source: United Nations Population Prospects.", - fontsize=9, - ) - plt.tight_layout(rect=(0, 0.03, 1, 1)) - output_path = os.path.join(output_dir, "imm_rates_w_un_data.png") - plt.savefig(output_path) - plt.close() - - return imm_rates - - -def immsolve(imm_rates, *args): - """ - This function generates a vector of errors representing the - difference in two consecutive periods stationary population - distributions. This vector of differences is the zero-function - objective used to solve for the immigration rates vector, similar to - the original immigration rates vector from get_imm_rates(), that - sets the steady-state population distribution by age equal to the - population distribution in period int(1.5*S) - - Args: - imm_rates (Numpy array):immigration rates that correspond to - each period of life, length E+S - args (tuple): (fert_rates, mort_rates, infmort_rate, omega_cur, - g_n_SS) - - Returns: - omega_errs (Numpy array): difference between omega_new and - omega_cur_pct, length E+S - - """ - fert_rates, mort_rates, infmort_rate, omega_cur_lev, g_n_SS = args - omega_cur_pct = omega_cur_lev / omega_cur_lev.sum() - totpers = len(fert_rates) - OMEGA = np.zeros((totpers, totpers)) - OMEGA[0, :] = (1 - infmort_rate) * fert_rates + np.hstack( - (imm_rates[0], np.zeros(totpers - 1)) - ) - OMEGA[1:, :-1] += np.diag(1 - mort_rates[:-1]) - OMEGA[1:, 1:] += np.diag(imm_rates[1:]) - omega_new = np.dot(OMEGA, omega_cur_pct) / (1 + g_n_SS) - omega_errs = omega_new - omega_cur_pct - - return omega_errs - - -def get_pop_objs( - E=20, - S=80, - T=320, - min_age=0, - max_age=100, - data_year=2021, - model_year=2022, - GraphDiag=True, -): - """ - This function produces the demographics objects to be used in the - OG-IND model package. - - Args: - E (int): number of model periods in which agent is not - economically active, >= 1 - S (int): number of model periods in which agent is economically - active, >= 3 - T (int): number of periods to be simulated in TPI, > 2*S - min_age (int): age in years at which agents are born, >= 0 - max_age (int): age in years at which agents die with certainty, - >= 4 - model_year (int): current year for which analysis will begin, - >= 2016 - GraphDiag (bool): =True if want graphical output and printed - diagnostics - - Returns: - pop_dict (dict): includes: - omega_path_S (Numpy array), time path of the population - distribution from the current state to the steady-state, - size T+S x S - g_n_SS (scalar): steady-state population growth rate - omega_SS (Numpy array): normalized steady-state population - distribution, length S - mort_rates (Numpy array): mortality rates that correspond to - each model period of life, length S - g_n_path (Numpy array): population growth rates over the time - path, length T + S - - """ - assert model_year >= 2011 and model_year <= 2100 - assert data_year >= 2011 and data_year <= 2100 - # need data year to be before model year to get omega_S_preTP - assert data_year < model_year - - # Get fertility, mortality, and immigration rates - # will be used to generate population distribution in future years - fert_rates = get_fert(E + S, min_age, max_age) - mort_rates, infmort_rate = get_mort(E + S, min_age, max_age) - mort_rates_S = mort_rates[-S:] - imm_rates_orig = get_imm_rates(E + S, min_age, max_age) - OMEGA_orig = np.zeros((E + S, E + S)) - OMEGA_orig[0, :] = (1 - infmort_rate) * fert_rates + np.hstack( - (imm_rates_orig[0], np.zeros(E + S - 1)) - ) - OMEGA_orig[1:, :-1] += np.diag(1 - mort_rates[:-1]) - OMEGA_orig[1:, 1:] += np.diag(imm_rates_orig[1:]) - - # Solve for steady-state population growth rate and steady-state - # population distribution by age using eigenvalue and eigenvector - # decomposition - eigvalues, eigvectors = np.linalg.eig(OMEGA_orig) - g_n_SS = (eigvalues[np.isreal(eigvalues)].real).max() - 1 - eigvec_raw = eigvectors[ - :, (eigvalues[np.isreal(eigvalues)].real).argmax() - ].real - omega_SS_orig = eigvec_raw / eigvec_raw.sum() - - # Generate time path of the nonstationary population distribution - omega_path_lev = np.zeros((E + S, T + S)) - pop_data = get_un_data("47", start_year=data_year, end_year=data_year) - # TODO: allow one to read in multiple years of UN forecast then - # extrapolate from the end of that - pop_data_sample = pop_data[ - (pop_data["age"] >= min_age - 1) & (pop_data["age"] <= max_age - 1) - ] - pop = pop_data_sample.value.values - # Generate the current population distribution given that E+S might - # be less than max_age-min_age+1 - age_per_EpS = np.arange(1, E + S + 1) - pop_EpS = pop_rebin(pop, E + S) - pop_pct = pop_EpS / pop_EpS.sum() - # Age the data to the model year - pop_curr = pop_EpS.copy() - pop_next = np.dot(OMEGA_orig, pop_curr) - g_n_curr = (pop_next[-S:].sum() - pop_curr[-S:].sum()) / pop_curr[ - -S: - ].sum() # g_n in data year - pop_past = pop_curr - for per in range(model_year - data_year): - pop_next = np.dot(OMEGA_orig, pop_curr) - g_n_curr = (pop_next[-S:].sum() - pop_curr[-S:].sum()) / pop_curr[ - -S: - ].sum() - pop_past = pop_curr - pop_curr = pop_next - # Generate time path of the population distribution - omega_path_lev[:, 0] = pop_curr.copy() - for per in range(1, T + S): - pop_next = np.dot(OMEGA_orig, pop_curr) - omega_path_lev[:, per] = pop_next.copy() - pop_curr = pop_next.copy() - - # Force the population distribution after 1.5*S periods to be the - # steady-state distribution by adjusting immigration rates, holding - # constant mortality, fertility, and SS growth rates - imm_tol = 1e-14 - fixper = int(1.5 * S) - omega_SSfx = omega_path_lev[:, fixper] / omega_path_lev[:, fixper].sum() - imm_objs = ( - fert_rates, - mort_rates, - infmort_rate, - omega_path_lev[:, fixper], - g_n_SS, - ) - imm_fulloutput = opt.fsolve( - immsolve, - imm_rates_orig, - args=(imm_objs), - full_output=True, - xtol=imm_tol, - ) - imm_rates_adj = imm_fulloutput[0] - imm_diagdict = imm_fulloutput[1] - omega_path_S = omega_path_lev[-S:, :] / np.tile( - omega_path_lev[-S:, :].sum(axis=0), (S, 1) - ) - omega_path_S[:, fixper:] = np.tile( - omega_path_S[:, fixper].reshape((S, 1)), (1, T + S - fixper) - ) - g_n_path = np.zeros(T + S) - g_n_path[0] = g_n_curr.copy() - g_n_path[1:] = ( - omega_path_lev[-S:, 1:].sum(axis=0) - - omega_path_lev[-S:, :-1].sum(axis=0) - ) / omega_path_lev[-S:, :-1].sum(axis=0) - g_n_path[fixper + 1 :] = g_n_SS - omega_S_preTP = (pop_past.copy()[-S:]) / (pop_past.copy()[-S:].sum()) - imm_rates_mat = np.hstack( - ( - np.tile(np.reshape(imm_rates_orig[E:], (S, 1)), (1, fixper)), - np.tile( - np.reshape(imm_rates_adj[E:], (S, 1)), (1, T + S - fixper) - ), - ) - ) - - if GraphDiag: - # Check whether original SS population distribution is close to - # the period-T population distribution - omegaSSmaxdif = np.absolute( - omega_SS_orig - (omega_path_lev[:, T] / omega_path_lev[:, T].sum()) - ).max() - if omegaSSmaxdif > 0.0003: - print( - "POP. WARNING: Max. abs. dist. between original SS " - + "pop. dist'n and period-T pop. dist'n is greater than" - + " 0.0003. It is " - + str(omegaSSmaxdif) - + "." - ) - else: - print( - "POP. SUCCESS: orig. SS pop. dist is very close to " - + "period-T pop. dist'n. The maximum absolute " - + "difference is " - + str(omegaSSmaxdif) - + "." - ) - - # Plot the adjusted steady-state population distribution versus - # the original population distribution. The difference should be - # small - omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max() - if omegaSSvTmaxdiff > 0.0003: - print( - "POP. WARNING: The maximum absolute difference " - + "between any two corresponding points in the original" - + " and adjusted steady-state population " - + "distributions is" - + str(omegaSSvTmaxdiff) - + ", " - + "which is greater than 0.0003." - ) - else: - print( - "POP. SUCCESS: The maximum absolute difference " - + "between any two corresponding points in the original" - + " and adjusted steady-state population " - + "distributions is " - + str(omegaSSvTmaxdiff) - ) - - # Print whether or not the adjusted immigration rates solved the - # zero condition - immtol_solved = np.absolute(imm_diagdict["fvec"].max()) < imm_tol - if immtol_solved: - print( - "POP. SUCCESS: Adjusted immigration rates solved " - + "with maximum absolute error of " - + str(np.absolute(imm_diagdict["fvec"].max())) - + ", which is less than the tolerance of " - + str(imm_tol) - ) - else: - print( - "POP. WARNING: Adjusted immigration rates did not " - + "solve. Maximum absolute error of " - + str(np.absolute(imm_diagdict["fvec"].max())) - + " is greater than the tolerance of " - + str(imm_tol) - ) - - # Test whether the steady-state growth rates implied by the - # adjusted OMEGA matrix equals the steady-state growth rate of - # the original OMEGA matrix - OMEGA2 = np.zeros((E + S, E + S)) - OMEGA2[0, :] = (1 - infmort_rate) * fert_rates + np.hstack( - (imm_rates_adj[0], np.zeros(E + S - 1)) - ) - OMEGA2[1:, :-1] += np.diag(1 - mort_rates[:-1]) - OMEGA2[1:, 1:] += np.diag(imm_rates_adj[1:]) - eigvalues2, eigvectors2 = np.linalg.eig(OMEGA2) - g_n_SS_adj = (eigvalues[np.isreal(eigvalues2)].real).max() - 1 - if np.max(np.absolute(g_n_SS_adj - g_n_SS)) > 10 ** (-8): - print( - "FAILURE: The steady-state population growth rate" - + " from adjusted OMEGA is different (diff is " - + str(g_n_SS_adj - g_n_SS) - + ") than the steady-" - + "state population growth rate from the original" - + " OMEGA." - ) - elif np.max(np.absolute(g_n_SS_adj - g_n_SS)) <= 10 ** (-8): - print( - "SUCCESS: The steady-state population growth rate" - + " from adjusted OMEGA is close to (diff is " - + str(g_n_SS_adj - g_n_SS) - + ") the steady-" - + "state population growth rate from the original" - + " OMEGA." - ) - - # Do another test of the adjusted immigration rates. Create the - # new OMEGA matrix implied by the new immigration rates. Plug in - # the adjusted steady-state population distribution. Hit is with - # the new OMEGA transition matrix and it should return the new - # steady-state population distribution - omega_new = np.dot(OMEGA2, omega_SSfx) - omega_errs = np.absolute(omega_new - omega_SSfx) - print( - "The maximum absolute difference between the adjusted " - + "steady-state population distribution and the " - + "distribution generated by hitting the adjusted OMEGA " - + "transition matrix is " - + str(omega_errs.max()) - ) - - # Plot the original immigration rates versus the adjusted - # immigration rates - immratesmaxdiff = np.absolute(imm_rates_orig - imm_rates_adj).max() - print( - "The maximum absolute distance between any two points " - + "of the original immigration rates and adjusted " - + "immigration rates is " - + str(immratesmaxdiff) - ) - - # plots - pp.plot_omega_fixed( - age_per_EpS, omega_SS_orig, omega_SSfx, E, S, output_dir=OUTPUT_DIR - ) - pp.plot_imm_fixed( - age_per_EpS, - imm_rates_orig, - imm_rates_adj, - E, - S, - output_dir=OUTPUT_DIR, - ) - pp.plot_population_path( - age_per_EpS, - pop_pct, - omega_path_lev, - omega_SSfx, - model_year, - E, - S, - output_dir=OUTPUT_DIR, - ) - - # return omega_path_S, g_n_SS, omega_SSfx, - # mort_rates_S, and g_n_path - pop_dict = { - "omega": omega_path_S.T, - "g_n_ss": g_n_SS, - "omega_SS": omega_SSfx[-S:] / omega_SSfx[-S:].sum(), - "rho": [mort_rates_S], - "g_n": g_n_path, - "imm_rates": imm_rates_mat.T, - "omega_S_preTP": omega_S_preTP, - } - - return pop_dict From 6a4e45174ce937375479b21c5dd04c89d7ab3a46 Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 17:40:58 -0400 Subject: [PATCH 2/8] fix link --- PSL_catalog.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PSL_catalog.json b/PSL_catalog.json index 1d44fd6..be53255 100644 --- a/PSL_catalog.json +++ b/PSL_catalog.json @@ -1,6 +1,6 @@ { "name": "OG-IND", - "img": "https://github.com/Revenue-Academy/OG-IND/blob/main/docs/OG-IND_logo.png?raw=true", + "img": "https://raw.githubusercontent.com/Revenue-Academy/OG-IND/main/docs/OG-IND_logo.png", "banner_title": "OG-IND", "banner_subtitle": "A large-scale, open source, overlapping generations model of India", "detailed_description": "OG-IND is an overlapping-generations (OG) model that allows for dynamic general equilibrium analysis of fiscal policy for India. OG-IND is built on the OG-Core framework. The model output includes changes in macroeconomic aggregates (GDP, investment, consumption), wages, interest rates, and the stream of tax revenues over time. Regularly updated documentation of the model theory--its output, and solution method--and the Python API is available at https://pslmodels.github.io/OG-Core and documentation of the specific Indian calibration of the model will be available soon.", From 93f6786f12a88b52915d7b42c14014dd5fc30a6e Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 17:41:06 -0400 Subject: [PATCH 3/8] change pin --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 6bd39dd..ec4d3ea 100644 --- a/environment.yml +++ b/environment.yml @@ -2,7 +2,7 @@ name: ogind-dev channels: - conda-forge dependencies: -- python>=3.7.7, <3.12 +- python<3.12 - ipython - setuptools - psutil From 4ce1296d10f9e872e1bad1608b22194b81ddee4f Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 17:41:28 -0400 Subject: [PATCH 4/8] use ogcore demographics --- ogind/calibrate.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ogind/calibrate.py b/ogind/calibrate.py index f2ba010..f3acb5f 100644 --- a/ogind/calibrate.py +++ b/ogind/calibrate.py @@ -1,7 +1,7 @@ from ogind import demographics, macro_params, get_micro_data, income import os import numpy as np -from ogcore import txfunc +from ogcore import txfunc, demographics from ogcore.utils import safe_read_pickle, mkdirs @@ -55,14 +55,14 @@ def __init__( # demographics self.demographic_params = demographics.get_pop_objs( - p.E, - p.S, - p.T, - 1, - 100, - p.start_year - 1, - p.start_year, - GraphDiag=True, + p.E, + p.S, + p.T, + 0, + 99, + country_id="356", + initial_data_year=p.start_year - 1, + final_data_year=p.start_year, ) # earnings profiles From e1ecde6b91c763b831d1fcbe5c2242a213eda22d Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 17:41:50 -0400 Subject: [PATCH 5/8] remove tests of demographics --- ogind/tests/test_demographics.py | 145 ------------------------------- 1 file changed, 145 deletions(-) delete mode 100644 ogind/tests/test_demographics.py diff --git a/ogind/tests/test_demographics.py b/ogind/tests/test_demographics.py deleted file mode 100644 index 2b8bb14..0000000 --- a/ogind/tests/test_demographics.py +++ /dev/null @@ -1,145 +0,0 @@ -import numpy as np -from ogind import demographics - - -def test_get_pop_objs(): - """ - Test of the that omega_SS and the last period of omega_path_S are - close to each other. - """ - E = 20 - S = 80 - T = int(round(4.0 * S)) - start_year = 2019 - - pop_dict = demographics.get_pop_objs( - E, S, T, 1, 100, start_year, start_year + 1, False - ) - - assert np.allclose(pop_dict["omega_SS"], pop_dict["omega"][-1, :]) - - -def test_omega_sum1(): - """ - Test of the that omega sums to one in each period - """ - E = 20 - S = 80 - T = int(round(4.0 * S)) - start_year = 2019 - - pop_dict = demographics.get_pop_objs( - E, S, T, 1, 100, start_year, start_year + 1, False - ) - - assert np.allclose(pop_dict["omega_SS"].sum(), 1.0) - assert np.allclose(pop_dict["omega"].sum(axis=1).max(), 1.0) - assert np.allclose(pop_dict["omega"].sum(axis=1).min(), 1.0) - - -def test_pop_smooth(): - """ - Test that population growth rates evolve smoothly. - """ - E = 20 - S = 80 - T = int(round(4.0 * S)) - start_year = 2019 - - pop_dict = demographics.get_pop_objs( - E, S, T, 1, 100, start_year, start_year + 1, False - ) - - assert np.any( - np.absolute(pop_dict["omega"][:-1, :] - pop_dict["omega"][1:, :]) - < 0.0001 - ) - assert np.any( - np.absolute(pop_dict["g_n"][:-1] - pop_dict["g_n"][1:]) < 0.0001 - ) - - -def test_imm_smooth(): - """ - Test that population growth rates evolve smoothly. - """ - E = 20 - S = 80 - T = int(round(4.0 * S)) - start_year = 2019 - - pop_dict = demographics.get_pop_objs( - E, S, T, 1, 100, start_year, start_year + 1, False - ) - - assert np.any( - np.absolute( - pop_dict["imm_rates"][:-1, :] - pop_dict["imm_rates"][1:, :] - ) - < 0.0001 - ) - - -def test_get_fert(): - """ - Test of function to get fertility rates from data - """ - S = 100 - fert_rates = demographics.get_fert(S, 0, 100, graph=False) - assert fert_rates.shape[0] == S - - -def test_get_mort(): - """ - Test of function to get mortality rates from data - """ - S = 100 - mort_rates, _ = demographics.get_mort(S, 0, 100, graph=False) - assert mort_rates.shape[0] == S - - -def test_get_mort_lt1(): - """ - Test that mortality rates don't exceed 1 - """ - S = 100 - mort_rates, _ = demographics.get_mort(S, 0, 100, graph=False) - assert mort_rates.max() <= 1.0 - - -def test_get_mort_lastperiod(): - """ - Test that mortality rate in last period is 1 - """ - S = 100 - mort_rates, _ = demographics.get_mort(S, 0, 100, graph=False) - assert np.allclose(mort_rates[-1], 1.0) - - -def test_infant_mort(): - """ - Test of function to get mortality rates from data - """ - _, infmort_rate = demographics.get_mort(100, 0, 100, graph=False) - # check that infant mortality equals rate hardcoded into - # demographics.py - assert np.allclose(infmort_rate, 0.02693456) - - -def test_pop_rebin(): - """ - Test of population rebin function - """ - curr_pop_dist = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) - totpers_new = 5 - rebinned_data = demographics.pop_rebin(curr_pop_dist, totpers_new) - assert rebinned_data.shape[0] == totpers_new - - -def test_get_imm_rates(): - """ - Test of function to solve for immigration rates from population data - """ - S = 100 - imm_rates = demographics.get_imm_rates(S, 0, 100) - assert imm_rates.shape[0] == S From 981f6dd7d1c1e32e26214a71e88e1bfadcf226ec Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 22:24:18 -0400 Subject: [PATCH 6/8] remove ind demog import --- ogind/calibrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ogind/calibrate.py b/ogind/calibrate.py index f3acb5f..4426fe9 100644 --- a/ogind/calibrate.py +++ b/ogind/calibrate.py @@ -1,4 +1,4 @@ -from ogind import demographics, macro_params, get_micro_data, income +from ogind import macro_params, get_micro_data, income import os import numpy as np from ogcore import txfunc, demographics From 8828377811f7d019e3fb588aa2fee6c677491d3b Mon Sep 17 00:00:00 2001 From: jdebacker Date: Thu, 14 Mar 2024 22:24:48 -0400 Subject: [PATCH 7/8] use just 2d of e --- ogind/income.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ogind/income.py b/ogind/income.py index db2f90b..0d12fef 100644 --- a/ogind/income.py +++ b/ogind/income.py @@ -90,7 +90,7 @@ def f( gini_usa_data = 41.5 # Find the model implied Gini for the USA gini_usa_model = utils.Inequality( - usa_params.e, + usa_params.e[0, :, :], usa_params.omega_SS, usa_params.lambdas, usa_params.S, @@ -100,7 +100,7 @@ def f( x = opt.root_scalar( f, args=( - usa_params.e, + usa_params.e[0, :, :], usa_params.omega_SS, usa_params.lambdas, gini_to_match, @@ -112,7 +112,7 @@ def f( xtol=1e-10, ) a = x.root - e_new = usa_params.e * np.exp(a * usa_params.e) + e_new = usa_params.e[0, :, :] * np.exp(a * usa_params.e[0, :, :]) emat_new_scaled = ( e_new / ( From 53254e596b4fb8e236308d4eb029616ac826f111 Mon Sep 17 00:00:00 2001 From: jdebacker Date: Fri, 15 Mar 2024 10:31:33 -0400 Subject: [PATCH 8/8] format --- ogind/calibrate.py | 16 ++++++++-------- ogind/get_micro_data.py | 1 + ogind/tests/test_run_example.py | 1 + 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ogind/calibrate.py b/ogind/calibrate.py index 4426fe9..116ccbd 100644 --- a/ogind/calibrate.py +++ b/ogind/calibrate.py @@ -55,14 +55,14 @@ def __init__( # demographics self.demographic_params = demographics.get_pop_objs( - p.E, - p.S, - p.T, - 0, - 99, - country_id="356", - initial_data_year=p.start_year - 1, - final_data_year=p.start_year, + p.E, + p.S, + p.T, + 0, + 99, + country_id="356", + initial_data_year=p.start_year - 1, + final_data_year=p.start_year, ) # earnings profiles diff --git a/ogind/get_micro_data.py b/ogind/get_micro_data.py index 1efbd25..0ab8688 100644 --- a/ogind/get_micro_data.py +++ b/ogind/get_micro_data.py @@ -4,6 +4,7 @@ model for India (The taxcalc package from the India-PIT repo). ------------------------------------------------------------------------ """ + from taxcalc import Records, Calculator, Policy, GSTRecords, CorpRecords from pandas import DataFrame from dask import delayed, compute diff --git a/ogind/tests/test_run_example.py b/ogind/tests/test_run_example.py index 43d66a4..5d710b8 100644 --- a/ogind/tests/test_run_example.py +++ b/ogind/tests/test_run_example.py @@ -3,6 +3,7 @@ work by making sure that it does not break (is still running) after 5 minutes (300 seconds). """ + import multiprocessing import time import os