diff --git a/example_calm/run_populationsim.py b/example_calm/run_populationsim.py index 2c8ffc7..ddb1d12 100644 --- a/example_calm/run_populationsim.py +++ b/example_calm/run_populationsim.py @@ -12,7 +12,7 @@ from activitysim.core.config import handle_standard_args from activitysim.core.tracing import print_elapsed_time -from populationsim.util import setting +from activitysim.core.config import setting from populationsim import lp from populationsim import multi_integerizer diff --git a/example_calm_repop/run_populationsim.py b/example_calm_repop/run_populationsim.py index 2c8ffc7..ddb1d12 100644 --- a/example_calm_repop/run_populationsim.py +++ b/example_calm_repop/run_populationsim.py @@ -12,7 +12,7 @@ from activitysim.core.config import handle_standard_args from activitysim.core.tracing import print_elapsed_time -from populationsim.util import setting +from activitysim.core.config import setting from populationsim import lp from populationsim import multi_integerizer diff --git a/example_survey_weighting/run_populationsim.py b/example_survey_weighting/run_populationsim.py index d1bb23e..85e5979 100755 --- a/example_survey_weighting/run_populationsim.py +++ b/example_survey_weighting/run_populationsim.py @@ -12,7 +12,7 @@ from activitysim.core.config import handle_standard_args from activitysim.core.tracing import print_elapsed_time -from populationsim.util import setting +from activitysim.core.config import setting from populationsim import lp from populationsim import multi_integerizer diff --git a/example_test/run_populationsim.py b/example_test/run_populationsim.py index 6295582..d305d20 100644 --- a/example_test/run_populationsim.py +++ b/example_test/run_populationsim.py @@ -11,7 +11,7 @@ from activitysim.core.config import handle_standard_args from populationsim import steps -from populationsim.util import setting +from activitysim.core.config import setting from populationsim import lp from populationsim import multi_integerizer diff --git a/populationsim/balancer.py b/populationsim/balancer.py index 68e2836..49e0126 100644 --- a/populationsim/balancer.py +++ b/populationsim/balancer.py @@ -10,7 +10,7 @@ import pandas as pd -from .util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/integerizer.py b/populationsim/integerizer.py index 4483e91..0abdd98 100644 --- a/populationsim/integerizer.py +++ b/populationsim/integerizer.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from .util import setting +from activitysim.core.config import setting from .lp import get_single_integerizer from .lp import STATUS_SUCCESS diff --git a/populationsim/lp.py b/populationsim/lp.py index be1f0ae..9ec225b 100644 --- a/populationsim/lp.py +++ b/populationsim/lp.py @@ -4,7 +4,7 @@ import logging -from .util import setting +from activitysim.core.config import setting from . import lp_cvx from . import lp_ortools diff --git a/populationsim/lp_cvx.py b/populationsim/lp_cvx.py index 9ba119d..578fa32 100644 --- a/populationsim/lp_cvx.py +++ b/populationsim/lp_cvx.py @@ -5,7 +5,7 @@ import logging import numpy as np -from .util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/multi_integerizer.py b/populationsim/multi_integerizer.py index 5ba67de..cf0281c 100644 --- a/populationsim/multi_integerizer.py +++ b/populationsim/multi_integerizer.py @@ -12,7 +12,7 @@ import pandas as pd -from .util import setting +from activitysim.core.config import setting from .lp import get_simul_integerizer from .lp import STATUS_SUCCESS diff --git a/populationsim/simul_balancer.py b/populationsim/simul_balancer.py index 916fba4..c745871 100644 --- a/populationsim/simul_balancer.py +++ b/populationsim/simul_balancer.py @@ -11,7 +11,7 @@ import pandas as pd -from .util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/steps/__init__.py b/populationsim/steps/__init__.py index bfe6b2a..a5bfb9b 100644 --- a/populationsim/steps/__init__.py +++ b/populationsim/steps/__init__.py @@ -1,7 +1,10 @@ -from __future__ import absolute_import # PopulationSim # See full license in LICENSE.txt. +from __future__ import absolute_import + +from activitysim.core import inject as _inject + from . import input_pre_processor from . import setup_data_structures from . import initial_seed_balancing @@ -11,7 +14,14 @@ from . import sub_balancing from . import expand_households from . import summarize -from . import write_tables from . import write_synthetic_population - from . import repop_balancing + +from activitysim.core.steps.output import write_data_dictionary +from activitysim.core.steps.output import write_tables + + +@_inject.injectable(cache=True) +def preload_injectables(): + _inject.add_step('write_data_dictionary', write_data_dictionary) + _inject.add_step('write_tables', write_tables) diff --git a/populationsim/steps/expand_households.py b/populationsim/steps/expand_households.py index 11bfff5..a7846b0 100644 --- a/populationsim/steps/expand_households.py +++ b/populationsim/steps/expand_households.py @@ -11,7 +11,7 @@ from activitysim.core import pipeline from activitysim.core import inject -from populationsim.util import setting +from activitysim.core.config import setting from .helper import get_control_table from .helper import get_weight_table diff --git a/populationsim/steps/final_seed_balancing.py b/populationsim/steps/final_seed_balancing.py index a2c7ef9..398e33b 100644 --- a/populationsim/steps/final_seed_balancing.py +++ b/populationsim/steps/final_seed_balancing.py @@ -8,7 +8,7 @@ from activitysim.core import inject -from populationsim.util import setting +from activitysim.core.config import setting from ..balancer import do_balancing from .helper import get_control_table diff --git a/populationsim/steps/initial_seed_balancing.py b/populationsim/steps/initial_seed_balancing.py index b5267b3..b75cb97 100644 --- a/populationsim/steps/initial_seed_balancing.py +++ b/populationsim/steps/initial_seed_balancing.py @@ -8,7 +8,7 @@ from activitysim.core import inject from activitysim.core import pipeline -from populationsim.util import setting +from activitysim.core.config import setting from ..balancer import do_balancing diff --git a/populationsim/steps/input_pre_processor.py b/populationsim/steps/input_pre_processor.py index cdd7675..722f93b 100644 --- a/populationsim/steps/input_pre_processor.py +++ b/populationsim/steps/input_pre_processor.py @@ -7,12 +7,11 @@ import pandas as pd import numpy as np -from activitysim.core import inject -from activitysim.core import pipeline - -from populationsim.util import data_dir_from_settings -from populationsim.util import setting - +from activitysim.core import ( + inject, + config, + input +) logger = logging.getLogger(__name__) @@ -30,8 +29,8 @@ def input_pre_processor(): unless an alternate table_list name is specified as a model step argument 'table_list'. (This allows alternate/additional input files to be read for repop) - In the case of repop, this step is being run after an initial populationsim run has - completed, in which case the input_table_list may specify replacement tables. + In the case of repop, this step is being run after an initial run has completed, + in which case the input_table_list may specify replacement tables. (e.g. lowest geography controls that will replace the previous low controls dataframe.) See input_table_list in settings.yaml in the example folder for a working example @@ -39,7 +38,7 @@ def input_pre_processor(): +--------------+----------------------------------------------------------+ | key | description | +==============+=========================================+================+ - | tablename | ame of pipeline table in which to store dataframe | + | tablename | name of pipeline table in which to store dataframe | +--------------+----------------------------------------------------------+ | filename | name of csv file to read (in data_dir) | +--------------+----------------------------------------------------------+ @@ -54,85 +53,18 @@ def input_pre_processor(): # alternate table list name may have been provided as a model argument table_list_name = inject.get_step_arg('table_list', default='input_table_list') - table_list = setting(table_list_name) - assert table_list is not None, "table list '%s' not in settings." % table_list_name + table_list = config.setting(table_list_name) - data_dir = data_dir_from_settings() + assert table_list is not None, "no table list '%s' found in settings." % table_list_name + + logger.info('Using table list: %s' % table_list) for table_info in table_list: - tablename = table_info['tablename'] - - logger.info("input_pre_processor processing %s" % tablename) - - # read the csv file - data_filename = table_info.get('filename', None) - data_file_path = os.path.join(data_dir, data_filename) - if not os.path.exists(data_file_path): - raise RuntimeError("input_pre_processor %s - input file not found: %s" - % (tablename, data_file_path, )) - - logger.info("Reading csv file %s" % data_file_path) - df = read_csv_with_fallback_encoding(data_file_path) - - logger.info("input file columns: %s" % df.columns.values) - - drop_columns = table_info.get('drop_columns', None) - if drop_columns: - for c in drop_columns: - logger.info("dropping column '%s'" % c) - del df[c] - - # rename columns - column_map = table_info.get('column_map', None) - if column_map: - df.rename(columns=column_map, inplace=True) - - # set index - index_col = table_info.get('index_col', None) - if index_col is not None: - if index_col in df.columns: - assert not df.duplicated(index_col).any() - df.set_index(index_col, inplace=True) - else: - df.index.names = [index_col] - - # read expression file - # expression_filename = table_info.get('expression_filename', None) - # if expression_filename: - # assert False - # expression_file_path = os.path.join(configs_dir, expression_filename) - # if not os.path.exists(expression_file_path): - # raise RuntimeError("input_pre_processor %s - expression file not found: %s" - # % (table, expression_file_path, )) - # spec = assign.read_assignment_spec(expression_file_path) - # - # df_alias = table_info.get('df_alias', table) - # - # locals_d = {} - # - # results, trace_results, trace_assigned_locals \ - # = assign.assign_variables(spec, df, locals_d, df_alias=df_alias) - # # for column in results.columns: - # # orca.add_column(table, column, results[column]) - # - # df = pd.concat([df, results], axis=1) - - logger.info("adding table %s" % tablename) + tablename = table_info.get('tablename') + df = input.read_from_table_info(table_info) + logger.info('registering table %s' % tablename) # add (or replace) pipeline table repop = inject.get_step_arg('repop', default=False) inject.add_table(tablename, df, replace=repop) - - -def read_csv_with_fallback_encoding(filepath): - """read a CSV to a pandas DataFrame using default utf-8 encoding, - but try alternate Windows-compatible cp1252 if unicode fails - - """ - try: - return pd.read_csv(filepath, comment='#') - except UnicodeDecodeError: - logger.warning( - "Reading %s with default utf-8 encoding failed, trying cp1252 instead", filepath) - return pd.read_csv(filepath, comment='#', encoding='cp1252') diff --git a/populationsim/steps/integerize_final_seed_weights.py b/populationsim/steps/integerize_final_seed_weights.py index ffea4a6..a93d1fb 100644 --- a/populationsim/steps/integerize_final_seed_weights.py +++ b/populationsim/steps/integerize_final_seed_weights.py @@ -13,7 +13,7 @@ from .helper import get_control_table from .helper import weight_table_name from .helper import get_weight_table -from populationsim.util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/steps/repop_balancing.py b/populationsim/steps/repop_balancing.py index 44c8ee8..cc89a42 100644 --- a/populationsim/steps/repop_balancing.py +++ b/populationsim/steps/repop_balancing.py @@ -7,7 +7,7 @@ from activitysim.core import inject -from populationsim.util import setting +from activitysim.core.config import setting from .helper import get_control_table from .helper import weight_table_name diff --git a/populationsim/steps/setup_data_structures.py b/populationsim/steps/setup_data_structures.py index 2bb4fd4..80da217 100644 --- a/populationsim/steps/setup_data_structures.py +++ b/populationsim/steps/setup_data_structures.py @@ -18,7 +18,7 @@ from .helper import get_control_table from .helper import get_control_data_table -from populationsim.util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/steps/sub_balancing.py b/populationsim/steps/sub_balancing.py index de5d8dd..cc29c6c 100644 --- a/populationsim/steps/sub_balancing.py +++ b/populationsim/steps/sub_balancing.py @@ -12,7 +12,7 @@ from activitysim.core import inject from activitysim.core import pipeline -from populationsim.util import setting +from activitysim.core.config import setting from .helper import get_control_table from .helper import weight_table_name diff --git a/populationsim/steps/summarize.py b/populationsim/steps/summarize.py index eda857e..be771d4 100644 --- a/populationsim/steps/summarize.py +++ b/populationsim/steps/summarize.py @@ -12,7 +12,7 @@ from .helper import get_control_table from .helper import get_weight_table -from populationsim.util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/steps/write_synthetic_population.py b/populationsim/steps/write_synthetic_population.py index 2078a80..5045005 100644 --- a/populationsim/steps/write_synthetic_population.py +++ b/populationsim/steps/write_synthetic_population.py @@ -8,7 +8,7 @@ from activitysim.core import pipeline from activitysim.core import inject -from populationsim.util import setting +from activitysim.core.config import setting logger = logging.getLogger(__name__) diff --git a/populationsim/steps/write_tables.py b/populationsim/steps/write_tables.py deleted file mode 100644 index 1768d2f..0000000 --- a/populationsim/steps/write_tables.py +++ /dev/null @@ -1,94 +0,0 @@ -# PopulationSim -# See full license in LICENSE.txt. - -import logging -import os - -from activitysim.core import pipeline -from activitysim.core import inject - -from populationsim.util import setting - -logger = logging.getLogger(__name__) - - -@inject.step() -def write_tables(output_dir): - """ - Write pipeline tables as csv files (in output directory) as specified by output_tables list - in settings file. - - Pipeline tables are intermediate computational tables, not to be confused with the - synthetic population tables written by the write_synthetic_population step. - - 'output_tables' can specify either a list of output tables to include or to skip - if no output_tables list is specified, then no checkpointed tables will be written - - Intermediate tables likely to be of particular interest or utility are the controls and weights - tables for the various geographies. For example, if one of your geographies is TRACT, then: - TRACT_controls has control totals for every TRACT (and aggregated subzone) controls. - TRACT_weights has balanced_weight and integer_weight for every TRACT. - - To write all output tables EXCEPT the households and persons tables: - - :: - - output_tables: - action: skip - tables: - - households - - persons - - To write ONLY the expanded_household_ids table: - - :: - - output_tables: - action: include - tables: - - expanded_household_ids - - Parameters - ---------- - output_dir: str - - """ - - output_tables_settings_name = 'output_tables' - - output_tables_settings = setting(output_tables_settings_name) - - output_tables_list = pipeline.checkpointed_tables() - - if output_tables_settings is None: - logger.info("No output_tables specified in settings file. Nothing to write.") - return - - action = output_tables_settings.get('action') - tables = output_tables_settings.get('tables') - - if action not in ['include', 'skip']: - raise RuntimeError("expected %s action '%s' to be either 'include' or 'skip'" % - (output_tables_settings_name, action)) - - if action == 'include': - output_tables_list = tables - elif action == 'skip': - output_tables_list = [t for t in output_tables_list if t not in tables] - - # should provide option to also write checkpoints? - # output_tables_list.append("checkpoints.csv") - - for table_name in output_tables_list: - table = inject.get_table(table_name, None) - - if table is None: - logger.warn("Skipping '%s': Table not found." % table_name) - continue - - df = table.to_frame() - file_name = "%s.csv" % table_name - logger.info("writing output file %s" % file_name) - file_path = os.path.join(output_dir, file_name) - write_index = df.index.name is not None - df.to_csv(file_path, index=write_index) diff --git a/populationsim/tests/configs/settings.yaml b/populationsim/tests/configs/settings.yaml index f7273d8..95bb2f1 100644 --- a/populationsim/tests/configs/settings.yaml +++ b/populationsim/tests/configs/settings.yaml @@ -37,6 +37,7 @@ seed_geography: PUMA output_tables: action: skip + prefix: '' tables: - households - persons @@ -62,4 +63,3 @@ output_synthetic_population: - per_num - OSUTAG - OCCP - diff --git a/populationsim/tests/configs2/settings.yaml b/populationsim/tests/configs2/settings.yaml index 84a66e3..70498ea 100644 --- a/populationsim/tests/configs2/settings.yaml +++ b/populationsim/tests/configs2/settings.yaml @@ -39,6 +39,7 @@ seed_geography: PUMA output_tables: action: include + prefix: '' tables: - expanded_household_ids - summary_DISTRICT diff --git a/populationsim/tests/test_balancer.py b/populationsim/tests/test_balancer.py index c5f2bdb..ef8aa3b 100644 --- a/populationsim/tests/test_balancer.py +++ b/populationsim/tests/test_balancer.py @@ -52,7 +52,7 @@ def test_Konduri(): published_final_weights = [1.36, 25.66, 7.98, 27.79, 18.45, 8.64, 1.47, 8.64] published_weighted_sum = [ - round((incidence_table.ix[:, c] * published_final_weights).sum(), 2) + round((incidence_table.loc[:, c] * published_final_weights).sum(), 2) for c in controls.index] npt.assert_almost_equal(weighted_sum, published_weighted_sum, decimal=1) diff --git a/populationsim/tests/test_flex.py b/populationsim/tests/test_flex.py index 6c13ea5..6b8927c 100644 --- a/populationsim/tests/test_flex.py +++ b/populationsim/tests/test_flex.py @@ -6,6 +6,7 @@ from activitysim.core import tracing from activitysim.core import pipeline from activitysim.core import inject +from activitysim.core.config import setting from populationsim import steps @@ -53,9 +54,9 @@ def test_full_run2(): assert isinstance(pipeline.get_table('expanded_household_ids'), pd.DataFrame) # output tables list action: include - assert os.path.exists(os.path.join(output_dir, 'expanded_household_ids.csv')) - assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv')) - assert not os.path.exists(os.path.join(output_dir, 'summary_TAZ.csv')) + assert os.path.exists(config.output_file_path('expanded_household_ids.csv')) + assert os.path.exists(config.output_file_path('summary_DISTRICT.csv')) + assert not os.path.exists(config.output_file_path('summary_TAZ.csv')) # tables will no longer be available after pipeline is closed pipeline.close_pipeline() diff --git a/populationsim/util.py b/populationsim/util.py deleted file mode 100644 index 73ef141..0000000 --- a/populationsim/util.py +++ /dev/null @@ -1,38 +0,0 @@ -# PopulationSim -# See full license in LICENSE.txt. - -from __future__ import absolute_import - -import logging - -from activitysim.core import inject - - -logger = logging.getLogger(__name__) - - -def setting(key, default=None): - - settings = inject.get_injectable('settings') - - return settings.get(key, default) - - -def data_dir_from_settings(): - """ - legacy strategy foir specifying data_dir is with orca injectable. - Calling this function provides an alternative by reading it from settings file - """ - - # FIXME - not sure this plays well with orca - # it may depend on when file with orca decorator is imported - - data_dir = setting('data_dir', None) - - if data_dir: - inject.add_injectable('data_dir', data_dir) - else: - data_dir = inject.get_injectable('data_dir') - - logger.info("data_dir: %s" % data_dir) - return data_dir diff --git a/setup.py b/setup.py index 8534aaa..fa4d490 100644 --- a/setup.py +++ b/setup.py @@ -27,10 +27,10 @@ include_package_data=True, python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', install_requires=[ + 'activitysim >= 0.9.2', 'numpy >= 1.16.1', 'pandas >= 0.24.1', - 'activitysim >= 0.9.1', - 'ortools >= 5.1.4045', + 'ortools >= 5.1.4045, < 7.5', 'future >= 0.16.0' ] )