Skip to content

Commit

Permalink
Merge branch 'main' into auto-drop-vars
Browse files Browse the repository at this point in the history
  • Loading branch information
i-am-sijia authored Apr 1, 2024
2 parents e5d9878 + 2540ede commit 712cd1b
Show file tree
Hide file tree
Showing 24 changed files with 798 additions and 66 deletions.
31 changes: 29 additions & 2 deletions activitysim/abm/models/auto_ownership.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,17 @@
import pandas as pd
from pydantic import validator

from activitysim.core import config, estimation, simulate, tracing, workflow
from activitysim.core import (
config,
expressions,
estimation,
simulate,
tracing,
workflow,
)
from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable
from activitysim.core.configuration.logit import LogitComponentSettings
from .util import annotate

logger = logging.getLogger(__name__)

Expand All @@ -19,7 +27,8 @@ class AutoOwnershipSettings(LogitComponentSettings):
Settings for the `auto_ownership` component.
"""

# This model is relatively simple and has no unique settings
preprocessor: PreprocessorSettings | None = None
annotate_households: PreprocessorSettings | None = None


@workflow.step
Expand Down Expand Up @@ -57,6 +66,21 @@ def auto_ownership_simulate(

logger.info("Running %s with %d households", trace_label, len(choosers))

# - preprocessor
preprocessor_settings = model_settings.preprocessor
if preprocessor_settings:

locals_d = {}
if constants is not None:
locals_d.update(constants)

expressions.assign_columns(
df=choosers,
model_settings=preprocessor_settings,
locals_dict=locals_d,
trace_label=trace_label,
)

if estimator:
estimator.write_model_settings(model_settings, model_settings_file_name)
estimator.write_spec(model_settings)
Expand Down Expand Up @@ -92,5 +116,8 @@ def auto_ownership_simulate(
"auto_ownership", households.auto_ownership, value_counts=True
)

if model_settings.annotate_households:
annotate.annotate_households(model_settings, trace_label)

if trace_hh_id:
state.tracing.trace_df(households, label="auto_ownership", warn_if_empty=True)
10 changes: 10 additions & 0 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ def cdap_simulate(
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
spec = cdap.get_cached_spec(state, hhsize)
estimator.write_table(spec, "spec_%s" % hhsize, append=False)
if add_joint_tour_utility:
joint_spec = cdap.get_cached_joint_spec(hhsize)
estimator.write_table(
joint_spec, "joint_spec_%s" % hhsize, append=False
)

logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

Expand Down Expand Up @@ -215,6 +220,11 @@ def cdap_simulate(
if estimator:
estimator.write_choices(choices)
choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
if add_joint_tour_utility:
hh_joint.index.name = "household_id"
hh_joint = estimator.get_survey_values(
hh_joint, "households", "has_joint_tour"
)
estimator.write_override_choices(choices)
estimator.end_estimation()

Expand Down
37 changes: 37 additions & 0 deletions activitysim/abm/models/disaggregate_accessibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
procedure work.
"""

KEEP_COLS: list[str] | None = None
"""
The disaggregate accessibility table is grouped by the "by" columns above, and the KEEP_COLS
are averaged across each group. Columns not present in a given auto-ownership level are
initialized as NA, so they are skipped in the groupby mean and the resulting values are correct.
(This avoids having to update code to reshape the table and introduce new functionality there.)
If None, all columns with "accessibility" in the name are kept.
"""

FROM_TEMPLATES: bool = False
annotate_proto_tables: list[DisaggregateAccessibilityAnnotateSettings] = []
"""
Expand All @@ -164,6 +173,11 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
"""
NEAREST_METHOD: str = "skims"

postprocess_proto_tables: list[DisaggregateAccessibilityAnnotateSettings] = []
"""
List of postprocessor (annotate) settings to apply to the proto-population tables after they are generated.
"""


def read_disaggregate_accessibility_yaml(
state: workflow.State, file_name
Expand Down Expand Up @@ -846,6 +860,10 @@ def compute_disaggregate_accessibility(
state.tracing.register_traceable_table(tablename, df)
del df

disagg_model_settings = read_disaggregate_accessibility_yaml(
state, "disaggregate_accessibility.yaml"
)

# Run location choice
logsums = get_disaggregate_logsums(
state,
Expand Down Expand Up @@ -906,4 +924,23 @@ def compute_disaggregate_accessibility(
for k, df in logsums.items():
state.add_table(k, df)

# available post-processing
for annotations in disagg_model_settings.postprocess_proto_tables:
tablename = annotations.tablename
df = state.get_dataframe(tablename)
assert df is not None
assert annotations is not None
assign_columns(
state,
df=df,
model_settings={
**annotations.annotate.dict(),
**disagg_model_settings.suffixes.dict(),
},
trace_label=tracing.extend_trace_label(
"disaggregate_accessibility.postprocess", tablename
),
)
state.add_table(tablename, df)

return
2 changes: 1 addition & 1 deletion activitysim/abm/models/joint_tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def joint_tour_destination(

choices_df, save_sample_df = tour_destination.run_tour_destination(
state,
tours,
joint_tours,
persons_merged,
want_logsums,
want_sample_table,
Expand Down
9 changes: 7 additions & 2 deletions activitysim/abm/models/joint_tour_frequency_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ def joint_tour_frequency_composition(
model_settings_file_name,
)

# FIXME setting index as "alt" causes crash in estimation mode...
alts = simulate.read_model_alts(
state, "joint_tour_frequency_composition_alternatives.csv", set_index="alt"
state, "joint_tour_frequency_composition_alternatives.csv", set_index=None
)
alts.index = alts["alt"].values

# - only interested in households with more than one cdap travel_active person and
# - at least one non-preschooler
Expand Down Expand Up @@ -116,14 +118,16 @@ def joint_tour_frequency_composition(
estimator.write_model_settings(model_settings, model_settings_file_name)
estimator.write_coefficients(coefficients_df, model_settings)
estimator.write_choosers(choosers)
estimator.write_alternatives(alts)

assert choosers.index.name == "household_id"
assert "household_id" not in choosers.columns
choosers["household_id"] = choosers.index

estimator.set_chooser_id(choosers.index.name)

# FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
estimator.set_alt_id("alt_id")

# The choice value 'joint_tour_frequency_composition' assigned by interaction_simulate
# is the index value of the chosen alternative in the alternatives table.
choices = interaction_simulate(
Expand Down Expand Up @@ -157,6 +161,7 @@ def joint_tour_frequency_composition(
# - but we don't know the tour participants yet
# - so we arbitrarily choose the first person in the household
# - to be point person for the purpose of generating an index and setting origin
# FIXME: not all models are guaranteed to have PNUM
temp_point_persons = persons.loc[persons.PNUM == 1]
temp_point_persons["person_id"] = temp_point_persons.index
temp_point_persons = temp_point_persons.set_index("household_id")
Expand Down
4 changes: 4 additions & 0 deletions activitysim/abm/models/joint_tour_participation.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ def participants_chooser(
probs[choice_col] = np.where(probs[choice_col] > 0, 1, 0)
non_choice_col = [col for col in probs.columns if col != choice_col][0]
probs[non_choice_col] = 1 - probs[choice_col]
if iter > MAX_ITERATIONS + 1:
raise RuntimeError(
f"{num_tours_remaining} tours could not be satisfied even with forcing participation"
)
else:
raise RuntimeError(
f"{num_tours_remaining} tours could not be satisfied after {iter} iterations"
Expand Down
99 changes: 85 additions & 14 deletions activitysim/abm/models/location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from activitysim.core.interaction_sample import interaction_sample
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
from activitysim.core.util import reindex

# import multiprocessing

Expand Down Expand Up @@ -141,22 +142,26 @@ def _location_sample(

sample_size = model_settings.SAMPLE_SIZE

if state.settings.disable_destination_sampling or (
estimator and estimator.want_unsampled_alternatives
):
# FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
if estimator:
sample_size = model_settings.ESTIMATION_SAMPLE_SIZE
logger.info(
"Estimation mode for %s using unsampled alternatives short_circuit_choices"
% (trace_label,)
f"Estimation mode for {trace_label} using sample size of {sample_size}"
)

if state.settings.disable_destination_sampling:
sample_size = 0
logger.info(
f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set"
)

locals_d = {
"skims": skims,
"segment_size": segment_name,
"orig_col_name": skims.orig_key, # added for sharrow flows
"dest_col_name": skims.dest_key, # added for sharrow flows
"timeframe": "timeless",
"reindex": reindex,
"land_use": state.get_dataframe("land_use"),
}
locals_d.update(model_settings.CONSTANTS or {})

Expand Down Expand Up @@ -484,6 +489,38 @@ def run_location_sample(
trace_label=trace_label,
)

# adding observed choice to alt set when running in estimation mode
if estimator:
# grabbing survey values
survey_persons = estimation.manager.get_survey_table("persons")
if "school_location" in trace_label:
survey_choices = survey_persons["school_zone_id"].reset_index()
elif ("workplace_location" in trace_label) and ("external" not in trace_label):
survey_choices = survey_persons["workplace_zone_id"].reset_index()
else:
return choices
survey_choices.columns = ["person_id", "alt_dest"]
survey_choices = survey_choices[
survey_choices["person_id"].isin(choices.index)
& (survey_choices.alt_dest > 0)
]
# merging survey destination into table if not available
joined_data = survey_choices.merge(
choices, on=["person_id", "alt_dest"], how="left", indicator=True
)
missing_rows = joined_data[joined_data["_merge"] == "left_only"]
missing_rows["pick_count"] = 1
if len(missing_rows) > 0:
new_choices = missing_rows[
["person_id", "alt_dest", "prob", "pick_count"]
].set_index("person_id")
choices = choices.append(new_choices, ignore_index=False).sort_index()
# making probability the mean of all other sampled destinations by person
# FIXME is there a better way to do this? Does this even matter for estimation?
choices["prob"] = choices["prob"].fillna(
choices.groupby("person_id")["prob"].transform("mean")
)

return choices


Expand Down Expand Up @@ -620,6 +657,8 @@ def run_location_simulate(
"orig_col_name": skims.orig_key, # added for sharrow flows
"dest_col_name": skims.dest_key, # added for sharrow flows
"timeframe": "timeless",
"reindex": reindex,
"land_use": state.get_dataframe("land_use"),
}
locals_d.update(model_settings.CONSTANTS or {})

Expand Down Expand Up @@ -833,6 +872,24 @@ def run_location_choice(
)
state.tracing.trace_df(choices_df, estimation_trace_label)

if want_logsums & (not skip_choice):
# grabbing index, could be person_id or proto_person_id
index_name = choices_df.index.name
# merging mode choice logsum of chosen alternative to choices
choices_df = (
pd.merge(
choices_df.reset_index(),
location_sample_df.reset_index()[
[index_name, model_settings.ALT_DEST_COL_NAME, ALT_LOGSUM]
],
how="left",
left_on=[index_name, "choice"],
right_on=[index_name, model_settings.ALT_DEST_COL_NAME],
)
.drop(columns=model_settings.ALT_DEST_COL_NAME)
.set_index(index_name)
)

choices_list.append(choices_df)

if want_sample_table:
Expand All @@ -850,7 +907,7 @@ def run_location_choice(
else:
# this will only happen with small samples (e.g. singleton) with no (e.g.) school segs
logger.warning("%s no choices", trace_label)
choices_df = pd.DataFrame(columns=["choice", "logsum"])
choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM])

if len(sample_list) > 0:
save_sample_df = pd.concat(sample_list)
Expand Down Expand Up @@ -893,7 +950,8 @@ def iterate_location_choice(
Returns
-------
adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds mode choice logsum to selected destination column model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds annotations to persons table
"""

Expand All @@ -903,7 +961,11 @@ def iterate_location_choice(
chooser_filter_column = model_settings.CHOOSER_FILTER_COLUMN_NAME

dest_choice_column_name = model_settings.DEST_CHOICE_COLUMN_NAME
logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME
dc_logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME
mc_logsum_column_name = model_settings.MODE_CHOICE_LOGSUM_COLUMN_NAME
want_logsums = (dc_logsum_column_name is not None) | (
mc_logsum_column_name is not None
)

sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME
want_sample_table = (
Expand Down Expand Up @@ -954,7 +1016,7 @@ def iterate_location_choice(
persons_merged_df_,
network_los,
shadow_price_calculator=spc,
want_logsums=logsum_column_name is not None,
want_logsums=want_logsums,
want_sample_table=want_sample_table,
estimator=estimator,
model_settings=model_settings,
Expand Down Expand Up @@ -1029,10 +1091,15 @@ def iterate_location_choice(
)

# add the dest_choice_logsum column to persons dataframe
if logsum_column_name:
persons_df[logsum_column_name] = (
if dc_logsum_column_name:
persons_df[dc_logsum_column_name] = (
choices_df["logsum"].reindex(persons_df.index).astype("float")
)
# add the mode choice logsum column to persons dataframe
if mc_logsum_column_name:
persons_df[mc_logsum_column_name] = (
choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float")
)

if save_sample_df is not None:
# might be None for tiny samples even if sample_table_name was specified
Expand Down Expand Up @@ -1072,9 +1139,13 @@ def iterate_location_choice(
if state.settings.trace_hh_id:
state.tracing.trace_df(households_df, label=trace_label, warn_if_empty=True)

if logsum_column_name:
if dc_logsum_column_name:
tracing.print_summary(
dc_logsum_column_name, choices_df["logsum"], value_counts=True
)
if mc_logsum_column_name:
tracing.print_summary(
logsum_column_name, choices_df["logsum"], value_counts=True
mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True
)

return persons_df
Expand Down
Loading

0 comments on commit 712cd1b

Please sign in to comment.