Skip to content

Commit

Permalink
Merge branch 'main' into auto-drop-vars
Browse files Browse the repository at this point in the history
  • Loading branch information
i-am-sijia authored Apr 1, 2024
2 parents e5d9878 + 2540ede commit 712cd1b
Show file tree
Hide file tree
Showing 24 changed files with 798 additions and 66 deletions.
31 changes: 29 additions & 2 deletions activitysim/abm/models/auto_ownership.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,17 @@
import pandas as pd
from pydantic import validator

from activitysim.core import config, estimation, simulate, tracing, workflow
from activitysim.core import (
config,
expressions,
estimation,
simulate,
tracing,
workflow,
)
from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable
from activitysim.core.configuration.logit import LogitComponentSettings
from .util import annotate

logger = logging.getLogger(__name__)

Expand All @@ -19,7 +27,8 @@ class AutoOwnershipSettings(LogitComponentSettings):
Settings for the `auto_ownership` component.
"""

# This model is relatively simple and has no unique settings
preprocessor: PreprocessorSettings | None = None
annotate_households: PreprocessorSettings | None = None


@workflow.step
Expand Down Expand Up @@ -57,6 +66,21 @@ def auto_ownership_simulate(

logger.info("Running %s with %d households", trace_label, len(choosers))

# - preprocessor
preprocessor_settings = model_settings.preprocessor
if preprocessor_settings:

locals_d = {}
if constants is not None:
locals_d.update(constants)

expressions.assign_columns(
df=choosers,
model_settings=preprocessor_settings,
locals_dict=locals_d,
trace_label=trace_label,
)

if estimator:
estimator.write_model_settings(model_settings, model_settings_file_name)
estimator.write_spec(model_settings)
Expand Down Expand Up @@ -92,5 +116,8 @@ def auto_ownership_simulate(
"auto_ownership", households.auto_ownership, value_counts=True
)

if model_settings.annotate_households:
annotate.annotate_households(model_settings, trace_label)

if trace_hh_id:
state.tracing.trace_df(households, label="auto_ownership", warn_if_empty=True)
10 changes: 10 additions & 0 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ def cdap_simulate(
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
spec = cdap.get_cached_spec(state, hhsize)
estimator.write_table(spec, "spec_%s" % hhsize, append=False)
if add_joint_tour_utility:
joint_spec = cdap.get_cached_joint_spec(hhsize)
estimator.write_table(
joint_spec, "joint_spec_%s" % hhsize, append=False
)

logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

Expand Down Expand Up @@ -215,6 +220,11 @@ def cdap_simulate(
if estimator:
estimator.write_choices(choices)
choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
if add_joint_tour_utility:
hh_joint.index.name = "household_id"
hh_joint = estimator.get_survey_values(
hh_joint, "households", "has_joint_tour"
)
estimator.write_override_choices(choices)
estimator.end_estimation()

Expand Down
37 changes: 37 additions & 0 deletions activitysim/abm/models/disaggregate_accessibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
procedure work.
"""

KEEP_COLS: list[str] | None = None
"""
The disaggregate accessibility table is grouped by the "by" columns above, and the KEEP_COLS
are averaged across each group. Columns not present in a given auto-ownership level are
initialized as NA, so they are skipped in the groupby mean and the resulting values are correct.
(This avoids having to update code to reshape the table and introduce new functionality there.)
If None, all columns with "accessibility" in the name are kept.
"""

FROM_TEMPLATES: bool = False
annotate_proto_tables: list[DisaggregateAccessibilityAnnotateSettings] = []
"""
Expand All @@ -164,6 +173,11 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
"""
NEAREST_METHOD: str = "skims"

postprocess_proto_tables: list[DisaggregateAccessibilityAnnotateSettings] = []
"""
List of postprocessor (annotate) settings to apply to the proto-population tables after they are generated.
"""


def read_disaggregate_accessibility_yaml(
state: workflow.State, file_name
Expand Down Expand Up @@ -846,6 +860,10 @@ def compute_disaggregate_accessibility(
state.tracing.register_traceable_table(tablename, df)
del df

disagg_model_settings = read_disaggregate_accessibility_yaml(
state, "disaggregate_accessibility.yaml"
)

# Run location choice
logsums = get_disaggregate_logsums(
state,
Expand Down Expand Up @@ -906,4 +924,23 @@ def compute_disaggregate_accessibility(
for k, df in logsums.items():
state.add_table(k, df)

# available post-processing
for annotations in disagg_model_settings.postprocess_proto_tables:
tablename = annotations.tablename
df = state.get_dataframe(tablename)
assert df is not None
assert annotations is not None
assign_columns(
state,
df=df,
model_settings={
**annotations.annotate.dict(),
**disagg_model_settings.suffixes.dict(),
},
trace_label=tracing.extend_trace_label(
"disaggregate_accessibility.postprocess", tablename
),
)
state.add_table(tablename, df)

return
2 changes: 1 addition & 1 deletion activitysim/abm/models/joint_tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def joint_tour_destination(

choices_df, save_sample_df = tour_destination.run_tour_destination(
state,
tours,
joint_tours,
persons_merged,
want_logsums,
want_sample_table,
Expand Down
9 changes: 7 additions & 2 deletions activitysim/abm/models/joint_tour_frequency_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ def joint_tour_frequency_composition(
model_settings_file_name,
)

# FIXME setting index as "alt" causes crash in estimation mode...
alts = simulate.read_model_alts(
state, "joint_tour_frequency_composition_alternatives.csv", set_index="alt"
state, "joint_tour_frequency_composition_alternatives.csv", set_index=None
)
alts.index = alts["alt"].values

# - only interested in households with more than one cdap travel_active person and
# - at least one non-preschooler
Expand Down Expand Up @@ -116,14 +118,16 @@ def joint_tour_frequency_composition(
estimator.write_model_settings(model_settings, model_settings_file_name)
estimator.write_coefficients(coefficients_df, model_settings)
estimator.write_choosers(choosers)
estimator.write_alternatives(alts)

assert choosers.index.name == "household_id"
assert "household_id" not in choosers.columns
choosers["household_id"] = choosers.index

estimator.set_chooser_id(choosers.index.name)

# FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
estimator.set_alt_id("alt_id")

# The choice value 'joint_tour_frequency_composition' assigned by interaction_simulate
# is the index value of the chosen alternative in the alternatives table.
choices = interaction_simulate(
Expand Down Expand Up @@ -157,6 +161,7 @@ def joint_tour_frequency_composition(
# - but we don't know the tour participants yet
# - so we arbitrarily choose the first person in the household
# - to be point person for the purpose of generating an index and setting origin
# FIXME: not all models are guaranteed to have PNUM
temp_point_persons = persons.loc[persons.PNUM == 1]
temp_point_persons["person_id"] = temp_point_persons.index
temp_point_persons = temp_point_persons.set_index("household_id")
Expand Down
4 changes: 4 additions & 0 deletions activitysim/abm/models/joint_tour_participation.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ def participants_chooser(
probs[choice_col] = np.where(probs[choice_col] > 0, 1, 0)
non_choice_col = [col for col in probs.columns if col != choice_col][0]
probs[non_choice_col] = 1 - probs[choice_col]
if iter > MAX_ITERATIONS + 1:
raise RuntimeError(
f"{num_tours_remaining} tours could not be satisfied even with forcing participation"
)
else:
raise RuntimeError(
f"{num_tours_remaining} tours could not be satisfied after {iter} iterations"
Expand Down
99 changes: 85 additions & 14 deletions activitysim/abm/models/location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from activitysim.core.interaction_sample import interaction_sample
from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
from activitysim.core.util import reindex

# import multiprocessing

Expand Down Expand Up @@ -141,22 +142,26 @@ def _location_sample(

sample_size = model_settings.SAMPLE_SIZE

if state.settings.disable_destination_sampling or (
estimator and estimator.want_unsampled_alternatives
):
# FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
if estimator:
sample_size = model_settings.ESTIMATION_SAMPLE_SIZE
logger.info(
"Estimation mode for %s using unsampled alternatives short_circuit_choices"
% (trace_label,)
f"Estimation mode for {trace_label} using sample size of {sample_size}"
)

if state.settings.disable_destination_sampling:
sample_size = 0
logger.info(
f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set"
)

locals_d = {
"skims": skims,
"segment_size": segment_name,
"orig_col_name": skims.orig_key, # added for sharrow flows
"dest_col_name": skims.dest_key, # added for sharrow flows
"timeframe": "timeless",
"reindex": reindex,
"land_use": state.get_dataframe("land_use"),
}
locals_d.update(model_settings.CONSTANTS or {})

Expand Down Expand Up @@ -484,6 +489,38 @@ def run_location_sample(
trace_label=trace_label,
)

# adding observed choice to alt set when running in estimation mode
if estimator:
# grabbing survey values
survey_persons = estimation.manager.get_survey_table("persons")
if "school_location" in trace_label:
survey_choices = survey_persons["school_zone_id"].reset_index()
elif ("workplace_location" in trace_label) and ("external" not in trace_label):
survey_choices = survey_persons["workplace_zone_id"].reset_index()
else:
return choices
survey_choices.columns = ["person_id", "alt_dest"]
survey_choices = survey_choices[
survey_choices["person_id"].isin(choices.index)
& (survey_choices.alt_dest > 0)
]
# merging survey destination into table if not available
joined_data = survey_choices.merge(
choices, on=["person_id", "alt_dest"], how="left", indicator=True
)
missing_rows = joined_data[joined_data["_merge"] == "left_only"]
missing_rows["pick_count"] = 1
if len(missing_rows) > 0:
new_choices = missing_rows[
["person_id", "alt_dest", "prob", "pick_count"]
].set_index("person_id")
choices = choices.append(new_choices, ignore_index=False).sort_index()
# making probability the mean of all other sampled destinations by person
# FIXME is there a better way to do this? Does this even matter for estimation?
choices["prob"] = choices["prob"].fillna(
choices.groupby("person_id")["prob"].transform("mean")
)

return choices


Expand Down Expand Up @@ -620,6 +657,8 @@ def run_location_simulate(
"orig_col_name": skims.orig_key, # added for sharrow flows
"dest_col_name": skims.dest_key, # added for sharrow flows
"timeframe": "timeless",
"reindex": reindex,
"land_use": state.get_dataframe("land_use"),
}
locals_d.update(model_settings.CONSTANTS or {})

Expand Down Expand Up @@ -833,6 +872,24 @@ def run_location_choice(
)
state.tracing.trace_df(choices_df, estimation_trace_label)

if want_logsums & (not skip_choice):
# grabbing index, could be person_id or proto_person_id
index_name = choices_df.index.name
# merging mode choice logsum of chosen alternative to choices
choices_df = (
pd.merge(
choices_df.reset_index(),
location_sample_df.reset_index()[
[index_name, model_settings.ALT_DEST_COL_NAME, ALT_LOGSUM]
],
how="left",
left_on=[index_name, "choice"],
right_on=[index_name, model_settings.ALT_DEST_COL_NAME],
)
.drop(columns=model_settings.ALT_DEST_COL_NAME)
.set_index(index_name)
)

choices_list.append(choices_df)

if want_sample_table:
Expand All @@ -850,7 +907,7 @@ def run_location_choice(
else:
# this will only happen with small samples (e.g. singleton) with no (e.g.) school segs
logger.warning("%s no choices", trace_label)
choices_df = pd.DataFrame(columns=["choice", "logsum"])
choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM])

if len(sample_list) > 0:
save_sample_df = pd.concat(sample_list)
Expand Down Expand Up @@ -893,7 +950,8 @@ def iterate_location_choice(
Returns
-------
adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds mode choice logsum to selected destination column model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME']- if provided
adds annotations to persons table
"""

Expand All @@ -903,7 +961,11 @@ def iterate_location_choice(
chooser_filter_column = model_settings.CHOOSER_FILTER_COLUMN_NAME

dest_choice_column_name = model_settings.DEST_CHOICE_COLUMN_NAME
logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME
dc_logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME
mc_logsum_column_name = model_settings.MODE_CHOICE_LOGSUM_COLUMN_NAME
want_logsums = (dc_logsum_column_name is not None) | (
mc_logsum_column_name is not None
)

sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME
want_sample_table = (
Expand Down Expand Up @@ -954,7 +1016,7 @@ def iterate_location_choice(
persons_merged_df_,
network_los,
shadow_price_calculator=spc,
want_logsums=logsum_column_name is not None,
want_logsums=want_logsums,
want_sample_table=want_sample_table,
estimator=estimator,
model_settings=model_settings,
Expand Down Expand Up @@ -1029,10 +1091,15 @@ def iterate_location_choice(
)

# add the dest_choice_logsum column to persons dataframe
if logsum_column_name:
persons_df[logsum_column_name] = (
if dc_logsum_column_name:
persons_df[dc_logsum_column_name] = (
choices_df["logsum"].reindex(persons_df.index).astype("float")
)
# add the mode choice logsum column to persons dataframe
if mc_logsum_column_name:
persons_df[mc_logsum_column_name] = (
choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float")
)

if save_sample_df is not None:
# might be None for tiny samples even if sample_table_name was specified
Expand Down Expand Up @@ -1072,9 +1139,13 @@ def iterate_location_choice(
if state.settings.trace_hh_id:
state.tracing.trace_df(households_df, label=trace_label, warn_if_empty=True)

if logsum_column_name:
if dc_logsum_column_name:
tracing.print_summary(
dc_logsum_column_name, choices_df["logsum"], value_counts=True
)
if mc_logsum_column_name:
tracing.print_summary(
logsum_column_name, choices_df["logsum"], value_counts=True
mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True
)

return persons_df
Expand Down
Loading

0 comments on commit 712cd1b

Please sign in to comment.