ActivitySim · jpn-- · Feb 13, 2024 · Jul 12, 2023 · Jul 17, 2023 · Jul 25, 2023
diff --git a/activitysim/abm/models/atwork_subtour_destination.py b/activitysim/abm/models/atwork_subtour_destination.py
@@ -89,11 +89,21 @@ def atwork_subtour_destination(
         estimator.end_estimation()
 
     subtours[destination_column_name] = choices_df["choice"]
-    assign_in_place(tours, subtours[[destination_column_name]])
+    assign_in_place(
+        tours,
+        subtours[[destination_column_name]],
+        state.settings.downcast_int,
+        state.settings.downcast_float,
+    )
 
     if want_logsums:
         subtours[logsum_column_name] = choices_df["logsum"]
-        assign_in_place(tours, subtours[[logsum_column_name]])
+        assign_in_place(
+            tours,
+            subtours[[logsum_column_name]],
+            state.settings.downcast_int,
+            state.settings.downcast_float,
+        )
 
     state.add_table("tours", tours)
 

diff --git a/activitysim/abm/models/atwork_subtour_frequency.py b/activitysim/abm/models/atwork_subtour_frequency.py
@@ -24,7 +24,15 @@
 
 def add_null_results(state, trace_label, tours):
+    """Store an empty-string categorical ``atwork_subtour_frequency`` on tours.
+
+    Called when the model is skipped; the tours table is passed to state.add_table.
+    """
     logger.info("Skipping %s: add_null_results", trace_label)
-    tours["atwork_subtour_frequency"] = np.nan
+    cat_type = pd.api.types.CategoricalDtype([""], ordered=False)
+    tours["atwork_subtour_frequency"] = ""
+    tours["atwork_subtour_frequency"] = tours["atwork_subtour_frequency"].astype(
+        cat_type
+    )
     state.add_table("tours", tours)
 
 
@@ -117,6 +125,11 @@ def atwork_subtour_frequency(
 
     # convert indexes to alternative names
     choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+    cat_type = pd.api.types.CategoricalDtype(
+        alternatives.index.tolist() + [""],
+        ordered=False,
+    )
+    choices = choices.astype(cat_type)
 
     if estimator:
         estimator.write_choices(choices)
@@ -137,6 +150,12 @@ def atwork_subtour_frequency(
 
     subtours = process_atwork_subtours(state, work_tours, alternatives)
 
+    # convert purpose to pandas categoricals
+    purpose_type = pd.api.types.CategoricalDtype(
+        alternatives.columns.tolist() + ["atwork"], ordered=False
+    )
+    subtours["tour_type"] = subtours["tour_type"].astype(purpose_type)
+
     tours = state.extend_table("tours", subtours)
 
     state.tracing.register_traceable_table("tours", subtours)

diff --git a/activitysim/abm/models/atwork_subtour_mode_choice.py b/activitysim/abm/models/atwork_subtour_mode_choice.py
@@ -190,7 +190,9 @@ def atwork_subtour_mode_choice(
         "%s choices" % trace_label, choices_df[mode_column_name], value_counts=True
     )
 
-    assign_in_place(tours, choices_df)
+    assign_in_place(
+        tours, choices_df, state.settings.downcast_int, state.settings.downcast_float
+    )
     state.add_table("tours", tours)
 
     # - annotate tours table

diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py
@@ -111,7 +111,9 @@ def atwork_subtour_scheduling(
         choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
     )
 
-    assign_in_place(tours, tdd_choices)
+    assign_in_place(
+        tours, tdd_choices, state.settings.downcast_int, state.settings.downcast_float
+    )
     state.add_table("tours", tours)
 
     if trace_hh_id:

diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py
@@ -219,6 +219,8 @@ def cdap_simulate(
         estimator.end_estimation()
 
     choices = choices.reindex(persons.index)
+    cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False)
+    choices = choices.astype(cap_cat_type)
     persons["cdap_activity"] = choices
 
     expressions.assign_columns(

diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py
@@ -24,6 +24,10 @@
 def add_null_results(state, trace_label, tours):
     logger.info("Skipping %s: add_null_results" % trace_label)
     tours["composition"] = ""
+    # The model was skipped, so every tour keeps the empty-string composition;
+    # cast the column so it is categorical rather than a plain string column.
+    null_dtype = pd.api.types.CategoricalDtype(["", "adults", "children", "mixed"])
+    tours["composition"] = tours["composition"].astype(null_dtype)
     state.add_table("tours", tours)
 
 
@@ -123,6 +127,10 @@ def joint_tour_composition(
 
     # convert indexes to alternative names
     choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+    cat_type = pd.api.types.CategoricalDtype(
+        model_spec.columns.tolist() + [""], ordered=False
+    )
+    choices = choices.astype(cat_type)
 
     if estimator:
         estimator.write_choices(choices)
@@ -134,7 +142,7 @@ def joint_tour_composition(
     joint_tours["composition"] = choices
 
     # reindex since we ran model on a subset of households
-    tours["composition"] = choices.reindex(tours.index).fillna("").astype(str)
+    tours["composition"] = choices.reindex(tours.index).fillna("")
     state.add_table("tours", tours)
 
     tracing.print_summary(

diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py
@@ -87,12 +87,22 @@ def joint_tour_destination(
 
     # add column as we want joint_tours table for tracing.
     joint_tours["destination"] = choices_df.choice
-    assign_in_place(tours, joint_tours[["destination"]])
+    assign_in_place(
+        tours,
+        joint_tours[["destination"]],
+        state.settings.downcast_int,
+        state.settings.downcast_float,
+    )
     state.add_table("tours", tours)
 
     if want_logsums:
         joint_tours[logsum_column_name] = choices_df["logsum"]
-        assign_in_place(tours, joint_tours[[logsum_column_name]])
+        assign_in_place(
+            tours,
+            joint_tours[[logsum_column_name]],
+            state.settings.downcast_int,
+            state.settings.downcast_float,
+        )
 
     tracing.print_summary("destination", joint_tours.destination, describe=True)
 

diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py
@@ -116,6 +116,11 @@ def joint_tour_frequency(
 
     # convert indexes to alternative names
     choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+    cat_type = pd.api.types.CategoricalDtype(
+        model_spec.columns.tolist(),
+        ordered=False,
+    )
+    choices = choices.astype(cat_type)
 
     if estimator:
         estimator.write_choices(choices)
@@ -138,6 +143,12 @@ def joint_tour_frequency(
 
     joint_tours = process_joint_tours(state, choices, alternatives, temp_point_persons)
 
+    # convert purpose to pandas categoricals
+    purpose_type = pd.api.types.CategoricalDtype(
+        alternatives.columns.tolist(), ordered=False
+    )
+    joint_tours["tour_type"] = joint_tours["tour_type"].astype(purpose_type)
+
     tours = state.extend_table("tours", joint_tours)
 
     state.tracing.register_traceable_table("tours", joint_tours)
@@ -147,8 +158,8 @@ def joint_tour_frequency(
 
     # we expect there to be an alt with no tours - which we can use to backfill non-travelers
     no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
-    households["joint_tour_frequency"] = (
-        choices.reindex(households.index).fillna(no_tours_alt).astype(str)
+    households["joint_tour_frequency"] = choices.reindex(households.index).fillna(
+        no_tours_alt
     )
 
     households["num_hh_joint_tours"] = (

diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py
@@ -92,7 +92,7 @@ def get_tour_satisfaction(candidates, participate):
 
         x = (
             candidates[cols]
-            .groupby(["tour_id", "composition"])
+            .groupby(["tour_id", "composition"], observed=True)
             .agg(
                 participants=("adult", "size"),
                 adults=("adult", "sum"),
@@ -475,7 +475,12 @@ def joint_tour_participation(
     # update number_of_participants which was initialized to 1
     joint_tours["number_of_participants"] = participants.groupby("tour_id").size()
 
-    assign_in_place(tours, joint_tours[["person_id", "number_of_participants"]])
+    assign_in_place(
+        tours,
+        joint_tours[["person_id", "number_of_participants"]],
+        state.settings.downcast_int,
+        state.settings.downcast_float,
+    )
 
     state.add_table("tours", tours)
 

diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py
@@ -161,7 +161,9 @@ def joint_tour_scheduling(
         choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
     )
 
-    assign_in_place(tours, choices)
+    assign_in_place(
+        tours, choices, state.settings.downcast_int, state.settings.downcast_float
+    )
     state.add_table("tours", tours)
 
     # updated df for tracing

diff --git a/activitysim/abm/models/mandatory_scheduling.py b/activitysim/abm/models/mandatory_scheduling.py
@@ -62,7 +62,9 @@ def mandatory_tour_scheduling(
         tour_segment_col,
     )
 
-    assign_in_place(tours, choices)
+    assign_in_place(
+        tours, choices, state.settings.downcast_int, state.settings.downcast_float
+    )
     state.add_table("tours", tours)
 
     # updated df for tracing

diff --git a/activitysim/abm/models/mandatory_tour_frequency.py b/activitysim/abm/models/mandatory_tour_frequency.py
@@ -30,7 +30,11 @@ def add_null_results(state, trace_label, mandatory_tour_frequency_settings):
     logger.info("Skipping %s: add_null_results", trace_label)
 
     persons = state.get_dataframe("persons")
-    persons["mandatory_tour_frequency"] = ""
+    persons["mandatory_tour_frequency"] = pd.Categorical(
+        [""] * len(persons),  # list-like input: one empty entry per person row
+        categories=["", "work1", "work2", "school1", "school2", "work_and_school"],
+        ordered=False,
+    )
 
     tours = pd.DataFrame()
     tours["tour_category"] = None
@@ -134,6 +138,10 @@ def mandatory_tour_frequency(
 
     # convert indexes to alternative names
     choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+    cat_type = pd.api.types.CategoricalDtype(
+        model_spec.columns.tolist() + [""], ordered=False
+    )
+    choices = choices.astype(cat_type)
 
     if estimator:
         estimator.write_choices(choices)
@@ -158,6 +166,12 @@ def mandatory_tour_frequency(
         state, persons=choosers, mandatory_tour_frequency_alts=alternatives
     )
 
+    # convert purpose to pandas categoricals
+    purpose_type = pd.api.types.CategoricalDtype(
+        alternatives.columns.tolist() + ["univ", "home", "escort"], ordered=False
+    )
+    mandatory_tours["tour_type"] = mandatory_tours["tour_type"].astype(purpose_type)
+
     tours = state.extend_table("tours", mandatory_tours)
     state.tracing.register_traceable_table("tours", mandatory_tours)
     state.get_rn_generator().add_channel("tours", mandatory_tours)
@@ -166,9 +180,7 @@ def mandatory_tour_frequency(
     persons = state.get_dataframe("persons")
 
     # need to reindex as we only handled persons with cdap_activity == 'M'
-    persons["mandatory_tour_frequency"] = (
-        choices.reindex(persons.index).fillna("").astype(str)
-    )
+    persons["mandatory_tour_frequency"] = choices.reindex(persons.index).fillna("")
 
     expressions.assign_columns(
         state,

diff --git a/activitysim/abm/models/non_mandatory_destination.py b/activitysim/abm/models/non_mandatory_destination.py
@@ -107,11 +107,21 @@ def non_mandatory_tour_destination(
             [pure_school_escort_tours, non_mandatory_tours]
         ).set_index(nm_tour_index)
 
-    assign_in_place(tours, non_mandatory_tours[["destination"]])
+    assign_in_place(
+        tours,
+        non_mandatory_tours[["destination"]],
+        state.settings.downcast_int,
+        state.settings.downcast_float,
+    )
 
     if want_logsums:
         non_mandatory_tours[logsum_column_name] = choices_df["logsum"]
-        assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])
+        assign_in_place(
+            tours,
+            non_mandatory_tours[[logsum_column_name]],
+            state.settings.downcast_int,
+            state.settings.downcast_float,
+        )
 
     assert all(
         ~tours["destination"].isna()

diff --git a/activitysim/abm/models/non_mandatory_scheduling.py b/activitysim/abm/models/non_mandatory_scheduling.py
@@ -47,7 +47,9 @@ def non_mandatory_tour_scheduling(
         tour_segment_col,
     )
 
-    assign_in_place(tours, choices)
+    assign_in_place(
+        tours, choices, state.settings.downcast_int, state.settings.downcast_float
+    )
     state.add_table("tours", tours)
 
     # updated df for tracing

diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py
@@ -404,6 +404,14 @@ def non_mandatory_tour_frequency(
     )
     assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()
 
+    # convert purpose to pandas categoricals
+    purpose_type = pd.api.types.CategoricalDtype(
+        alternatives.columns.tolist(), ordered=False
+    )
+    non_mandatory_tours["tour_type"] = non_mandatory_tours["tour_type"].astype(
+        purpose_type
+    )
+
     if estimator:
         # make sure they created the right tours
         survey_tours = estimation.manager.get_survey_table("tours").sort_index()

diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py
@@ -276,7 +276,12 @@ def run_parking_destination(
         if fail_some_trips_for_testing:
             parking_df = parking_df.drop(parking_df.index[0])
 
-        assign_in_place(trips, parking_df.to_frame(parking_location_column_name))
+        assign_in_place(
+            trips,
+            parking_df.to_frame(parking_location_column_name),
+            state.settings.downcast_int,
+            state.settings.downcast_float,
+        )
         trips[parking_location_column_name] = trips[
             parking_location_column_name
         ].fillna(-1)
@@ -398,7 +403,12 @@ def parking_location(
         trace_label=trace_label,
     )
 
-    assign_in_place(trips_df, parking_locations.to_frame(alt_destination_col_name))
+    assign_in_place(
+        trips_df,
+        parking_locations.to_frame(alt_destination_col_name),
+        state.settings.downcast_int,
+        state.settings.downcast_float,
+    )
 
     state.add_table("trips", trips_df)