Skip to content

Commit

Permalink
Add Int64 check to enforce_int_typing.
Browse files Browse the repository at this point in the history
  • Loading branch information
morriscb committed Jul 28, 2023
1 parent 80aac81 commit 04c9b3a
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _get_ophys_session_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
self._ophys_session_table = df.set_index("ophys_session_id")

def get_ophys_session_table(self) -> pd.DataFrame:
Expand All @@ -188,7 +188,7 @@ def _get_behavior_session_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)

self._behavior_session_table = df.set_index("behavior_session_id")

Expand Down Expand Up @@ -218,7 +218,7 @@ def _get_ophys_experiment_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
self._ophys_experiment_table = df.set_index("ophys_experiment_id")

def _get_ophys_cells_table(self):
Expand Down
14 changes: 11 additions & 3 deletions allensdk/core/dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def enforce_df_column_order(


def enforce_df_int_typing(input_df: pd.DataFrame,
int_columns: List[str]) -> pd.DataFrame:
int_columns: List[str],
use_pandas_type=False) -> pd.DataFrame:
"""Enforce integer typing for columns that may have lost int typing when
combined into the final DataFrame.
Expand All @@ -138,6 +139,10 @@ def enforce_df_int_typing(input_df: pd.DataFrame,
Columns to enforce int typing and fill any NaN/None values with the
value set in INT_NULL in this file. Requested columns not in the
dataframe are ignored.
use_pandas_type : bool
Instead of filling with the value INT_NULL to enforce integer typing,
use the pandas type Int64. This type can have issues converting to
numpy/array type values.
Returns
-------
Expand All @@ -147,6 +152,9 @@ def enforce_df_int_typing(input_df: pd.DataFrame,
"""
for col in int_columns:
if col in input_df.columns:
input_df[col] = \
input_df[col].fillna(INT_NULL).astype(int)
if use_pandas_type:
input_df[col] = input_df[col].astype('Int64')
else:
input_df[col] = \
input_df[col].fillna(INT_NULL).astype(int)
return input_df

0 comments on commit 04c9b3a

Please sign in to comment.