Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue with df_util functions. Also make them not return the modified df/series #695

Merged
merged 1 commit into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,23 +59,21 @@ def convert_to_form(df, hed_schema, tag_form, columns=None):
""" Convert all tags in underlying dataframe to the specified form (in place).

Parameters:
df (pd.Dataframe): The dataframe to modify
df (pd.DataFrame or pd.Series): The dataframe or series to modify
hed_schema (HedSchema): The schema to use to convert tags.
tag_form(str): HedTag property to convert tags to.
columns (list): The columns to modify on the dataframe.

"""
if isinstance(df, pd.Series):
df = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))
df[:] = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))
else:
if columns is None:
columns = df.columns

for column in columns:
df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))

return df


def shrink_defs(df, hed_schema, columns=None):
""" Shrink (in place) any def-expand tags found in the specified columns in the dataframe.
Expand All @@ -97,8 +95,6 @@ def shrink_defs(df, hed_schema, columns=None):
mask = df[column].str.contains('Def-expand/', case=False)
df[column][mask] = df[column][mask].apply(partial(_shrink_defs, hed_schema=hed_schema))

return df


def expand_defs(df, hed_schema, def_dict, columns=None):
""" Expand (in place) any def tags found in the specified columns in the dataframe.
Expand All @@ -120,9 +116,7 @@ def expand_defs(df, hed_schema, def_dict, columns=None):

for column in columns:
mask = df[column].str.contains('Def/', case=False)
df[column][mask] = df[column][mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))

return df
df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))


def _convert_to_form(hed_string, hed_schema, tag_form):
Expand Down
80 changes: 40 additions & 40 deletions tests/models/test_df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,54 +14,54 @@ def setUp(self):
def test_shrink_defs_normal(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_placeholder(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_no_matching_tags(self):
df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]})
expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_multiple_columns(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"],
"column2": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"],
"column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]})
result = shrink_defs(df, self.schema, ['column1', 'column2'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1', 'column2'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_multiple_defs_same_line(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Acceleration/30"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Acceleration/30"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_mixed_tags(self):
df = pd.DataFrame({"column1": [
"(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem,Acceleration/25"]})
expected_df = pd.DataFrame(
{"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Acceleration/25"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_series_normal(self):
series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"])
expected_series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"])
result = shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(result, expected_series)
shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(series, expected_series)

def test_shrink_defs_series_placeholder(self):
series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"])
expected_series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"])
result = shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(result, expected_series)
shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(series, expected_series)


class TestExpandDefs(unittest.TestCase):
Expand All @@ -75,21 +75,21 @@ def test_expand_defs_normal(self):
df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]})
expected_df = pd.DataFrame(
{"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_placeholder(self):
df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]})
expected_df = pd.DataFrame({"column1": [
"(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_no_matching_tags(self):
df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]})
expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_multiple_columns(self):
df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"],
Expand All @@ -98,20 +98,20 @@ def test_expand_defs_multiple_columns(self):
{"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"],
"column2": [
"(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1', 'column2'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1', 'column2'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_series_normal(self):
series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"])
expected_series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"])
result = expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(result, expected_series)
expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(series, expected_series)

def test_expand_defs_series_placeholder(self):
series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"])
expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"])
result = expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(result, expected_series)
expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(series, expected_series)


class TestConvertToForm(unittest.TestCase):
Expand All @@ -121,38 +121,38 @@ def setUp(self):
def test_convert_to_form_short_tags(self):
df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]})
expected_df = pd.DataFrame({"column1": ["Azure,See"]})
result = convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_convert_to_form_long_tags(self):
df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]})
expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]})
result = convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_convert_to_form_series_short_tags(self):
series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"])
expected_series = pd.Series(["Azure,See"])
result = convert_to_form(series, self.schema, "short_tag")
pd.testing.assert_series_equal(result, expected_series)
convert_to_form(series, self.schema, "short_tag")
pd.testing.assert_series_equal(series, expected_series)

def test_convert_to_form_series_long_tags(self):
series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"])
expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"])
result = convert_to_form(series, self.schema, "long_tag")
pd.testing.assert_series_equal(result, expected_series)
convert_to_form(series, self.schema, "long_tag")
pd.testing.assert_series_equal(series, expected_series)

def test_convert_to_form_multiple_tags_short(self):
df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]})
expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]})
result = convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_convert_to_form_multiple_tags_long(self):
df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]})
expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]})
result = convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_basic_expand_detection(self):
# all simple cases with no duplicates
Expand Down
4 changes: 2 additions & 2 deletions tests/models/test_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def test_set_hed_strings(self):

for column_data in sidecar:
hed_strings = column_data.get_hed_strings()
hed_strings = df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag")
df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag")
column_data.set_hed_strings(hed_strings)
sidecar_long = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json"))
self.assertEqual(sidecar.loaded_dict, sidecar_long.loaded_dict)
Expand All @@ -151,7 +151,7 @@ def test_set_hed_strings(self):

for column_data in sidecar:
hed_strings = column_data.get_hed_strings()
hed_strings = df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag")
df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag")
column_data.set_hed_strings(hed_strings)
sidecar_short = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json"))
self.assertEqual(sidecar.loaded_dict, sidecar_short.loaded_dict)
Expand Down