Skip to content

Commit

Permalink
Merge pull request #695 from IanCa/develop
Browse files: browse the repository at this point in the history
Fix issue with df_util. Also make the df_util functions not return the modified df/series
  • Loading branch information
VisLab authored Jun 15, 2023
2 parents c1a7dcb + da7f4f6 commit 5668230
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 51 deletions.
12 changes: 3 additions & 9 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,23 +59,21 @@ def convert_to_form(df, hed_schema, tag_form, columns=None):
""" Convert all tags in underlying dataframe to the specified form (in place).
Parameters:
df (pd.Dataframe): The dataframe to modify
df (pd.DataFrame or pd.Series): The dataframe or series to modify
hed_schema (HedSchema): The schema to use to convert tags.
tag_form (str): HedTag property to convert tags to.
columns (list): The columns to modify on the dataframe.
"""
if isinstance(df, pd.Series):
df = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))
df[:] = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))
else:
if columns is None:
columns = df.columns

for column in columns:
df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))

return df


def shrink_defs(df, hed_schema, columns=None):
""" Shrink (in place) any def-expand tags found in the specified columns in the dataframe.
Expand All @@ -97,8 +95,6 @@ def shrink_defs(df, hed_schema, columns=None):
mask = df[column].str.contains('Def-expand/', case=False)
df[column][mask] = df[column][mask].apply(partial(_shrink_defs, hed_schema=hed_schema))

return df


def expand_defs(df, hed_schema, def_dict, columns=None):
""" Expands any def tags found in the dataframe.
Expand All @@ -120,9 +116,7 @@ def expand_defs(df, hed_schema, def_dict, columns=None):

for column in columns:
mask = df[column].str.contains('Def/', case=False)
df[column][mask] = df[column][mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))

return df
df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))


def _convert_to_form(hed_string, hed_schema, tag_form):
Expand Down
80 changes: 40 additions & 40 deletions tests/models/test_df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,54 +14,54 @@ def setUp(self):
def test_shrink_defs_normal(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_placeholder(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_no_matching_tags(self):
df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]})
expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_multiple_columns(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"],
"column2": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"],
"column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]})
result = shrink_defs(df, self.schema, ['column1', 'column2'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1', 'column2'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_multiple_defs_same_line(self):
df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Acceleration/30"]})
expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Acceleration/30"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_mixed_tags(self):
df = pd.DataFrame({"column1": [
"(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem,Acceleration/25"]})
expected_df = pd.DataFrame(
{"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Acceleration/25"]})
result = shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
shrink_defs(df, self.schema, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_shrink_defs_series_normal(self):
series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"])
expected_series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"])
result = shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(result, expected_series)
shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(series, expected_series)

def test_shrink_defs_series_placeholder(self):
series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"])
expected_series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"])
result = shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(result, expected_series)
shrink_defs(series, self.schema, None)
pd.testing.assert_series_equal(series, expected_series)


class TestExpandDefs(unittest.TestCase):
Expand All @@ -75,21 +75,21 @@ def test_expand_defs_normal(self):
df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]})
expected_df = pd.DataFrame(
{"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_placeholder(self):
df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]})
expected_df = pd.DataFrame({"column1": [
"(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_no_matching_tags(self):
df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]})
expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_multiple_columns(self):
df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"],
Expand All @@ -98,20 +98,20 @@ def test_expand_defs_multiple_columns(self):
{"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"],
"column2": [
"(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]})
result = expand_defs(df, self.schema, self.def_dict, ['column1', 'column2'])
pd.testing.assert_frame_equal(result, expected_df)
expand_defs(df, self.schema, self.def_dict, ['column1', 'column2'])
pd.testing.assert_frame_equal(df, expected_df)

def test_expand_defs_series_normal(self):
series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"])
expected_series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"])
result = expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(result, expected_series)
expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(series, expected_series)

def test_expand_defs_series_placeholder(self):
series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"])
expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"])
result = expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(result, expected_series)
expand_defs(series, self.schema, self.def_dict, None)
pd.testing.assert_series_equal(series, expected_series)


class TestConvertToForm(unittest.TestCase):
Expand All @@ -121,38 +121,38 @@ def setUp(self):
def test_convert_to_form_short_tags(self):
df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]})
expected_df = pd.DataFrame({"column1": ["Azure,See"]})
result = convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_convert_to_form_long_tags(self):
df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]})
expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]})
result = convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_convert_to_form_series_short_tags(self):
series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"])
expected_series = pd.Series(["Azure,See"])
result = convert_to_form(series, self.schema, "short_tag")
pd.testing.assert_series_equal(result, expected_series)
convert_to_form(series, self.schema, "short_tag")
pd.testing.assert_series_equal(series, expected_series)

def test_convert_to_form_series_long_tags(self):
series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"])
expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"])
result = convert_to_form(series, self.schema, "long_tag")
pd.testing.assert_series_equal(result, expected_series)
convert_to_form(series, self.schema, "long_tag")
pd.testing.assert_series_equal(series, expected_series)

def test_convert_to_form_multiple_tags_short(self):
df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]})
expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]})
result = convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "short_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_convert_to_form_multiple_tags_long(self):
df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]})
expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]})
result = convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(result, expected_df)
convert_to_form(df, self.schema, "long_tag", ['column1'])
pd.testing.assert_frame_equal(df, expected_df)

def test_basic_expand_detection(self):
# all simple cases with no duplicates
Expand Down
4 changes: 2 additions & 2 deletions tests/models/test_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def test_set_hed_strings(self):

for column_data in sidecar:
hed_strings = column_data.get_hed_strings()
hed_strings = df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag")
df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag")
column_data.set_hed_strings(hed_strings)
sidecar_long = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json"))
self.assertEqual(sidecar.loaded_dict, sidecar_long.loaded_dict)
Expand All @@ -151,7 +151,7 @@ def test_set_hed_strings(self):

for column_data in sidecar:
hed_strings = column_data.get_hed_strings()
hed_strings = df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag")
df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag")
column_data.set_hed_strings(hed_strings)
sidecar_short = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json"))
self.assertEqual(sidecar.loaded_dict, sidecar_short.loaded_dict)
Expand Down

0 comments on commit 5668230

Please sign in to comment.