Skip to content

Commit

Permalink
Merge pull request #625 from IanCa/dev_refactor
Browse files Browse the repository at this point in the history
Add some df tests.  Update hed_assemble.  Make the df utils also work on series.
  • Loading branch information
VisLab authored Mar 17, 2023
2 parents 49c3c65 + 4c79d1b commit 671e144
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 23 deletions.
54 changes: 33 additions & 21 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from functools import partial
import pandas as pd

from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
Expand Down Expand Up @@ -51,7 +52,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
    """ Convert all tags in underlying dataframe to the specified form.

    Converts in place: the object passed in is modified and also returned,
    for both dataframes and series.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify
        hed_schema (HedSchema): The schema forwarded to the per-string conversion.
        tag_form(str): HedTag property to convert tags to.
        columns (list or None): The columns to modify on the dataframe.
            None means every column. Ignored when df is a series.

    Returns:
        pd.Dataframe or pd.Series: The same object that was passed in as df.
    """
    convert = partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)

    if isinstance(df, pd.Series):
        # Assign through a full slice so the caller's series is updated,
        # matching the documented in-place contract.  Rebinding the local name
        # to the result of apply() would leave the caller's object untouched.
        # to_numpy() keeps the write-back positional (no index alignment).
        df[:] = df.apply(convert).to_numpy()
    else:
        if columns is None:
            columns = df.columns

        for column in columns:
            df[column] = df[column].apply(convert)

    return df


def shrink_defs(df, hed_schema, columns=None):
    """ Shrinks any def-expand tags found in the dataframe.

    Converts in place: the object passed in is modified and also returned.
    Only cells whose text contains 'Def-expand/' (case-insensitive) are
    rewritten; missing (NaN/None) cells are left untouched.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify
        hed_schema (HedSchema or None): The schema to use to identify defs.
        columns (list or None): The columns to modify on the dataframe.
            None means every column. Ignored when df is a series.

    Returns:
        pd.Dataframe or pd.Series: The same object that was passed in as df.
    """
    shrink = partial(_shrink_defs, hed_schema=hed_schema)

    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean when the series has missing values.
        mask = df.str.contains('Def-expand/', case=False, na=False)
        df[mask] = df[mask].apply(shrink)
    else:
        if columns is None:
            columns = df.columns

        for column in columns:
            mask = df[column].str.contains('Def-expand/', case=False, na=False)
            # Single .loc assignment instead of chained df[column][mask] = ...,
            # which may write to a temporary copy (and always does under
            # pandas Copy-on-Write), leaving df unchanged.
            df.loc[mask, column] = df.loc[mask, column].apply(shrink)

    return df


def expand_defs(df, hed_schema, def_dict, columns=None):
    """ Expands any def tags found in the dataframe.

    Converts in place: the object passed in is modified and also returned.
    Only cells whose text contains 'Def/' (case-insensitive) are rewritten;
    missing (NaN/None) cells are left untouched.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify
        hed_schema (HedSchema or None): The schema to use to identify defs
        def_dict (DefinitionDict): The definitions to expand
        columns (list or None): The columns to modify on the dataframe.
            None means every column. Ignored when df is a series.

    Returns:
        pd.Dataframe or pd.Series: The same object that was passed in as df.
    """
    expand = partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)

    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean when the series has missing values.
        mask = df.str.contains('Def/', case=False, na=False)
        df[mask] = df[mask].apply(expand)
    else:
        if columns is None:
            columns = df.columns

        for column in columns:
            mask = df[column].str.contains('Def/', case=False, na=False)
            # Single .loc assignment instead of chained df[column][mask] = ...,
            # which may write to a temporary copy (and always does under
            # pandas Copy-on-Write), leaving df unchanged.
            df.loc[mask, column] = df.loc[mask, column].apply(expand)

    return df

Expand Down
7 changes: 5 additions & 2 deletions hed/tools/analysis/analysis_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from hed.tools.util.data_util import separate_values
from hed.models.hed_tag import HedTag
from hed.models.hed_group import HedGroup
from hed.models.df_util import get_assembled, expand_defs
from hed.models import df_util


def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False):
Expand All @@ -29,7 +29,10 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs
hed_string_list = data_input.series_a
definitions = sidecar.get_def_dict(hed_schema=schema)
if expand_defs:
expand_defs(hed_string_list, schema, definitions, columns=None)
df_util.expand_defs(hed_string_list, schema, definitions)
# Keep in mind hed_string_list is now a Series. The rest of the function should probably
# also be modified

# hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True,
# shrink_defs=False, expand_defs=True)
# hed_string_list = [str(hed) for hed in hed_obj_list]
Expand Down
114 changes: 114 additions & 0 deletions tests/models/test_df_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import unittest
import pandas as pd


from hed import load_schema_version
from hed.models.df_util import shrink_defs, expand_defs
from hed import DefinitionDict


class TestShrinkDefs(unittest.TestCase):
    """Exercises shrink_defs on dataframes and on series."""

    def setUp(self):
        self.schema = load_schema_version()

    def _check_frame(self, data, expected, columns):
        """Shrink a dataframe built from data over columns and compare it with expected."""
        shrunk = shrink_defs(pd.DataFrame(data), self.schema, columns)
        pd.testing.assert_frame_equal(shrunk, pd.DataFrame(expected))

    def _check_series(self, values, expected):
        """Shrink a series built from values and compare it with expected."""
        shrunk = shrink_defs(pd.Series(values), self.schema, None)
        pd.testing.assert_series_equal(shrunk, pd.Series(expected))

    def test_shrink_defs_normal(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]},
            {"column1": ["Def/TestDefNormal,Event/SomeEvent"]},
            ['column1'],
        )

    def test_shrink_defs_placeholder(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]},
            {"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]},
            ['column1'],
        )

    def test_shrink_defs_no_matching_tags(self):
        # Nothing to shrink, so the text must come back untouched.
        self._check_frame(
            {"column1": ["(Event/SomeEvent, Item/SomeItem,Age/25)"]},
            {"column1": ["(Event/SomeEvent, Item/SomeItem,Age/25)"]},
            ['column1'],
        )

    def test_shrink_defs_multiple_columns(self):
        self._check_frame(
            {
                "column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
                "column2": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"],
            },
            {
                "column1": ["Def/TestDefNormal,Event/SomeEvent"],
                "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"],
            },
            ['column1', 'column2'],
        )

    def test_shrink_defs_multiple_defs_same_line(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Age/30"]},
            {"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Age/30"]},
            ['column1'],
        )

    def test_shrink_defs_mixed_tags(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem,Age/25"]},
            {"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Age/25"]},
            ['column1'],
        )

    def test_shrink_defs_series_normal(self):
        self._check_series(
            ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
            ["Def/TestDefNormal,Event/SomeEvent"],
        )

    def test_shrink_defs_series_placeholder(self):
        self._check_series(
            ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"],
            ["Def/TestDefPlaceholder/123,Item/SomeItem"],
        )


class TestExpandDefs(unittest.TestCase):
    """Exercises expand_defs on dataframes and on series."""

    def setUp(self):
        self.schema = load_schema_version()
        # One plain definition and one that takes a placeholder value.
        definitions = [
            "(Definition/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2))",
            "(Definition/TestDefPlaceholder/#,(Action/TestDef1/#,Action/TestDef2))",
        ]
        self.def_dict = DefinitionDict(definitions, hed_schema=self.schema)

    def _check_frame(self, data, expected, columns):
        """Expand a dataframe built from data over columns and compare it with expected."""
        expanded = expand_defs(pd.DataFrame(data), self.schema, self.def_dict, columns)
        pd.testing.assert_frame_equal(expanded, pd.DataFrame(expected))

    def _check_series(self, values, expected):
        """Expand a series built from values and compare it with expected."""
        expanded = expand_defs(pd.Series(values), self.schema, self.def_dict, None)
        pd.testing.assert_series_equal(expanded, pd.Series(expected))

    def test_expand_defs_normal(self):
        self._check_frame(
            {"column1": ["Def/TestDefNormal,Event/SomeEvent"]},
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]},
            ['column1'],
        )

    def test_expand_defs_placeholder(self):
        self._check_frame(
            {"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]},
            {"column1": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]},
            ['column1'],
        )

    def test_expand_defs_no_matching_tags(self):
        # Nothing to expand, so the text must come back untouched.
        self._check_frame(
            {"column1": ["(Event/SomeEvent,Item/SomeItem,Age/25)"]},
            {"column1": ["(Event/SomeEvent,Item/SomeItem,Age/25)"]},
            ['column1'],
        )

    def test_expand_defs_multiple_columns(self):
        self._check_frame(
            {
                "column1": ["Def/TestDefNormal,Event/SomeEvent"],
                "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"],
            },
            {
                "column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
                "column2": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"],
            },
            ['column1', 'column2'],
        )

    def test_expand_defs_series_normal(self):
        self._check_series(
            ["Def/TestDefNormal,Event/SomeEvent"],
            ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
        )

    def test_expand_defs_series_placeholder(self):
        self._check_series(
            ["Def/TestDefPlaceholder/123,Item/SomeItem"],
            ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"],
        )

0 comments on commit 671e144

Please sign in to comment.