Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some df tests. Update hed_assemble. Make the df utils also work on series. #625

Merged
merged 1 commit into from
Mar 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 33 additions & 21 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from functools import partial
import pandas as pd

from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
Expand Down Expand Up @@ -51,7 +52,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], def_dict


def convert_to_form(df, hed_schema, tag_form, columns):
def convert_to_form(df, hed_schema, tag_form, columns=None):
""" Convert all tags in underlying dataframe to the specified form.

Converts in place
Expand All @@ -61,51 +62,62 @@ def convert_to_form(df, hed_schema, tag_form, columns):
tag_form(str): HedTag property to convert tags to.
columns (list): The columns to modify on the dataframe
"""
if columns is None:
columns = df.columns
if isinstance(df, pd.Series):
df = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))
else:
if columns is None:
columns = df.columns

for column in columns:
df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))
for column in columns:
df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form))

return df


def shrink_defs(df, hed_schema, columns=None):
    """ Shrinks any def-expand tags found in the dataframe.

        Converts in place

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify
        hed_schema (HedSchema or None): The schema to use to identify defs.
        columns (list or None): The columns to modify on the dataframe.
            Ignored for a series; None means every column of the dataframe.

    Returns:
        pd.Dataframe or pd.Series: The same object that was passed in as df.
    """
    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean when cells are missing;
        # a mask containing NaN cannot be used for boolean indexing.
        mask = df.str.contains('Def-expand/', case=False, na=False)
        if mask.any():
            df[mask] = df[mask].apply(partial(_shrink_defs, hed_schema=hed_schema))
        return df

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def-expand/', case=False, na=False)
        if not mask.any():
            continue
        # A single .loc assignment writes to the dataframe itself. The chained
        # form df[column][mask] = ... may write to a temporary copy instead.
        df.loc[mask, column] = df.loc[mask, column].apply(partial(_shrink_defs, hed_schema=hed_schema))

    return df


def expand_defs(df, hed_schema, def_dict, columns=None):
    """ Expands any def tags found in the dataframe.

        Converts in place

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify
        hed_schema (HedSchema or None): The schema to use to identify defs
        def_dict (DefinitionDict): The definitions to expand
        columns (list or None): The columns to modify on the dataframe.
            Ignored for a series; None means every column of the dataframe.

    Returns:
        pd.Dataframe or pd.Series: The same object that was passed in as df.
    """
    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean when cells are missing;
        # a mask containing NaN cannot be used for boolean indexing.
        mask = df.str.contains('Def/', case=False, na=False)
        if mask.any():
            df[mask] = df[mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))
        return df

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def/', case=False, na=False)
        if not mask.any():
            continue
        # A single .loc assignment writes to the dataframe itself. The chained
        # form df[column][mask] = ... may write to a temporary copy instead.
        df.loc[mask, column] = df.loc[mask, column].apply(
            partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))

    return df

Expand Down
7 changes: 5 additions & 2 deletions hed/tools/analysis/analysis_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from hed.tools.util.data_util import separate_values
from hed.models.hed_tag import HedTag
from hed.models.hed_group import HedGroup
from hed.models.df_util import get_assembled, expand_defs
from hed.models import df_util


def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False):
Expand All @@ -29,7 +29,10 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs
hed_string_list = data_input.series_a
definitions = sidecar.get_def_dict(hed_schema=schema)
if expand_defs:
expand_defs(hed_string_list, schema, definitions, columns=None)
df_util.expand_defs(hed_string_list, schema, definitions)
# Keep in mind hed_string_list is now a Series. The rest of the function should probably
# also be modified

# hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True,
# shrink_defs=False, expand_defs=True)
# hed_string_list = [str(hed) for hed in hed_obj_list]
Expand Down
114 changes: 114 additions & 0 deletions tests/models/test_df_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import unittest
import pandas as pd


from hed import load_schema_version
from hed.models.df_util import shrink_defs, expand_defs
from hed import DefinitionDict


class TestShrinkDefs(unittest.TestCase):
    """Exercise shrink_defs on dataframes and on series."""

    def setUp(self):
        self.schema = load_schema_version()

    def _check_frame(self, source, expected, columns):
        """Shrink a dataframe built from source and compare it to expected."""
        frame = pd.DataFrame(source)
        wanted = pd.DataFrame(expected)
        shrunk = shrink_defs(frame, self.schema, columns)
        pd.testing.assert_frame_equal(shrunk, wanted)

    def _check_series(self, source, expected):
        """Shrink a series built from source and compare it to expected."""
        values = pd.Series(source)
        wanted = pd.Series(expected)
        shrunk = shrink_defs(values, self.schema, None)
        pd.testing.assert_series_equal(shrunk, wanted)

    def test_shrink_defs_normal(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]},
            {"column1": ["Def/TestDefNormal,Event/SomeEvent"]},
            ['column1'])

    def test_shrink_defs_placeholder(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]},
            {"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]},
            ['column1'])

    def test_shrink_defs_no_matching_tags(self):
        # Nothing to shrink, so the input must come back unchanged.
        self._check_frame(
            {"column1": ["(Event/SomeEvent, Item/SomeItem,Age/25)"]},
            {"column1": ["(Event/SomeEvent, Item/SomeItem,Age/25)"]},
            ['column1'])

    def test_shrink_defs_multiple_columns(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
             "column2": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]},
            {"column1": ["Def/TestDefNormal,Event/SomeEvent"],
             "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]},
            ['column1', 'column2'])

    def test_shrink_defs_multiple_defs_same_line(self):
        self._check_frame(
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Age/30"]},
            {"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Age/30"]},
            ['column1'])

    def test_shrink_defs_mixed_tags(self):
        self._check_frame(
            {"column1": [
                "(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem,Age/25"]},
            {"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Age/25"]},
            ['column1'])

    def test_shrink_defs_series_normal(self):
        self._check_series(
            ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
            ["Def/TestDefNormal,Event/SomeEvent"])

    def test_shrink_defs_series_placeholder(self):
        self._check_series(
            ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"],
            ["Def/TestDefPlaceholder/123,Item/SomeItem"])


class TestExpandDefs(unittest.TestCase):
    """Exercise expand_defs on dataframes and on series."""

    def setUp(self):
        self.schema = load_schema_version()
        definitions = ["(Definition/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2))",
                       "(Definition/TestDefPlaceholder/#,(Action/TestDef1/#,Action/TestDef2))"]
        self.def_dict = DefinitionDict(definitions, hed_schema=self.schema)

    def _check_frame(self, source, expected, columns):
        """Expand a dataframe built from source and compare it to expected."""
        frame = pd.DataFrame(source)
        wanted = pd.DataFrame(expected)
        expanded = expand_defs(frame, self.schema, self.def_dict, columns)
        pd.testing.assert_frame_equal(expanded, wanted)

    def _check_series(self, source, expected):
        """Expand a series built from source and compare it to expected."""
        values = pd.Series(source)
        wanted = pd.Series(expected)
        expanded = expand_defs(values, self.schema, self.def_dict, None)
        pd.testing.assert_series_equal(expanded, wanted)

    def test_expand_defs_normal(self):
        self._check_frame(
            {"column1": ["Def/TestDefNormal,Event/SomeEvent"]},
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]},
            ['column1'])

    def test_expand_defs_placeholder(self):
        self._check_frame(
            {"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]},
            {"column1": [
                "(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]},
            ['column1'])

    def test_expand_defs_no_matching_tags(self):
        # Nothing to expand, so the input must come back unchanged.
        self._check_frame(
            {"column1": ["(Event/SomeEvent,Item/SomeItem,Age/25)"]},
            {"column1": ["(Event/SomeEvent,Item/SomeItem,Age/25)"]},
            ['column1'])

    def test_expand_defs_multiple_columns(self):
        self._check_frame(
            {"column1": ["Def/TestDefNormal,Event/SomeEvent"],
             "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]},
            {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"],
             "column2": [
                 "(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]},
            ['column1', 'column2'])

    def test_expand_defs_series_normal(self):
        self._check_series(
            ["Def/TestDefNormal,Event/SomeEvent"],
            ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"])

    def test_expand_defs_series_placeholder(self):
        self._check_series(
            ["Def/TestDefPlaceholder/123,Item/SomeItem"],
            ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"])