Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to_excel function for new system #667

Merged
merged 1 commit into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,22 +206,33 @@ def to_excel(self, file, output_assembled=False):
raise ValueError("Empty file name or object passed in to BaseInput.save.")

dataframe = self._dataframe
old_columns = dataframe.columns

if output_assembled:
dataframe = self.dataframe_a
new_columns = dataframe.columns
else:
new_columns = old_columns

if self._loaded_workbook:
column_mapping = {} # assembled dataframe column number to original worksheet number
for new_c, column in enumerate(new_columns):
for old_c, old_column in enumerate(old_columns):
if column == old_column:
column_mapping[new_c] = old_c

old_worksheet = self.get_worksheet(self._worksheet_name)
# Excel spreadsheets are 1 based, then add another 1 for column names if present
adj_row_for_col_names = 1
if self._has_column_names:
adj_row_for_col_names += 1
adj_for_one_based_cols = 1
for row_number, text_file_row in dataframe.iterrows():
for column_number, column_text in enumerate(text_file_row):
for row_number in range(len(dataframe)):
for df_column_number, ws_column_number in column_mapping.items():
cell_value = dataframe.iat[row_number, df_column_number]

old_worksheet.cell(row_number + adj_row_for_col_names,
column_number + adj_for_one_based_cols).value = \
dataframe.iloc[row_number, column_number]
ws_column_number + adj_for_one_based_cols).value = cell_value
self._loaded_workbook.save(file)
else:
dataframe.to_excel(file, header=self._has_column_names)
Expand Down
57 changes: 51 additions & 6 deletions tests/models/test_spreadsheet_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
import pandas as pd


# TODO: Add tests about correct handling of 'n/a'


class Test(unittest.TestCase):
@classmethod
def setUpClass(cls):
Expand All @@ -20,7 +17,7 @@ def setUpClass(cls):
hed_xml_file = os.path.join(base, "schema_tests/HED8.0.0t.xml")
cls.hed_schema = schema.load_schema(hed_xml_file)
default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
"../data/validator_tests/ExcelMultipleSheets.xlsx")
"../data/spreadsheet_validator_tests/ExcelMultipleSheets.xlsx")
cls.default_test_file_name = default
cls.generic_file_input = SpreadsheetInput(default)
base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/")
Expand Down Expand Up @@ -186,7 +183,6 @@ def test_definitions_identified(self):
'../data/model_tests/no_column_header_definition.tsv')
hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1])


def test_loading_dataframe_directly(self):
ds_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../data/model_tests/no_column_header_definition.tsv')
Expand All @@ -209,9 +205,58 @@ def test_ignoring_na_value_column(self):
events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../data/model_tests/na_value_column.tsv')
sidecar_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../data/model_tests/na_value_column.json')
'../data/model_tests/na_value_column.json')
hed_input = TabularInput(events_path, sidecar=sidecar_path)
self.assertTrue(hed_input.dataframe_a.loc[1, 'Value'] == 'n/a')

def test_to_excel_workbook(self):
    """Save an assembled worksheet with to_excel and verify it reloads unchanged.

    Two round trips are exercised on the "LKT 8HED3" worksheet: one with only
    the HED tag column, and one that additionally applies column prefixes.
    """
    sheet = "LKT 8HED3"

    # Round trip 1: no column prefixes, compare the plain dataframes.
    plain_output = self.base_output_folder + "ExcelMultipleSheets_resave_assembled.xlsx"
    original = SpreadsheetInput(self.default_test_file_name,
                                worksheet_name=sheet,
                                tag_columns=["HED tags"])
    original.convert_to_long(self.hed_schema)
    original.to_excel(plain_output, output_assembled=True)
    reloaded = SpreadsheetInput(plain_output, worksheet_name=sheet)
    frames_match = original.dataframe.equals(reloaded.dataframe)
    self.assertTrue(frames_match)

    # Round trip 2: with column prefixes, compare the assembled dataframes.
    prefixes = {
        "Short label": "Label/",
        "Description in text": "Description"
    }
    prefixed_output = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_prefix.xlsx"
    original = SpreadsheetInput(self.default_test_file_name,
                                worksheet_name=sheet,
                                tag_columns=["HED tags"],
                                column_prefix_dictionary=prefixes)
    original.convert_to_long(self.hed_schema)
    original.to_excel(prefixed_output, output_assembled=True)
    # The prefixed columns are reloaded as ordinary tag columns.
    reloaded = SpreadsheetInput(prefixed_output,
                                worksheet_name=sheet,
                                tag_columns=["Short label", "Description in text", "HED tags"])
    assembled_match = original.dataframe_a.equals(reloaded.dataframe_a)
    self.assertTrue(assembled_match)

def test_to_excel_workbook_no_col_names(self):
    """Save an assembled worksheet loaded without column names and verify it reloads unchanged.

    Columns are addressed by index because the input is opened with
    has_column_names=False. Two round trips are exercised: one with only the
    tag column, and one that additionally applies column prefixes.
    """
    # Round trip 1: no column prefixes, compare the plain dataframes.
    excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3",
                                  tag_columns=[4], has_column_names=False)
    test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_no_col_names.xlsx"
    excel_book.convert_to_long(self.hed_schema)
    excel_book.to_excel(test_output_name, True)
    reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[4],
                                   has_column_names=False)
    self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe))

    # Round trip 2: with column prefixes, compare the assembled dataframes.
    excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", has_column_names=False,
                                  tag_columns=[4],
                                  column_prefix_dictionary={
                                      1: "Label/",
                                      3: "Description"
                                  })
    # Use a file name unique to this test: test_to_excel_workbook writes
    # "ExcelMultipleSheets_resave_assembled_prefix.xlsx" into the same folder,
    # and sharing it would let the two tests overwrite each other's output.
    test_output_name = (self.base_output_folder
                        + "ExcelMultipleSheets_resave_assembled_no_col_names_prefix.xlsx")
    excel_book.convert_to_long(self.hed_schema)
    excel_book.to_excel(test_output_name, True)
    # The prefixed columns are reloaded as ordinary tag columns.
    reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[1, 3, 4],
                                   has_column_names=False)

    self.assertTrue(excel_book.dataframe_a.equals(reloaded_df.dataframe_a))


if __name__ == '__main__':
unittest.main()