Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve error handling of table #308

Merged
merged 20 commits into main from 147-improve-error-handling-of-table
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
f56ceb3
refactor: Added and specified Errors in `Table`
Marsmaennchen221 May 12, 2023
5137707
split, remove cloumns
robmeth May 12, 2023
0c73682
slice rows
robmeth May 12, 2023
b713c92
test: Added tests for Error in `Table.plot_lineplot` and `Table.plot_…
Marsmaennchen221 May 12, 2023
e282f1d
Merge branch '147-improve-error-handling-of-table' of https://github.…
Marsmaennchen221 May 12, 2023
005b3c8
check error messages
robmeth May 19, 2023
188a403
test: Tests from `Table` now assert the exact error messages
Marsmaennchen221 May 19, 2023
6f18c7f
Merge branch '147-improve-error-handling-of-table' of https://github.…
Marsmaennchen221 May 19, 2023
af62233
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 147-imp…
Marsmaennchen221 May 19, 2023
ba2858a
refactor: Fixed RUFF Linter problems
Marsmaennchen221 May 19, 2023
150e417
style: apply automated linter fixes
megalinter-bot May 19, 2023
61964a1
style: apply automated linter fixes
megalinter-bot May 19, 2023
0b1ba0c
feat: Changed `Table.__eq__` to handle empty tables with no columns
Marsmaennchen221 May 19, 2023
c473e34
Merge branch '147-improve-error-handling-of-table' of https://github.…
Marsmaennchen221 May 19, 2023
fe5a541
feat: Changed `Table.__eq__` to handle empty tables with no columns
Marsmaennchen221 May 19, 2023
ef92428
perf: Improved performance to not sort empty tables in the `Table.__e…
Marsmaennchen221 May 19, 2023
fb4c3a4
Update src/safeds/data/tabular/containers/_table.py
Marsmaennchen221 May 24, 2023
1da7952
Update src/safeds/data/tabular/containers/_table.py
Marsmaennchen221 May 24, 2023
c2492da
Merge branch 'main' into 147-improve-error-handling-of-table
PhilipGutberlet May 25, 2023
f6077b5
Merge branch 'main' into 147-improve-error-handling-of-table
PhilipGutberlet May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 98 additions & 27 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
NonNumericColumnError,
SchemaMismatchError,
UnknownColumnNameError,
WrongFileExtensionError,
)

from ._column import Column
Expand Down Expand Up @@ -92,9 +93,12 @@ def from_csv_file(path: str | Path) -> Table:
------
FileNotFoundError
If the specified file does not exist.
ValueError
If the file could not be read.
WrongFileExtensionError
If the file is not a CSV file.
"""
path = Path(path)
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
try:
return Table._from_pandas_dataframe(pd.read_csv(path))
except FileNotFoundError as exception:
Expand All @@ -105,6 +109,8 @@ def from_excel_file(path: str | Path) -> Table:
"""
Read data from an Excel file into a table.

Valid file extensions are `.xls`, `.xlsx`, `.xlsm`, `.xlsb`, `.odf`, `.ods` and `.odt`.

Parameters
----------
path : str | Path
Expand All @@ -119,9 +125,13 @@ def from_excel_file(path: str | Path) -> Table:
------
FileNotFoundError
If the specified file does not exist.
ValueError
If the file could not be read.
WrongFileExtensionError
If the file is not an Excel file.
"""
path = Path(path)
excel_extensions = [".xls", ".xlsx", ".xlsm", ".xlsb", ".odf", ".ods", ".odt"]
if path.suffix not in excel_extensions:
raise WrongFileExtensionError(path, excel_extensions)
try:
return Table._from_pandas_dataframe(
pd.read_excel(path, engine="openpyxl", usecols=lambda colname: "Unnamed" not in colname),
Expand All @@ -148,9 +158,12 @@ def from_json_file(path: str | Path) -> Table:
------
FileNotFoundError
If the specified file does not exist.
ValueError
If the file could not be read.
WrongFileExtensionError
If the file is not a JSON file.
"""
path = Path(path)
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
try:
return Table._from_pandas_dataframe(pd.read_json(path))
except FileNotFoundError as exception:
Expand Down Expand Up @@ -197,14 +210,20 @@ def from_columns(columns: list[Column]) -> Table:
------
ColumnLengthMismatchError
If any of the column sizes does not match with the others.
DuplicateColumnNameError
If multiple columns have the same name.
"""
dataframe: DataFrame = pd.DataFrame()
column_names = []

for column in columns:
if column._data.size != columns[0]._data.size:
raise ColumnLengthMismatchError(
"\n".join(f"{column.name}: {column._data.size}" for column in columns),
)
if column.name in column_names:
raise DuplicateColumnNameError(column.name)
column_names.append(column.name)
dataframe[column.name] = column._data

return Table._from_pandas_dataframe(dataframe)
Expand Down Expand Up @@ -328,6 +347,8 @@ def __eq__(self, other: Any) -> bool:
return NotImplemented
if self is other:
return True
if self.number_of_rows == 0 and other.number_of_rows == 0:
return self.column_names == other.column_names
table1 = self.sort_columns()
table2 = other.sort_columns()
return table1._schema == table2._schema and table1._data.equals(table2._data)
Expand Down Expand Up @@ -463,7 +484,7 @@ def get_column_type(self, column_name: str) -> ColumnType:

Raises
------
ColumnNameError
UnknownColumnNameError
If the specified target column name does not exist.
"""
return self._schema.get_column_type(column_name)
Expand Down Expand Up @@ -627,6 +648,10 @@ def add_row(self, row: Row) -> Table:
table : Table
A new table with the added row at the end.

Raises
------
SchemaMismatchError
If the schema of the row does not match the table schema.
"""
if self._schema != row.schema:
raise SchemaMismatchError
Expand All @@ -650,6 +675,11 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
-------
result : Table
A new table which combines the original table and the given rows.

Raises
------
SchemaMismatchError
If the schema of one of the rows does not match the table schema.
"""
if isinstance(rows, Table):
rows = rows.to_rows()
Expand Down Expand Up @@ -705,7 +735,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table:

Raises
------
ColumnNameError
UnknownColumnNameError
If any of the given columns does not exist.
"""
invalid_columns = []
Expand Down Expand Up @@ -737,7 +767,7 @@ def remove_columns(self, column_names: list[str]) -> Table:

Raises
------
ColumnNameError
UnknownColumnNameError
If any of the given columns does not exist.
"""
invalid_columns = []
Expand Down Expand Up @@ -851,7 +881,7 @@ def rename_column(self, old_name: str, new_name: str) -> Table:

Raises
------
ColumnNameError
UnknownColumnNameError
If the specified old target column name does not exist.
DuplicateColumnNameError
If the specified new target column name already exists.
Expand Down Expand Up @@ -960,7 +990,7 @@ def slice_rows(

Raises
------
ValueError
IndexOutOfBoundsError
If the index is out of bounds.
"""
if start is None:
Expand All @@ -969,8 +999,10 @@ def slice_rows(
if end is None:
end = self.number_of_rows

if start < 0 or end < 0 or start >= self.number_of_rows or end > self.number_of_rows or end < start:
raise ValueError("The given index is out of bounds")
if end < start:
raise IndexOutOfBoundsError(slice(start, end))
if start < 0 or end < 0 or start > self.number_of_rows or end > self.number_of_rows:
raise IndexOutOfBoundsError(start if start < 0 or start > self.number_of_rows else end)

new_df = self._data.iloc[start:end:step]
new_df.columns = self._schema.column_names
Expand Down Expand Up @@ -1053,10 +1085,13 @@ def split(self, percentage_in_first: float) -> tuple[Table, Table]:
A tuple containing the two resulting tables. The first table has the specified size, the second table
contains the rest of the data.


Raises
------
ValueError
If `percentage_in_first` is not between 0 and 1.
"""
if percentage_in_first <= 0 or percentage_in_first >= 1:
raise ValueError("the given percentage is not in range")
if percentage_in_first < 0 or percentage_in_first > 1:
raise ValueError("The given percentage is not between 0 and 1")
return (
self.slice_rows(0, round(percentage_in_first * self.number_of_rows)),
self.slice_rows(round(percentage_in_first * self.number_of_rows)),
Expand All @@ -1079,6 +1114,13 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None)
-------
tagged_table : TaggedTable
A new tagged table with the given target and feature names.

Raises
------
ValueError
If the target column is also a feature column.
ValueError
If no feature columns are specified.
"""
from ._tagged_table import TaggedTable

Expand Down Expand Up @@ -1241,10 +1283,11 @@ def plot_lineplot(self, x_column_name: str, y_column_name: str) -> Image:
UnknownColumnNameError
If either of the columns do not exist.
"""
if not self.has_column(x_column_name):
raise UnknownColumnNameError([x_column_name])
if not self.has_column(y_column_name):
raise UnknownColumnNameError([y_column_name])
if not self.has_column(x_column_name) or not self.has_column(y_column_name):
raise UnknownColumnNameError(
([x_column_name] if not self.has_column(x_column_name) else [])
+ ([y_column_name] if not self.has_column(y_column_name) else []),
)

fig = plt.figure()
ax = sns.lineplot(
Expand Down Expand Up @@ -1288,10 +1331,11 @@ def plot_scatterplot(self, x_column_name: str, y_column_name: str) -> Image:
UnknownColumnNameError
If either of the columns do not exist.
"""
if not self.has_column(x_column_name):
raise UnknownColumnNameError([x_column_name])
if not self.has_column(y_column_name):
raise UnknownColumnNameError([y_column_name])
if not self.has_column(x_column_name) or not self.has_column(y_column_name):
raise UnknownColumnNameError(
([x_column_name] if not self.has_column(x_column_name) else [])
+ ([y_column_name] if not self.has_column(y_column_name) else []),
)

fig = plt.figure()
ax = sns.scatterplot(
Expand Down Expand Up @@ -1399,8 +1443,16 @@ def to_csv_file(self, path: str | Path) -> None:
----------
path : str | Path
The path to the output file.

Raises
------
WrongFileExtensionError
If the file is not a CSV file.
"""
Path(path).parent.mkdir(parents=True, exist_ok=True)
path = Path(path)
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
path.parent.mkdir(parents=True, exist_ok=True)
data_to_csv = self._data.copy()
data_to_csv.columns = self._schema.column_names
data_to_csv.to_csv(path, index=False)
Expand All @@ -1409,19 +1461,30 @@ def to_excel_file(self, path: str | Path) -> None:
"""
Write the data from the table into an Excel file.

Valid file extensions are `.xls`, `.xlsx`, `.xlsm`, `.xlsb`, `.odf`, `.ods` and `.odt`.
If the file and/or the directories do not exist, they will be created. If the file already exists, it will be
overwritten.

Parameters
----------
path : str | Path
The path to the output file.

Raises
------
WrongFileExtensionError
If the file is not an Excel file.
"""
path = Path(path)
excel_extensions = [".xls", ".xlsx", ".xlsm", ".xlsb", ".odf", ".ods", ".odt"]
if path.suffix not in excel_extensions:
raise WrongFileExtensionError(path, excel_extensions)

# Create Excel metadata in the file
tmp_table_file = openpyxl.Workbook()
tmp_table_file.save(path)

Path(path).parent.mkdir(parents=True, exist_ok=True)
path.parent.mkdir(parents=True, exist_ok=True)
data_to_excel = self._data.copy()
data_to_excel.columns = self._schema.column_names
data_to_excel.to_excel(path)
Expand All @@ -1437,8 +1500,16 @@ def to_json_file(self, path: str | Path) -> None:
----------
path : str | Path
The path to the output file.

Raises
------
WrongFileExtensionError
If the file is not a JSON file.
"""
Path(path).parent.mkdir(parents=True, exist_ok=True)
path = Path(path)
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
path.parent.mkdir(parents=True, exist_ok=True)
data_to_json = self._data.copy()
data_to_json.columns = self._schema.column_names
data_to_json.to_json(path)
Expand Down
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/typing/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def get_column_type(self, column_name: str) -> ColumnType:

Raises
------
ColumnNameError
UnknownColumnNameError
If the specified column name does not exist.

Examples
Expand Down
2 changes: 2 additions & 0 deletions src/safeds/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
TransformerNotFittedError,
UnknownColumnNameError,
ValueNotPresentWhenFittedError,
WrongFileExtensionError,
)
from safeds.exceptions._ml import (
DatasetContainsTargetError,
Expand All @@ -31,6 +32,7 @@
"TransformerNotFittedError",
"UnknownColumnNameError",
"ValueNotPresentWhenFittedError",
"WrongFileExtensionError",
# ML exceptions
"DatasetContainsTargetError",
"DatasetMissesFeaturesError",
Expand Down
22 changes: 21 additions & 1 deletion src/safeds/exceptions/_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pathlib import Path


class UnknownColumnNameError(KeyError):
"""
Expand Down Expand Up @@ -47,7 +52,10 @@ class IndexOutOfBoundsError(IndexError):
"""

def __init__(self, index: int | slice):
super().__init__(f"There is no element at index '{index}'.")
if isinstance(index, int):
super().__init__(f"There is no element at index '{index}'.")
else:
super().__init__(f"There is no element in the range [{index.start}, {index.stop}]")


class ColumnSizeError(Exception):
Expand Down Expand Up @@ -92,3 +100,15 @@ class ValueNotPresentWhenFittedError(Exception):

def __init__(self, value: str, column: str) -> None:
super().__init__(f"Value not present in the table the transformer was fitted on: \n{value} in column {column}.")


class WrongFileExtensionError(Exception):
    """
    Exception raised when a file does not have one of the expected file extensions.

    Parameters
    ----------
    file : str | Path
        The path of the offending file. It is interpolated into the error message.
    file_extension : str | list[str]
        The accepted extension, or a list of accepted extensions. It is interpolated into the error message as-is,
        so a list appears in its Python list representation.
    """

    def __init__(self, file: str | Path, file_extension: str | list[str]) -> None:
        # Build the message first so the call to the base class stays a one-liner.
        message = (
            f"The file {file} has a wrong file extension. Please provide a file with the following extension(s):"
            f" {file_extension}"
        )
        super().__init__(message)
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def test_should_add_column(table1: Table, column: Column, expected: Table) -> No

def test_should_raise_error_if_column_name_exists() -> None:
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
with pytest.raises(DuplicateColumnNameError):
with pytest.raises(DuplicateColumnNameError, match=r"Column 'col1' already exists."):
table1.add_column(Column("col1", ["a", "b", "c"]))


def test_should_raise_error_if_column_size_invalid() -> None:
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
with pytest.raises(ColumnSizeError):
with pytest.raises(ColumnSizeError, match=r"Expected a column of size 3 but got column of size 4."):
table1.add_column(Column("col3", ["a", "b", "c", "d"]))
Loading