Skip to content

Commit

Permalink
better error handling (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
goFrendiAsgard authored Dec 3, 2023
1 parent 42c3a70 commit f191f0d
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 50 deletions.
40 changes: 32 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,21 +83,24 @@ download_dataset = CmdTask(
)

# 📊 Define a task named `show-stat` to show the statistics properties of the dataset.
# We use `@python_task` decorator since this task is better written in Python.
# This tasks depends on our previous tasks, `download_dataset` and `install_pandas`
# If this task failed, then it is failed. No need to retry
# @python_task` decorator turns a function into a Zrb Task (i.e., `show_stat` is now a Zrb Task).
# If this task failed, we don't want to retry
# We also want to register the task so that it is accessible from the CLI
@python_task(
name='show-stat',
upstreams=[download_dataset, install_pandas],
retry=0
)
def show_stat(*args, **kwargs):
import pandas as pd
df = pd.read_csv('dataset.csv')
return df.describe()

# Register `show_stat`, so that the task is accessible from the CLI (i.e., zrb show-stat)
runner.register(show_stat)
# Define dependencies: `show_stat` depends on both, `download_dataset` and `install_pandas`
download_dataset >> show_stat
install_pandas >> show_stat

# Register the tasks so that they are accessbie from the CLI
runner.register(install_pandas, download_dataset, show_stat)
```

> __📝 NOTE:__ It is possible (although less readable) to define `show_stat` as `CmdTask`:
Expand All @@ -107,7 +110,6 @@ runner.register(show_stat)
> ```python
> show_stat = CmdTask(
> name='show-stat',
> upstreams=[download_dataset, install_pandas],
> cmd='python -c "import pandas as pd; df=pd.read_csv(\'dataset.csv\'); print(df.describe())"',
> retry=0
> )
Expand All @@ -121,8 +123,23 @@ Once you write the definitions, Zrb will automatically load your `zrb_init.py` s
zrb show-stat
```
The command will give you the statistics property of the dataset:

```
sepal_length sepal_width petal_length petal_width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
```

<details>
<summary>Show output</summary>
<summary>See the full output</summary>

```
Url [https://raw.githubusercontent.com/state-alchemists/datasets/main/iris.csv]:
Expand Down Expand Up @@ -165,6 +182,13 @@ To run again: zrb project show-stats --url "https://raw.githubusercontent.com/st
```
</details>

Since you have registered `install_pandas` and `download_dataset` (i.e., `runner.register(install_pandas, download_dataset)`), then you can also execute those tasks as well:

```bash
zrb install-pandas
zrb download-dataset
```

> __📝 NOTE:__ When executing a task, you can also provide the parameter directly, for example you want to provide the `url`
>
> ```bash
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,39 @@ from zrb import CmdTask, runner

hello1 = CmdTask(
name='hello',
group=None,
cmd='echo "hello mars"'
)
runner.register(hello1)

hello2 = CmdTask(
name='hello',
group=None,
cmd='echo "hello world"'
)
runner.register(hello2)
```

You can see that `hello1` and `hello2` share the same name. Both of them also has the same `group` (i.e., not defined).
You can see that `hello1` and `hello2` share the same name and group.

In this case, `hello2` will override `hello1`.

This leads to tricky situation. For example, you believe that `zrb hello` should yield `hello mars`, yet it keep showing `hello world`.
The condition leads to a tricky situation since `hello2` overrides `hello1`. To avoid this situation, Zrb will throw a `ValueError` whenever it detects two tasks registered under the same name and group.

# Detecting the Problem

First of all, detecting the problem will be easier if you use the same convention to define task property:
- Use single quote instead of double quote for string value whenever possible
- Not using space between property name and property value (i.e., `name='hello'`)
You can detect the problem by reading the error message (i.e., `Task "..." has already been registered`):

```
...
File "/home/gofrendi/playground/getting-started/zrb_init.py", line 14, in <module>
runner.register(hello2)
File "<@beartype(zrb.action.runner.Runner.register) at 0x7f780fda8dc0>", line 22, in register
File "/home/gofrendi/zrb/.venv/lib/python3.10/site-packages/zrb/action/runner.py", line 35, in register
raise RuntimeError(f'Task "{cmd_name}" has already been registered')
RuntimeError: Task "zrb hello" has already been registered
```

The traceback also shows you that the cause of the error is at line `14` of `zrb_init.py` (i.e., `runner.register(hello2)`).

Once you do so, you can use `search` feature in your IDE/text editor (e.g., `name='hello'`). Make sure every task name is unique so that they don't accidentally overwrite each other.

# Avoiding the Problem

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"

[project]
name = "zrb"
version = "0.0.119"
version = "0.0.120"
authors = [
{ name="Go Frendi Gunawan", email="gofrendiasgard@gmail.com" },
]
Expand Down
77 changes: 44 additions & 33 deletions src/zrb/action/runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from zrb.helper.typing import Any, Callable, Iterable, List, Mapping, Union
from zrb.helper.typing import Any, Callable, List, Mapping, Union
from zrb.helper.typecheck import typechecked
from zrb.helper.log import logger
from zrb.helper.accessories.color import colored
Expand All @@ -19,71 +19,82 @@ class Runner():

def __init__(self, env_prefix: str = ''):
logger.info(colored('Create runner', attrs=['dark']))
self._env_prefix = env_prefix
self._tasks: Iterable[AnyTask] = []
self._registered_groups: Mapping[str, click.Group] = {}
self._top_levels: List[CliSubcommand] = []
self._subcommands: Mapping[str, List[click.Group]] = {}
self.__env_prefix = env_prefix
self.__tasks: List[AnyTask] = []
self.__registered_groups: Mapping[str, click.Group] = {}
self.__top_levels: List[CliSubcommand] = []
self.__subcommands: Mapping[str, List[click.Group]] = {}
self.__registered_task_cmd_name: List[str] = []
logger.info(colored('Runner created', attrs=['dark']))

def register(self, task: AnyTask):
task._set_has_cli_interface()
cmd_name = task._get_full_cmd_name()
logger.debug(colored(f'Register task: {cmd_name}', attrs=['dark']))
self._tasks.append(task)
logger.debug(colored(f'Task registered: {cmd_name}', attrs=['dark']))
def register(self, *tasks: AnyTask):
for task in tasks:
task._set_has_cli_interface()
cmd_name = task._get_full_cmd_name()
if cmd_name in self.__registered_task_cmd_name:
raise RuntimeError(
f'Task "{cmd_name}" has already been registered'
)
logger.debug(
colored(f'Register task: "{cmd_name}"', attrs=['dark'])
)
self.__tasks.append(task)
self.__registered_task_cmd_name.append(cmd_name)
logger.debug(
colored(f'Task registered: "{cmd_name}"', attrs=['dark'])
)

def serve(self, cli: click.Group) -> click.Group:
for task in self._tasks:
subcommand = self._create_cli_subcommand(task)
if subcommand not in self._top_levels:
self._top_levels.append(subcommand)
for task in self.__tasks:
subcommand = self.__create_cli_subcommand(task)
if subcommand not in self.__top_levels:
self.__top_levels.append(subcommand)
cli.add_command(subcommand)
return cli

def _create_cli_subcommand(
def __create_cli_subcommand(
self, task: AnyTask
) -> Union[click.Group, click.Command]:
subcommand: CliSubcommand = self._create_cli_command(task)
subcommand: CliSubcommand = self.__create_cli_command(task)
task_group = task._group
while task_group is not None:
group = self._register_sub_command(task_group, subcommand)
group = self.__register_sub_command(task_group, subcommand)
if task_group._parent is None:
return group
subcommand = group
task_group = task_group._parent
return subcommand

def _register_sub_command(
def __register_sub_command(
self, task_group: TaskGroup, subcommand: CliSubcommand
) -> click.Group:
task_group_id = task_group.get_id()
group = self._get_cli_group(task_group)
if task_group_id not in self._subcommands:
self._subcommands[task_group_id] = []
if subcommand not in self._subcommands[task_group_id]:
group = self.__get_cli_group(task_group)
if task_group_id not in self.__subcommands:
self.__subcommands[task_group_id] = []
if subcommand not in self.__subcommands[task_group_id]:
group.add_command(subcommand)
self._subcommands[task_group_id].append(subcommand)
self.__subcommands[task_group_id].append(subcommand)
return group

def _get_cli_group(self, task_group: TaskGroup) -> click.Group:
def __get_cli_group(self, task_group: TaskGroup) -> click.Group:
task_group_id = task_group.get_id()
if task_group_id in self._registered_groups:
return self._registered_groups[task_group_id]
if task_group_id in self.__registered_groups:
return self.__registered_groups[task_group_id]
group_cmd_name = task_group.get_cmd_name()
group_description = task_group._description
group = click.Group(name=group_cmd_name, help=group_description)
self._registered_groups[task_group_id] = group
self.__registered_groups[task_group_id] = group
return group

def _create_cli_command(self, task: AnyTask) -> click.Command:
def __create_cli_command(self, task: AnyTask) -> click.Command:
task_inputs = task._get_combined_inputs()
task_cmd_name = task.get_cmd_name()
task_description = task.get_description()
task_function = task.to_function(
env_prefix=self._env_prefix, raise_error=True
env_prefix=self.__env_prefix, raise_error=True
)
callback = self._wrap_task_function(task_function)
callback = self.__wrap_task_function(task_function)
command = click.Command(
callback=callback, name=task_cmd_name, help=task_description
)
Expand All @@ -101,7 +112,7 @@ def _create_cli_command(self, task: AnyTask) -> click.Command:
command.params.append(click.Option(param_decl, **options))
return command

def _wrap_task_function(
def __wrap_task_function(
self, function: Callable[..., Any]
) -> Callable[..., Any]:
def wrapped_function(*args: Any, **kwargs: Any) -> Any:
Expand Down
4 changes: 4 additions & 0 deletions src/zrb/task/base_task/base_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def __init__(
self.__is_execution_triggered: bool = False
self.__is_execution_started: bool = False

def __rshift__(self, other_task: AnyTask):
other_task.add_upstream(self)
return other_task

def copy(self) -> AnyTask:
return copy.deepcopy(self)

Expand Down
28 changes: 28 additions & 0 deletions test/task/test_task_shift_right_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from zrb.task.task import Task


def test_consistent_task_shift_right_operator():
result = []
task_1 = Task(
name='task-1',
run=lambda *args, **kwargs: result.append(1)
)
task_2 = Task(
name='task-2',
run=lambda *args, **kwargs: result.append(2)
)
task_3 = Task(
name='task-3',
run=lambda *args, **kwargs: result.append(3)
)
task_4 = Task(
name='task-4',
run=lambda *args, **kwargs: result.append(4)
)
task_1 >> task_2 >> task_3 >> task_4
function = task_4.to_function()
function()
assert result[0] == 1
assert result[1] == 2
assert result[2] == 3
assert result[3] == 4

0 comments on commit f191f0d

Please sign in to comment.