Skip to content

Commit

Permalink
ResetFailed and ResetFailedUnit
Browse files Browse the repository at this point in the history
This PR adds to BlueChi the ability to reset the failed state of all units
or of a single unit.

Solves: #932
Signed-off-by: Artiom Divak <adivak@redhat.com>
  • Loading branch information
ArtiomDivak authored and engelmi committed Sep 9, 2024
1 parent a890cbb commit 0184664
Show file tree
Hide file tree
Showing 17 changed files with 381 additions and 0 deletions.
16 changes: 16 additions & 0 deletions data/org.eclipse.bluechi.Node.xml
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,22 @@
<arg name="level" type="s" direction="in" />
</method>

<!--
ResetFailed:
Reset all the failed units on the node
-->
<method name="ResetFailed" />

<!--
ResetFailedUnit:
Reset the failed state of a specific unit on the node.
@name Name of the unit to reset the failed state for.
-->
<method name="ResetFailedUnit">
<arg name="name" type="s" direction="in" />
</method>


<!--
Name:
Expand Down
4 changes: 4 additions & 0 deletions data/org.eclipse.bluechi.internal.Agent.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@
<method name="SetLogLevel">
<arg name="level" type="s" direction="in" />
</method>
<method name="ResetFailed" />
<method name="ResetFailedUnit">
<arg name="name" type="s" direction="in" />
</method>

<signal name="JobDone">
<arg name="id" type="u" />
Expand Down
8 changes: 8 additions & 0 deletions doc/docs/api/description.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ Object path: `/org/eclipse/bluechi/node/$name`
`ReloadUnit()`/`RestartUnit()` is similar to `StartUnit()` but can be used to reload/restart a unit instead. See
equivalent systemd methods for details.

* `ResetFailed()`

Equivalent to systemd method `ResetFailed`. This method will reset the failed state of all units on the node.

* `ResetFailedUnit(in s name)`

Equivalent to systemd method `ResetFailedUnit`. This method will reset the failed state for a specific unit on the node.

* `EnableUnitFiles(in as files, in b runtime, in b force, out b carries_install_info, out a(sss) changes);`

`EnableUnitFiles()` may be used to enable one or more units in the system (by creating symlinks to them in /etc/ or /run/).
Expand Down
2 changes: 2 additions & 0 deletions src/agent/agent.c
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,8 @@ static const sd_bus_vtable internal_agent_vtable[] = {
SD_BUS_METHOD("EnableUnitFiles", "asbb", "ba(sss)", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("DisableUnitFiles", "asb", "a(sss)", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("Reload", "", "", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("ResetFailed", "", "", agent_method_passthrough_to_systemd, 0),
SD_BUS_METHOD("ResetFailedUnit", "s", "", agent_method_passthrough_to_systemd, 0),
SD_BUS_VTABLE_END
};

Expand Down
17 changes: 17 additions & 0 deletions src/bindings/python/bluechi/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,23 @@ def reload_unit(self, name: str, mode: str) -> ObjPath:
mode,
)

def reset_failed(self) -> None:
    """
    ResetFailed:
      Clear the failed state of all units on the node by invoking the
      ResetFailed method on the node's D-Bus proxy.
    """
    proxy = self.get_proxy()
    proxy.ResetFailed()

def reset_failed_unit(self, name: str) -> None:
    """
    ResetFailedUnit:
      Clear the failed state of a single unit on the node by invoking the
      ResetFailedUnit method on the node's D-Bus proxy.
      @name Name of the unit whose failed state is reset.
    """
    proxy = self.get_proxy()
    proxy.ResetFailedUnit(name)

def restart_unit(self, name: str, mode: str) -> ObjPath:
"""
RestartUnit:
Expand Down
1 change: 1 addition & 0 deletions src/client/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const Method methods[] = {
{ "thaw", 2, 2, OPT_NONE, method_thaw, usage_bluechi },
{ "restart", 2, 2, OPT_NONE, method_restart, usage_bluechi },
{ "reload", 2, 2, OPT_NONE, method_reload, usage_bluechi },
{ "reset-failed", 0, ARG_ANY, OPT_NONE, method_reset_failed, usage_bluechi },
{ "monitor", 0, 2, OPT_NONE, method_monitor, usage_bluechi },
{ "metrics", 1, 1, OPT_NONE, method_metrics, usage_bluechi },
{ "enable", 2, ARG_ANY, OPT_FORCE | OPT_RUNTIME | OPT_NO_RELOAD, method_enable, usage_bluechi },
Expand Down
2 changes: 2 additions & 0 deletions src/client/method-help.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ void usage_bluechi() {
printf(" usage: reload nodename unitname\n");
printf(" - restart: restarts a specific systemd service (or timer, or slice) on a specific node\n");
printf(" usage: restart nodename unitname\n");
printf(" - reset-failed: reset failed node on all node or all units on a specific node or specidec units on a node \n");
printf(" usage: reset-failed [nodename] [unit1, unit2 ...]\n");
printf(" - enable: enables the specified systemd files on a specific node\n");
printf(" usage: enable nodename unitfilename...\n");
printf(" - disable: disables the specified systemd files on a specific node\n");
Expand Down
117 changes: 117 additions & 0 deletions src/client/method-unit-actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,113 @@ static int method_thaw_unit_on(Client *client, char *node_name, char *unit) {
return 0;
}

/*
 * Resets the failed state of individual units on one node.
 *
 * commands->opargv[0] is used as the node name; every following entry, up to
 * the first NULL entry, is passed as unit name to the node's ResetFailedUnit
 * method. Processing stops at the first failing call.
 *
 * Returns the negative error code of the failing step, otherwise the
 * non-negative result of the last call made.
 */
static int method_reset_failed_on_units(Client *client, Command *commands) {
        char *node_name = commands->opargv[0];
        int r = 0;

        r = assemble_object_path_string(NODE_OBJECT_PATH_PREFIX, node_name, &client->object_path);
        if (r < 0) {
                return r;
        }

        /* Index 0 holds the node name, so unit names start at index 1. */
        for (int unit_idx = 1; commands->opargv[unit_idx] != NULL; unit_idx++) {
                /*
                 * Declared per iteration: sd_bus_call_method() hands out a new reply
                 * reference on every call, so reusing one variable across iterations
                 * would overwrite (and thereby leak) the previous reply.
                 */
                _cleanup_sd_bus_error_ sd_bus_error error = SD_BUS_ERROR_NULL;
                _cleanup_sd_bus_message_ sd_bus_message *result = NULL;

                /* NOTE(review): looks like leftover debug output; kept so the CLI output is unchanged. */
                printf("%i %s\n", unit_idx, commands->opargv[unit_idx]);
                r = sd_bus_call_method(
                                client->api_bus,
                                BC_INTERFACE_BASE_NAME,
                                client->object_path,
                                NODE_INTERFACE,
                                "ResetFailedUnit",
                                &error,
                                &result,
                                "s",
                                commands->opargv[unit_idx]);
                if (r < 0) {
                        fprintf(stderr, "Failed to issue method call: %s\n", error.message);
                        return r;
                }
        }

        return r;
}

/*
 * Resets the failed state of all units on the node called node_name by
 * calling ResetFailed on that node's object.
 *
 * Returns a negative error code if the object path could not be assembled or
 * the call failed (a message is written to stderr for the latter), otherwise
 * the non-negative result of the method call.
 */
static int method_reset_failed_on_node(Client *client, char *node_name) {
        _cleanup_sd_bus_error_ sd_bus_error bus_error = SD_BUS_ERROR_NULL;
        _cleanup_sd_bus_message_ sd_bus_message *reply = NULL;

        int rc = assemble_object_path_string(NODE_OBJECT_PATH_PREFIX, node_name, &client->object_path);
        if (rc < 0) {
                return rc;
        }

        rc = sd_bus_call_method(
                        client->api_bus,
                        BC_INTERFACE_BASE_NAME,
                        client->object_path,
                        NODE_INTERFACE,
                        "ResetFailed",
                        &bus_error,
                        &reply,
                        "");
        if (rc < 0) {
                fprintf(stderr,
                        "Couldn't reset failed state of all units on node '%s': %s\n",
                        node_name,
                        bus_error.message);
        }

        /* Success and failure share this exit; rc carries the outcome either way. */
        return rc;
}

/*
 * Resets the failed state of all units on every node reported by the
 * controller's ListNodes method.
 *
 * A failure on one node does not stop the remaining nodes from being
 * processed, but it is remembered: the first negative result is returned once
 * all nodes were visited, so a failure on an earlier node is no longer masked
 * by a later node succeeding.
 *
 * Returns a negative error code if listing/parsing the nodes failed or if the
 * reset failed on any node, otherwise the non-negative result of the last
 * step performed.
 */
static int method_reset_failed_on_all_nodes(Client *client) {
        int r = 0;
        int first_failure = 0;

        _cleanup_sd_bus_error_ sd_bus_error error = SD_BUS_ERROR_NULL;
        _cleanup_sd_bus_message_ sd_bus_message *result = NULL;

        r = sd_bus_call_method(
                        client->api_bus,
                        BC_INTERFACE_BASE_NAME,
                        BC_OBJECT_PATH,
                        CONTROLLER_INTERFACE,
                        "ListNodes",
                        &error,
                        &result,
                        "");
        if (r < 0) {
                fprintf(stderr, "Failed to issue method call: %s\n", error.message);
                return r;
        }

        r = sd_bus_message_enter_container(result, SD_BUS_TYPE_ARRAY, "(soss)");
        if (r < 0) {
                fprintf(stderr, "Failed to open result array: %s\n", strerror(-r));
                return r;
        }

        while (sd_bus_message_at_end(result, false) == 0) {
                const char *name = NULL;

                /* Only the first struct field (the node name) is needed here. */
                r = sd_bus_message_read(result, "(soss)", &name, NULL, NULL, NULL);
                if (r < 0) {
                        fprintf(stderr, "Failed to read node information: %s\n", strerror(-r));
                        return r;
                }

                /* The callee takes a non-const string, so hand it a private copy. */
                _cleanup_free_ char *node_name = strdup(name);
                if (node_name == NULL) {
                        fprintf(stderr, "Failed to copy node name '%s': out of memory\n", name);
                        return -ENOMEM;
                }

                r = method_reset_failed_on_node(client, node_name);
                if (r < 0 && first_failure == 0) {
                        first_failure = r;
                }
        }

        return first_failure < 0 ? first_failure : r;
}

int method_start(Command *command, void *userdata) {
return method_lifecycle_action_on(userdata, command->opargv[0], command->opargv[1], "StartUnit");
}
Expand All @@ -332,6 +439,16 @@ int method_thaw(Command *command, void *userdata) {
return method_thaw_unit_on(userdata, command->opargv[0], command->opargv[1]);
}

/*
 * Entry point of the "reset-failed" command. Dispatches on how many
 * positional arguments were given:
 *   - none:              reset failed units on all nodes
 *   - node only:         reset all failed units on that node
 *   - node plus unit(s): reset only the listed units on that node
 */
int method_reset_failed(Command *command, void *userdata) {
        if (command->opargv[0] == NULL) {
                return method_reset_failed_on_all_nodes(userdata);
        }

        if (command->opargv[1] == NULL) {
                return method_reset_failed_on_node(userdata, command->opargv[0]);
        }

        return method_reset_failed_on_units(userdata, command);
}

int method_enable(Command *command, void *userdata) {
int r = 0;
r = method_enable_unit_on(
Expand Down
1 change: 1 addition & 0 deletions src/client/method-unit-actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
int method_start(Command *command, void *userdata);
int method_stop(Command *command, void *userdata);
int method_restart(Command *command, void *userdata);
int method_reset_failed(Command *command, void *userdata);
int method_reload(Command *command, void *userdata);
int method_freeze(Command *command, void *userdata);
int method_thaw(Command *command, void *userdata);
Expand Down
2 changes: 2 additions & 0 deletions src/controller/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ static const sd_bus_vtable node_vtable[] = {
SD_BUS_METHOD("ThawUnit", "s", "", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("RestartUnit", "ss", "o", node_method_restart_unit, 0),
SD_BUS_METHOD("ReloadUnit", "ss", "o", node_method_reload_unit, 0),
SD_BUS_METHOD("ResetFailed", "", "", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("ResetFailedUnit", "s", "", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("GetUnitProperties", "ss", "a{sv}", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("GetUnitProperty", "sss", "v", node_method_passthrough_to_agent, 0),
SD_BUS_METHOD("SetUnitProperties", "sba(sv)", "", node_method_set_unit_properties, 0),
Expand Down
15 changes: 15 additions & 0 deletions tests/bluechi_test/bluechictl.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,21 @@ def reload_unit(
expected_result,
)

def reset_failed(
    self,
    node_name: str = "",
    units: Optional[List[str]] = None,
    check_result: bool = True,
    expected_result: int = 0,
) -> Tuple[Optional[int], Union[Iterator[bytes], Any, Tuple[bytes, bytes]]]:
    """Run the `reset-failed` command via `self._run`.

    Args:
        node_name: Node to operate on; when empty, no node is put on the
            command line.
        units: Unit names appended after the node name. `None` (the default)
            is treated as an empty list.
        check_result: Forwarded unchanged to `self._run`.
        expected_result: Forwarded unchanged to `self._run`.

    Returns:
        Whatever `self._run` returns.
    """
    # A list literal as default would be one object shared by all calls;
    # use None as the default and create a fresh list per call instead.
    units = [] if units is None else units
    cmd = f"reset-failed {node_name} {' '.join(units)}".strip()
    return self._run(
        f"ResetFailed on node {node_name} for units {units}",
        cmd,
        check_result,
        expected_result,
    )

def stop_unit(
self,
node_name: str,
Expand Down
2 changes: 2 additions & 0 deletions tests/tests/tier0/bluechi-reset-failed-node/main.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
summary: Test resetting the failed state of all units on a node via D-Bus API call.
id: fc5db6a7-1367-450b-b674-35b9541d4f3e
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#
# Copyright Contributors to the Eclipse BlueChi project
#
# SPDX-License-Identifier: LGPL-2.1-or-later

import logging
from typing import Dict

from bluechi_test.config import BluechiAgentConfig, BluechiControllerConfig
from bluechi_test.machine import BluechiAgentMachine, BluechiControllerMachine
from bluechi_test.service import Option, Section, SimpleRemainingService
from bluechi_test.test import BluechiTest

LOGGER = logging.getLogger(__name__)
NODE_FOO = "node-foo"
NODE_BAR = "node-bar"


def exec(ctrl: BluechiControllerMachine, nodes: Dict[str, BluechiAgentMachine]):
    """Check that `reset-failed <node>` only affects the given node.

    Installs the same service on node-foo and node-bar, starts it on both and
    waits for it to reach the "failed" state. Then resets the failed state on
    node-foo only and expects the unit to become "inactive" there while it
    stays "failed" on node-bar.
    """
    node_foo = nodes[NODE_FOO]
    node_bar = nodes[NODE_BAR]

    # ExecStart is overridden with "/s" (presumably a non-existent binary) so
    # that the unit ends up in the failed state the test waits for below.
    failed_service_node_foo = SimpleRemainingService(name="simple.service")
    failed_service_node_foo.set_option(Section.Service, Option.ExecStart, "/s")
    failed_service_node_bar = SimpleRemainingService(name="simple.service")
    failed_service_node_bar.set_option(Section.Service, Option.ExecStart, "/s")

    # Install each service object on the node it is named after.
    node_foo.install_systemd_service(failed_service_node_foo)
    node_bar.install_systemd_service(failed_service_node_bar)

    ctrl.bluechictl.start_unit(NODE_FOO, failed_service_node_foo.name)
    ctrl.bluechictl.start_unit(NODE_BAR, failed_service_node_bar.name)

    assert node_foo.wait_for_unit_state_to_be(failed_service_node_foo.name, "failed")
    assert node_bar.wait_for_unit_state_to_be(failed_service_node_bar.name, "failed")

    ctrl.bluechictl.reset_failed(node_name=NODE_FOO)

    # Only node-foo was reset; node-bar must be left untouched.
    assert node_foo.wait_for_unit_state_to_be(failed_service_node_foo.name, "inactive")
    assert node_bar.wait_for_unit_state_to_be(failed_service_node_bar.name, "failed")


def test_bluechi_reset_failed_node(
    bluechi_test: BluechiTest,
    bluechi_node_default_config: BluechiAgentConfig,
    bluechi_ctrl_default_config: BluechiControllerConfig,
):
    """Set up a controller with the agents node-foo and node-bar and run `exec`."""
    # Derive one agent config per node from the default config first ...
    agent_configs = []
    for node_name in (NODE_FOO, NODE_BAR):
        agent_cfg = bluechi_node_default_config.deep_copy()
        agent_cfg.node_name = node_name
        agent_configs.append(agent_cfg)

    # ... then register them in the same order.
    for agent_cfg in agent_configs:
        bluechi_test.add_bluechi_agent_config(agent_cfg)

    bluechi_ctrl_default_config.allowed_node_names = [NODE_FOO, NODE_BAR]
    bluechi_test.set_bluechi_controller_config(bluechi_ctrl_default_config)

    bluechi_test.run(exec)
2 changes: 2 additions & 0 deletions tests/tests/tier0/bluechi-reset-failed-units/main.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
summary: Test resetting the failed state of specific units on a node via D-Bus API call.
id: 6c67e96d-db4b-4a54-980c-287e33450abf
Loading

0 comments on commit 0184664

Please sign in to comment.