Skip to content

Commit

Permalink
Merge pull request #1395 from webalexeu/feat/add_owner_node
Browse files Browse the repository at this point in the history
  • Loading branch information
jkroepke authored Apr 21, 2024
2 parents 72e7096 + fdf6f33 commit b1c272a
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 4 deletions.
11 changes: 9 additions & 2 deletions docs/collector.mscluster_resource.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Name | Description | Type | Labels
`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name`
`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name`
`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `name`
`OwnerNode` | The node hosting the resource. 0: Not hosted; 1: Hosted | gauge | `type`, `owner_group`, `node_name`, `name`
`PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name`
`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name`
`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `name`
Expand All @@ -34,10 +35,16 @@ Name | Description | Type | Labels
`Subclass` | Provides the list of references to nodes that can be the owner of this resource. | gauge | `type`, `owner_group`, `name`

### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Query the ownership state of every cluster resource with respect to node1 (1: hosted on node1; 0: not hosted on node1)
```
windows_mscluster_resource_owner_node{node_name="node1"}
```

## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Count the number of Network Name cluster resources
```
count(windows_mscluster_resource_state{type="Network Name"})
```

## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
11 changes: 9 additions & 2 deletions docs/collector.mscluster_resourcegroup.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,23 @@ Name | Description | Type | Labels
`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name`
`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name`
`GroupType` | The Type of the resource group. | gauge | `name`
`OwnerNode` | The node hosting the resource group. 0: Not hosted; 1: Hosted | gauge | `node_name`, `name`
`Priority` | Priority value of the resource group | gauge | `name`
`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name`
`State` | The current state of the resource group. -1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name`
`UpdateDomain` | | gauge | `name`

### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Query the ownership state of every cluster group with respect to node1 (1: hosted on node1; 0: not hosted on node1)
```
windows_mscluster_resourcegroup_owner_node{node_name="node1"}
```

## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Count the number of cluster groups by type
```
count_values("count", windows_mscluster_resourcegroup_group_type)
```

## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
7 changes: 7 additions & 0 deletions pkg/collector/mscluster_node/mscluster_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ type Config struct{}

var ConfigDefaults = Config{}

// NodeName holds the names of the cluster nodes reported by the most recent
// mscluster_node collection: Collect resets it to an empty slice and then
// appends each node's name. It is read by the mscluster_resource and
// mscluster_resourcegroup collectors to emit one owner_node series per node.
// NOTE(review): this is shared mutable package state with no visible
// synchronization — confirm those collectors never run concurrently with
// mscluster_node's Collect.
var NodeName []string

// A collector is a Prometheus collector for WMI MSCluster_Node metrics
type collector struct {
logger log.Logger
Expand Down Expand Up @@ -175,6 +178,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
return err
}

NodeName = []string{}

for _, v := range dst {

ch <- prometheus.MustNewConstMetric(
Expand Down Expand Up @@ -274,6 +279,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
float64(v.StatusInformation),
v.Name,
)

NodeName = append(NodeName, v.Name)
}

return nil
Expand Down
30 changes: 30 additions & 0 deletions pkg/collector/mscluster_resource/mscluster_resource.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package mscluster_resource

import (
"github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node"
"github.com/prometheus-community/windows_exporter/pkg/types"
"github.com/prometheus-community/windows_exporter/pkg/wmi"

Expand All @@ -26,6 +27,7 @@ type collector struct {
IsAlivePollInterval *prometheus.Desc
LooksAlivePollInterval *prometheus.Desc
MonitorProcessId *prometheus.Desc
OwnerNode *prometheus.Desc
PendingTimeout *prometheus.Desc
ResourceClass *prometheus.Desc
RestartAction *prometheus.Desc
Expand Down Expand Up @@ -102,6 +104,18 @@ func (c *collector) Build() error {
[]string{"type", "owner_group", "name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource. 0: Not hosted; 1: Hosted",
[]string{"type", "owner_group", "node_name", "name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource. 0: Not hosted; 1: Hosted",
[]string{"type", "owner_group", "node_name", "name"},
nil,
)
c.PendingTimeout = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"),
"Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.",
Expand Down Expand Up @@ -165,6 +179,7 @@ type MSCluster_Resource struct {
Name string
Type string
OwnerGroup string
OwnerNode string

Characteristics uint
DeadlockTimeout uint
Expand Down Expand Up @@ -244,6 +259,21 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
v.Type, v.OwnerGroup, v.Name,
)

if mscluster_node.NodeName != nil {
for _, node_name := range mscluster_node.NodeName {
isCurrentState := 0.0
if v.OwnerNode == node_name {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.OwnerNode,
prometheus.GaugeValue,
isCurrentState,
v.Type, v.OwnerGroup, node_name, v.Name,
)
}
}

ch <- prometheus.MustNewConstMetric(
c.PendingTimeout,
prometheus.GaugeValue,
Expand Down
30 changes: 30 additions & 0 deletions pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package mscluster_resourcegroup

import (
"github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node"
"github.com/prometheus-community/windows_exporter/pkg/types"
"github.com/prometheus-community/windows_exporter/pkg/wmi"

Expand Down Expand Up @@ -31,6 +32,7 @@ type collector struct {
Flags *prometheus.Desc
GroupType *prometheus.Desc
PlacementOptions *prometheus.Desc
OwnerNode *prometheus.Desc
Priority *prometheus.Desc
ResiliencyPeriod *prometheus.Desc
State *prometheus.Desc
Expand Down Expand Up @@ -119,6 +121,18 @@ func (c *collector) Build() error {
[]string{"name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource group. 0: Not hosted; 1: Hosted",
[]string{"node_name", "name"},
nil,
)
c.OwnerNode = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "owner_node"),
"The node hosting the resource group. 0: Not hosted; 1: Hosted",
[]string{"node_name", "name"},
nil,
)
c.Priority = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "priority"),
"Priority value of the resource group",
Expand Down Expand Up @@ -155,6 +169,7 @@ type MSCluster_ResourceGroup struct {
FailoverThreshold uint
Flags uint
GroupType uint
OwnerNode string
Priority uint
ResiliencyPeriod uint
State uint
Expand Down Expand Up @@ -241,6 +256,21 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric)
v.Name,
)

if mscluster_node.NodeName != nil {
for _, node_name := range mscluster_node.NodeName {
isCurrentState := 0.0
if v.OwnerNode == node_name {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.OwnerNode,
prometheus.GaugeValue,
isCurrentState,
node_name, v.Name,
)
}
}

ch <- prometheus.MustNewConstMetric(
c.Priority,
prometheus.GaugeValue,
Expand Down

0 comments on commit b1c272a

Please sign in to comment.