diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 792f41ba374..42b3c410f78 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -33,8 +33,6 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] *Metricbeat* - Setting period for counter cache for Prometheus remote_write at least to 60sec {pull}38553[38553] -- Add new metrics for the vSphere Datastore metricset. {pull}40441[40441] -- Update metrics for the vSphere Host metricset. {pull}40429[40429] *Osquerybeat* @@ -206,16 +204,20 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Add new memory/cgroup metrics to Kibana module {pull}37232[37232] - Add SSL support to mysql module {pull}37997[37997] - Add SSL support for aerospike module {pull}38126[38126] +- Add new metrics for the vSphere Host metricset. {pull}40429[40429] +- Add new metrics for the vSphere Datastore metricset. {pull}40441[40441] +- Add metrics for the vSphere Virtualmachine metricset. {pull}40485[40485] - Add new metricset network for the vSphere module. {pull}40559[40559] -- Add new metricset resourcepool for the vSphere module. {pull}40456[40456] - Add new metricset cluster for the vSphere module. {pull}40536[40536] -- Add metrics for the vSphere Virtualmachine metricset. {pull}40485[40485] +- Add new metricset resourcepool for the vSphere module. {pull}40456[40456] +- Add new metricset datastorecluster for vSphere module. {pull}40634[40634] - Add support for snapshot in vSphere virtualmachine metricset {pull}40683[40683] +- Add support for new metrics for vSphere module datastorecluster metricset. {pull}40694[40694] - Update fields to use mapstr in vSphere virtualmachine metricset {pull}40707[40707] +- Add support for period based intervalID in vSphere host and datastore metricsets {pull}40678[40678] *Metricbeat* -- Add support for new metrics for vSphere module datastorecluster metricset. 
{pull}40694[40694] *Osquerybeat* diff --git a/metricbeat/docs/modules/vsphere.asciidoc b/metricbeat/docs/modules/vsphere.asciidoc index 762f7ba255e..e7e6e78205d 100644 --- a/metricbeat/docs/modules/vsphere.asciidoc +++ b/metricbeat/docs/modules/vsphere.asciidoc @@ -9,14 +9,79 @@ This file is generated! See scripts/mage/docs_collector.go [[metricbeat-module-vsphere]] == vSphere module -The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any Vmware SDK URL (ESXi/VCenter). This library is built for and tested against ESXi and vCenter 5.5, 6.0 and 6.5. +The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any VMware SDK URL (ESXi/VCenter). -By default it enables the metricsets `cluster`, `datastore`, `datastorecluster`, `host`, `network`, `resourcepool` and `virtualmachine`. +This module has been tested against ESXi and vCenter versions 5.5, 6.0, 6.5, and 7.0.3. + +By default, the vSphere module enables the following metricsets: + +1. cluster + +2. datastore + +3. datastorecluster + +4. host + +5. network + +6. resourcepool + +7. virtualmachine + +[float] +=== Supported Periods: +The Datastore and Host metricsets support performance data collection using the vSphere performance API. Given that the performance API imposes usage restrictions based on data collection intervals, users should configure the period optimally to ensure the receipt of real-time data. This configuration can be determined based on the https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html[Data Collection Intervals] and https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html[Data Collection Levels]. + +[IMPORTANT] + +Only host and datastore metricsets have limitation of system configured period from vSphere instance. 
Users can still collect summary metrics if performance metrics are not supported for the configured instance. + +[float] +==== Real-time data collection default interval: +- 20s + +[float] +==== Historical data collection default intervals: +- 300s +- 1800s +- 7200s +- 86400s + +[float] +=== Example: +If you need to configure multiple metricsets with different periods, you can achieve this by setting up multiple vSphere modules with different metricsets as demonstrated below: + +[source,yaml] +---- +- module: vsphere + metricsets: + - cluster + - datastorecluster + - network + - resourcepool + - virtualmachine + period: 10s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false + +- module: vsphere + metricsets: + - datastore + - host + period: 300s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false +---- [float] === Dashboard -The vsphere module comes with a predefined dashboard. For example: +The vSphere module includes a predefined dashboard. For example: image::./images/metricbeat_vsphere_dashboard.png[] image::./images/metricbeat_vsphere_vm_dashboard.png[] @@ -36,7 +101,17 @@ metricbeat.modules: - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. 
+ # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -44,7 +119,7 @@ metricbeat.modules: password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false ---- diff --git a/metricbeat/metricbeat.reference.yml b/metricbeat/metricbeat.reference.yml index 890031ddb4c..4ce1591bed3 100644 --- a/metricbeat/metricbeat.reference.yml +++ b/metricbeat/metricbeat.reference.yml @@ -1008,7 +1008,17 @@ metricbeat.modules: - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. 
+ # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -1016,7 +1026,7 @@ metricbeat.modules: password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false #------------------------------- Windows Module ------------------------------- diff --git a/metricbeat/module/vsphere/_meta/config.reference.yml b/metricbeat/module/vsphere/_meta/config.reference.yml index 91a32da7677..9ec81a4ecef 100644 --- a/metricbeat/module/vsphere/_meta/config.reference.yml +++ b/metricbeat/module/vsphere/_meta/config.reference.yml @@ -1,7 +1,17 @@ - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. 
+ # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -9,5 +19,5 @@ password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false diff --git a/metricbeat/module/vsphere/_meta/config.yml b/metricbeat/module/vsphere/_meta/config.yml index 173be03fc4f..24f94e29287 100644 --- a/metricbeat/module/vsphere/_meta/config.yml +++ b/metricbeat/module/vsphere/_meta/config.yml @@ -7,7 +7,17 @@ # - network # - resourcepool # - virtualmachine - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. 
+ # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -15,5 +25,5 @@ password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false diff --git a/metricbeat/module/vsphere/_meta/docs.asciidoc b/metricbeat/module/vsphere/_meta/docs.asciidoc index 353cfda1e7b..9815541831b 100644 --- a/metricbeat/module/vsphere/_meta/docs.asciidoc +++ b/metricbeat/module/vsphere/_meta/docs.asciidoc @@ -1,11 +1,76 @@ -The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any Vmware SDK URL (ESXi/VCenter). This library is built for and tested against ESXi and vCenter 5.5, 6.0 and 6.5. +The vSphere module uses the https://github.com/vmware/govmomi[Govmomi] library to collect metrics from any VMware SDK URL (ESXi/VCenter). -By default it enables the metricsets `cluster`, `datastore`, `datastorecluster`, `host`, `network`, `resourcepool` and `virtualmachine`. +This module has been tested against ESXi and vCenter versions 5.5, 6.0, 6.5, and 7.0.3. + +By default, the vSphere module enables the following metricsets: + +1. cluster + +2. datastore + +3. datastorecluster + +4. host + +5. network + +6. resourcepool + +7. virtualmachine + +[float] +=== Supported Periods: +The Datastore and Host metricsets support performance data collection using the vSphere performance API. 
Given that the performance API imposes usage restrictions based on data collection intervals, users should configure the period optimally to ensure the receipt of real-time data. This configuration can be determined based on the https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html[Data Collection Intervals] and https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html[Data Collection Levels]. + +[IMPORTANT] + +Only host and datastore metricsets have limitation of system configured period from vSphere instance. Users can still collect summary metrics if performance metrics are not supported for the configured instance. + +[float] +==== Real-time data collection default interval: +- 20s + +[float] +==== Historical data collection default intervals: +- 300s +- 1800s +- 7200s +- 86400s + +[float] +=== Example: +If you need to configure multiple metricsets with different periods, you can achieve this by setting up multiple vSphere modules with different metricsets as demonstrated below: + +[source,yaml] +---- +- module: vsphere + metricsets: + - cluster + - datastorecluster + - network + - resourcepool + - virtualmachine + period: 10s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false + +- module: vsphere + metricsets: + - datastore + - host + period: 300s + hosts: ["https://localhost/sdk"] + username: "user" + password: "password" + insecure: false +---- [float] === Dashboard -The vsphere module comes with a predefined dashboard. For example: +The vSphere module includes a predefined dashboard. 
For example: image::./images/metricbeat_vsphere_dashboard.png[] image::./images/metricbeat_vsphere_vm_dashboard.png[] diff --git a/metricbeat/module/vsphere/cluster/cluster.go b/metricbeat/module/vsphere/cluster/cluster.go index 10184c55c90..5c462d84933 100644 --- a/metricbeat/module/vsphere/cluster/cluster.go +++ b/metricbeat/module/vsphere/cluster/cluster.go @@ -75,7 +75,7 @@ func (m *ClusterMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er } defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Debug(fmt.Errorf("error trying to logout from vSphere: %w", err)) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -91,7 +91,7 @@ func (m *ClusterMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() diff --git a/metricbeat/module/vsphere/datastore/_meta/data.json b/metricbeat/module/vsphere/datastore/_meta/data.json index 1e1a1691219..9ab349a293c 100644 --- a/metricbeat/module/vsphere/datastore/_meta/data.json +++ b/metricbeat/module/vsphere/datastore/_meta/data.json @@ -1,64 +1,48 @@ { - "@timestamp": "2017-10-12T08:05:34.853Z", - "event": { - "dataset": "vsphere.datastore", - "duration": 115000, - "module": "vsphere" - }, - "metricset": { - "name": "datastore", - "period": 10000 - }, - "service": { - "address": "127.0.0.1:33365", - "type": "vsphere" - }, - "vsphere": { - "datastore": { - "iops": 0, - "host": { - "count": 1, - "names": [ - "DC3_H0" - ] - }, - "status": "green", - "vm": { - "count": 6, - "names": [ - "DC3_H0_VM0" - ] - }, - "read": { - "bytes": 0, - "latency": { - "total": { - "ms": 0 - } + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "vsphere.datastore", + "duration": 115000, + "module": "vsphere" + }, + "metricset": { + "name": "datastore", + "period": 
10000 + }, + "service": { + "address": "127.0.0.1:55632", + "type": "vsphere" + }, + "vsphere": { + "datastore": { + "capacity": { + "free": { + "bytes": 10973641441280 + }, + "total": { + "bytes": 10995116277760 + }, + "used": { + "bytes": 21474836480, + "pct": 0.001953125 + } + }, + "fstype": "OTHER", + "host": { + "count": 1, + "names": [ + "localhost_localdomain" + ] + }, + "name": "LocalDS_0", + "status": "green", + "vm": { + "count": 2, + "names": [ + "ha-host_VM0", + "ha-host_VM1" + ] + } } - }, - "write": { - "bytes": 337000, - "latency": { - "total": { - "ms": 0 - } - } - }, - "capacity": { - "free": { - "bytes": 37120094208 - }, - "total": { - "bytes": 74686664704 - }, - "used": { - "bytes": 37566570496, - "pct": 0.502988996026061 - } - }, - "fstype": "local", - "name": "LocalDS_0" } - } } \ No newline at end of file diff --git a/metricbeat/module/vsphere/datastore/datastore.go b/metricbeat/module/vsphere/datastore/datastore.go index ee2d08feff1..3014ec0e3f5 100644 --- a/metricbeat/module/vsphere/datastore/datastore.go +++ b/metricbeat/module/vsphere/datastore/datastore.go @@ -86,7 +86,7 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) } defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Debugf("error trying to log out from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -102,7 +102,7 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Debugf("error trying to destroy view from vSphere: %w", err) + m.Logger().Debugf("error trying to destroy view from vSphere: %v", err) } }() @@ -140,48 +140,18 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) default: assetNames, err := getAssetNames(ctx, pc, &dst[i]) if err != nil { - m.Logger().Errorf("Failed to retrieve object from host %s: %w", dst[i].Name, err) - continue + 
m.Logger().Errorf("Failed to retrieve object from datastore %s: %v", dst[i].Name, err) } - spec := types.PerfQuerySpec{ - Entity: dst[i].Reference(), - MetricId: metricIds, - MaxSample: 1, - IntervalId: 20, // right now we are only grabbing real time metrics from the performance manager - } - - // Query performance data - samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) - if err != nil { - m.Logger().Debugf("Failed to query performance data for host %s: %v", dst[i].Name, err) - continue - } - - if len(samples) == 0 { - m.Logger().Debugf("No samples returned from performance manager") - continue - } - - results, err := perfManager.ToMetricSeries(ctx, samples) + metricMap, err := m.getPerfMetrics(ctx, perfManager, dst[i], metricIds) if err != nil { - m.Logger().Debugf("Failed to query performance data to metric series for host %s: %v", dst[i].Name, err) - continue - } - - metricMap := make(map[string]interface{}) - for _, result := range results[0].Value { - if len(result.Value) > 0 { - metricMap[result.Name] = result.Value[0] - continue - } - m.Logger().Debugf("For host %s,Metric %v: No result found", dst[i].Name, result.Name) + m.Logger().Errorf("Failed to retrieve performance metrics from datastore %s: %v", dst[i].Name, err) } reporter.Event(mb.Event{ MetricSetFields: m.mapEvent(dst[i], &metricData{ perfMetrics: metricMap, - assetNames: *assetNames, + assetNames: assetNames, }), }) } @@ -190,13 +160,12 @@ func (m *DataStoreMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) return nil } -func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore) (*assetNames, error) { - +func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore) (assetNames, error) { outputVmNames := make([]string, 0, len(ds.Vm)) if len(ds.Vm) > 0 { var objects []mo.ManagedEntity if err := pc.Retrieve(ctx, ds.Vm, []string{"name"}, &objects); err != nil { - return nil, err + return assetNames{}, err } for _, ob := range 
objects { if ob.Reference().Type == "VirtualMachine" { @@ -220,7 +189,7 @@ func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore if len(hsRefs) > 0 { err := pc.Retrieve(ctx, hsRefs, []string{"name"}, &hosts) if err != nil { - return nil, err + return assetNames{}, err } } @@ -230,8 +199,57 @@ func getAssetNames(ctx context.Context, pc *property.Collector, ds *mo.Datastore } } - return &assetNames{ + return assetNames{ outputHostNames: outputHostNames, outputVmNames: outputVmNames, }, nil } + +func (m *DataStoreMetricSet) getPerfMetrics(ctx context.Context, perfManager *performance.Manager, dst mo.Datastore, metricIds []types.PerfMetricId) (metricMap map[string]interface{}, err error) { + metricMap = make(map[string]interface{}) + + period := m.Module().Config().Period + refreshRate := int32(period.Seconds()) + + spec := types.PerfQuerySpec{ + Entity: dst.Reference(), + MetricId: metricIds, + MaxSample: 1, + IntervalId: refreshRate, // using refreshRate as interval + } + + // Query performance data + samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) + if err != nil { + if strings.Contains(err.Error(), "ServerFaultCode: A specified parameter was not correct: querySpec.interval") { + return metricMap, fmt.Errorf("failed to query performance data: use one of the system's supported interval. 
consider adjusting period: %w", err) + } + + return metricMap, fmt.Errorf("failed to query performance data: %w", err) + } + + if len(samples) == 0 { + m.Logger().Debug("No samples returned from performance manager") + return metricMap, nil + } + + results, err := perfManager.ToMetricSeries(ctx, samples) + if err != nil { + return metricMap, fmt.Errorf("failed to convert performance data to metric series: %w", err) + } + + if len(results) == 0 { + m.Logger().Debug("No results returned from metric series conversion") + return metricMap, nil + } + + for _, result := range results[0].Value { + if len(result.Value) > 0 { + metricMap[result.Name] = result.Value[0] + continue + } + m.Logger().Debugf("For datastore %s, Metric %s: No result found", dst.Name, result.Name) + } + + return metricMap, nil +} diff --git a/metricbeat/module/vsphere/datastore/datastore_test.go b/metricbeat/module/vsphere/datastore/datastore_test.go index e94989f1055..d9a9a548780 100644 --- a/metricbeat/module/vsphere/datastore/datastore_test.go +++ b/metricbeat/module/vsphere/datastore/datastore_test.go @@ -19,6 +19,7 @@ package datastore import ( "testing" + "time" mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" @@ -105,5 +106,6 @@ func getConfig(ts *simulator.Server) map[string]interface{} { "username": "user", "password": "pass", "insecure": true, + "period": time.Second * 20, } } diff --git a/metricbeat/module/vsphere/datastorecluster/datastorecluster.go b/metricbeat/module/vsphere/datastorecluster/datastorecluster.go index dd26fa7ba04..3cadd20a9f1 100644 --- a/metricbeat/module/vsphere/datastorecluster/datastorecluster.go +++ b/metricbeat/module/vsphere/datastorecluster/datastorecluster.go @@ -76,7 +76,7 @@ func (m *DatastoreClusterMetricSet) Fetch(ctx context.Context, reporter mb.Repor defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to logout from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } 
}() @@ -89,7 +89,7 @@ func (m *DatastoreClusterMetricSet) Fetch(ctx context.Context, reporter mb.Repor defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() @@ -107,7 +107,7 @@ func (m *DatastoreClusterMetricSet) Fetch(ctx context.Context, reporter mb.Repor assetNames, err := getAssetNames(ctx, pc, &datastoreCluster[i]) if err != nil { - m.Logger().Errorf("Failed to retrieve object from host %s: %w", datastoreCluster[i].Name, err) + m.Logger().Errorf("Failed to retrieve object from host %s: %v", datastoreCluster[i].Name, err) } reporter.Event(mb.Event{MetricSetFields: m.mapEvent(datastoreCluster[i], &metricData{assetNames: assetNames})}) diff --git a/metricbeat/module/vsphere/host/_meta/data.json b/metricbeat/module/vsphere/host/_meta/data.json index cb8d7896ce5..568d08f97c9 100644 --- a/metricbeat/module/vsphere/host/_meta/data.json +++ b/metricbeat/module/vsphere/host/_meta/data.json @@ -1,55 +1,46 @@ { - "@timestamp": "2022-09-06T06:41:22.128Z", + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "vsphere.host", + "duration": 115000, + "module": "vsphere" + }, "metricset": { "name": "host", "period": 10000 }, "service": { - "address": "https://localhost:8989/sdk", + "address": "127.0.0.1:55538", "type": "vsphere" }, - "event": { - "module": "vsphere", - "duration": 23519250, - "dataset": "vsphere.host" - }, "vsphere": { "host": { "cpu": { - "used": { - "mhz": 67 + "free": { + "mhz": 4521 }, "total": { "mhz": 4588 }, - "free": { - "mhz": 4521 + "used": { + "mhz": 67 } }, + "datastore": { + "count": 1, + "names": [ + "LocalDS_0" + ] + }, "disk": { - "capacity": { - "usage": { - "bytes": 0 - } - }, - "devicelatency": { - "average": { - "ms": 0 - } - }, - "latency": { - "total": { - "ms": 18 - } + "read": { + "bytes": 159744 }, "total": { - "bytes": 262000 - }, - "read": { - "bytes": 13000 + 
"bytes": 401408 }, "write": { - "bytes": 248000 + "bytes": 259072 } }, "memory": { @@ -63,80 +54,49 @@ "bytes": 1472200704 } }, + "name": "localhost.localdomain", "network": { "bandwidth": { + "received": { + "bytes": 270336 + }, "total": { - "bytes": 372000 + "bytes": 532480 }, "transmitted": { - "bytes": 0 - }, - "received": { - "bytes": 371000 + "bytes": 249856 } }, + "count": 1, + "names": [ + "VM Network" + ], "packets": { - "received": { - "count": 9463 - }, - "errors": { - "transmitted": { - "count": 0 - }, - "received": { - "count": 0 - }, - "total": { - "count": 0 - } - }, "multicast": { - "total": { - "count": 6679 - }, - "transmitted": { - "count": 0 - }, "received": { - "count": 6679 + "count": 61 } }, - "dropped": { - "received": { - "count": 0 - }, - "total": { - "count": 0 - }, - "transmitted": { - "count": 0 - } + "received": { + "count": 4569 }, "transmitted": { - "count": 54 + "count": 4578 } } }, + "network_names": [ + "VM Network" + ], + "status": "gray", + "uptime": 77229, "vm": { "count": 2, "names": [ - "DC0_H0_VM0", - "DC0_H0_VM1" + "ha-host_VM0", + "ha-host_VM1" ] - }, - "datastore": { - "count": 1, - "names": [ - "LocalDS_0" - ] - }, - "network_count": 1, - "network_names": [ - "VM Network" - ], - "name": "DC0_H0", - "status": "green", - "uptime": 1728865 + } } } -} +} \ No newline at end of file diff --git a/metricbeat/module/vsphere/host/host.go b/metricbeat/module/vsphere/host/host.go index 2b7e26de72e..6248f3e6cba 100644 --- a/metricbeat/module/vsphere/host/host.go +++ b/metricbeat/module/vsphere/host/host.go @@ -100,7 +100,7 @@ func (m *HostMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to log out from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -116,7 +116,7 @@ func (m *HostMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error defer func() { if err := 
v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() @@ -154,40 +154,12 @@ func (m *HostMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error default: assetNames, err := getAssetNames(ctx, pc, &hst[i]) if err != nil { - m.Logger().Errorf("Failed to retrieve object from host %s: %w", hst[i].Name, err) + m.Logger().Errorf("Failed to retrieve object from host %s: %v", hst[i].Name, err) } - spec := types.PerfQuerySpec{ - Entity: hst[i].Reference(), - MetricId: metricIds, - MaxSample: 1, - IntervalId: 20, // right now we are only grabbing real time metrics from the performance manager - } - - // Query performance data - samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) - if err != nil { - m.Logger().Errorf("Failed to query performance data from host %s: %v", hst[i].Name, err) - continue - } - - if len(samples) == 0 { - m.Logger().Debug("No samples returned from performance manager") - continue - } - - results, err := perfManager.ToMetricSeries(ctx, samples) + metricMap, err := m.getPerfMetrics(ctx, perfManager, hst[i], metricIds) if err != nil { - m.Logger().Errorf("Failed to convert performance data to metric series for host %s: %v", hst[i].Name, err) - } - - metricMap := make(map[string]interface{}) - for _, result := range results[0].Value { - if len(result.Value) > 0 { - metricMap[result.Name] = result.Value[0] - continue - } - m.Logger().Debugf("For host %s,Metric %v: No result found", hst[i].Name, result.Name) + m.Logger().Errorf("Failed to retrieve performance metrics from host %s: %v", hst[i].Name, err) } reporter.Event(mb.Event{ @@ -240,3 +212,52 @@ func getAssetNames(ctx context.Context, pc *property.Collector, hs *mo.HostSyste outputVmNames: outputVmNames, }, nil } + +func (m *HostMetricSet) getPerfMetrics(ctx context.Context, perfManager *performance.Manager, hst mo.HostSystem, metricIds 
[]types.PerfMetricId) (metricMap map[string]interface{}, err error) { + metricMap = make(map[string]interface{}) + + period := m.Module().Config().Period + refreshRate := int32(period.Seconds()) + + spec := types.PerfQuerySpec{ + Entity: hst.Reference(), + MetricId: metricIds, + MaxSample: 1, + IntervalId: refreshRate, + } + + // Query performance data + samples, err := perfManager.Query(ctx, []types.PerfQuerySpec{spec}) + if err != nil { + if strings.Contains(err.Error(), "ServerFaultCode: A specified parameter was not correct: querySpec.interval") { + return metricMap, fmt.Errorf("failed to query performance data: use one of the system's supported interval. consider adjusting period: %w", err) + } + + return metricMap, fmt.Errorf("failed to query performance data: %w", err) + } + + if len(samples) == 0 { + m.Logger().Debug("No samples returned from performance manager") + return metricMap, nil + } + + results, err := perfManager.ToMetricSeries(ctx, samples) + if err != nil { + return metricMap, fmt.Errorf("failed to convert performance data to metric series: %w", err) + } + + if len(results) == 0 { + m.Logger().Debug("No results returned from metric series conversion") + return metricMap, nil + } + + for _, result := range results[0].Value { + if len(result.Value) > 0 { + metricMap[result.Name] = result.Value[0] + continue + } + m.Logger().Debugf("For host %s, Metric %s: No result found", hst.Name, result.Name) + } + + return metricMap, nil +} diff --git a/metricbeat/module/vsphere/host/host_test.go b/metricbeat/module/vsphere/host/host_test.go index 12692702b7a..5a69e0e546f 100644 --- a/metricbeat/module/vsphere/host/host_test.go +++ b/metricbeat/module/vsphere/host/host_test.go @@ -19,6 +19,7 @@ package host import ( "testing" + "time" mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" "github.com/elastic/elastic-agent-libs/mapstr" @@ -191,5 +192,6 @@ func getConfig(ts *simulator.Server) map[string]interface{} { "username": "user", "password": "pass", 
"insecure": true, + "period": time.Second * 20, } } diff --git a/metricbeat/module/vsphere/network/network.go b/metricbeat/module/vsphere/network/network.go index 1adbf8f5d33..34e690009cd 100644 --- a/metricbeat/module/vsphere/network/network.go +++ b/metricbeat/module/vsphere/network/network.go @@ -79,7 +79,7 @@ func (m *NetworkMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to logout from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -92,7 +92,7 @@ func (m *NetworkMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() diff --git a/metricbeat/module/vsphere/resourcepool/resourcepool.go b/metricbeat/module/vsphere/resourcepool/resourcepool.go index ed54b6907fd..1c83e3d890d 100644 --- a/metricbeat/module/vsphere/resourcepool/resourcepool.go +++ b/metricbeat/module/vsphere/resourcepool/resourcepool.go @@ -80,7 +80,7 @@ func (m *ResourcePoolMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Errorf("error trying to log out from vSphere: %w", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -96,7 +96,7 @@ func (m *ResourcePoolMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Errorf("error trying to destroy view from vSphere: %w", err) + m.Logger().Errorf("error trying to destroy view from vSphere: %v", err) } }() @@ -115,7 +115,7 @@ func (m *ResourcePoolMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV default: assetNames, err := getAssetNames(ctx, pc, &rps[i]) if err != nil { - m.Logger().Errorf("Failed 
to retrieve object from resource pool %s: %w", rps[i].Name, err) + m.Logger().Errorf("Failed to retrieve object from resource pool %s: %v", rps[i].Name, err) } reporter.Event(mb.Event{ diff --git a/metricbeat/module/vsphere/test_vsphere.py b/metricbeat/module/vsphere/test_vsphere.py index edc06a16c9f..acf795b79dd 100644 --- a/metricbeat/module/vsphere/test_vsphere.py +++ b/metricbeat/module/vsphere/test_vsphere.py @@ -24,7 +24,7 @@ def test_datastore(self): "name": "vsphere", "metricsets": ["datastore"], "hosts": self.get_hosts(), - "period": "5s", + "period": "20s", "username": "user", "password": "pass", "extras": { @@ -55,7 +55,7 @@ def test_host(self): "name": "vsphere", "metricsets": ["host"], "hosts": self.get_hosts(), - "period": "5s", + "period": "20s", "username": "user", "password": "pass", "extras": { diff --git a/metricbeat/module/vsphere/virtualmachine/virtualmachine.go b/metricbeat/module/vsphere/virtualmachine/virtualmachine.go index e704061f859..bcc42b51653 100644 --- a/metricbeat/module/vsphere/virtualmachine/virtualmachine.go +++ b/metricbeat/module/vsphere/virtualmachine/virtualmachine.go @@ -104,7 +104,7 @@ func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { defer func() { if err := client.Logout(ctx); err != nil { - m.Logger().Debugf("Error logging out from vsphere: %v", err) + m.Logger().Errorf("error trying to logout from vSphere: %v", err) } }() @@ -130,7 +130,7 @@ func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) error { defer func() { if err := v.Destroy(ctx); err != nil { - m.Logger().Debug("Error destroying view from vsphere %w", err) + m.Logger().Debugf("Error destroying view from vsphere %v", err) } }() diff --git a/metricbeat/modules.d/vsphere.yml.disabled b/metricbeat/modules.d/vsphere.yml.disabled index d24ed10b4b0..fecff30bcce 100644 --- a/metricbeat/modules.d/vsphere.yml.disabled +++ b/metricbeat/modules.d/vsphere.yml.disabled @@ -10,7 +10,17 @@ # - network # - resourcepool # - 
virtualmachine - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. + + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -18,5 +28,5 @@ password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 738d7ef2830..9723830e84c 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1569,7 +1569,17 @@ metricbeat.modules: - module: vsphere enabled: true metricsets: ["cluster", "datastore", "datastorecluster", "host", "network", "resourcepool", "virtualmachine"] - # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds. 
+ + # Real-time data collection – An ESXi Server collects data for each performance counter every 20 seconds by default. + # Supported Periods: + # The Datastore and Host metricsets support performance data collection using the vSphere performance API. + # Since the performance API has usage restrictions based on data collection intervals, + # users should ensure that the period is configured optimally to receive real-time data. + # users can still collect summary metrics if performance metrics are not supported for the configured instance. + # This configuration can be determined based on the Data Collection Intervals and Data Collection Levels. + # Reference Links: + # Data Collection Intervals: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-247646EA-A04B-411A-8DD4-62A3DCFCF49B.html + # Data Collection Levels: https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.monitoring.doc/GUID-25800DE4-68E5-41CC-82D9-8811E27924BC.html period: 20s hosts: ["https://localhost/sdk"] @@ -1577,7 +1587,7 @@ metricbeat.modules: password: "password" # If insecure is true, don't verify the server's certificate chain insecure: false - # Get custom fields when using virtualmachine metric set. Default false. + # Get custom fields when using virtualmachine metricset. Default false. # get_custom_fields: false #------------------------------- Windows Module -------------------------------