Skip to content

Commit

Permalink
feature: mesos方案优化scheduler metrics数据 TencentBlueKing#532
Browse files Browse the repository at this point in the history
  • Loading branch information
zmberg committed Jul 31, 2020
1 parent 917302e commit 0844396
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 10 deletions.
18 changes: 13 additions & 5 deletions bcs-mesos/bcs-scheduler/src/manager/store/etcd/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ func (s *managerStore) StartStoreObjectMetrics() {
store.StorageOperatorTotal.Reset()
store.ClusterMemoryResouceRemain.Reset()
store.ClusterCpuResouceRemain.Reset()
store.ClusterMemoryResouceTotal.Reset()
store.ClusterCpuResouceTotal.Reset()

// handle service metrics
services, err := s.ListAllServices()
Expand Down Expand Up @@ -246,8 +248,12 @@ func (s *managerStore) StartStoreObjectMetrics() {
if err != nil {
blog.Errorf("list all agent error %s", err.Error())
}
var clusterCpu float64
var clusterMem float64
var (
clusterCpu float64
clusterMem float64
remainCpu float64
remainMem float64
)
for _, agent := range agents {
info := agent.GetAgentInfo()
if info.IP == "" {
Expand Down Expand Up @@ -280,14 +286,16 @@ func (s *managerStore) StartStoreObjectMetrics() {

//if ip-resources is zero, then ignore it
if s.pm==nil || ipValue>0{
clusterCpu += info.CpuTotal-info.CpuUsed
clusterMem += info.MemTotal-info.MemUsed
remainCpu += info.CpuTotal-info.CpuUsed
remainMem += info.MemTotal-info.MemUsed
}
clusterCpu += info.CpuTotal
clusterMem += info.MemTotal

store.ReportAgentInfoMetrics(info.IP, s.clusterId, info.CpuTotal, info.CpuTotal-info.CpuUsed,
info.MemTotal, info.MemTotal-info.MemUsed, ipValue)
}
store.ReportClusterInfoMetrics(s.clusterId, clusterCpu, clusterMem)
store.ReportClusterInfoMetrics(s.clusterId, remainCpu, clusterCpu, remainMem, clusterMem)
}
}

Expand Down
20 changes: 18 additions & 2 deletions bcs-mesos/bcs-scheduler/src/manager/store/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,20 @@ var (
Help: "Cluster memory resource remain",
}, []string{"clusterId"})

ClusterCpuResouceTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: types.MetricsNamespaceScheduler,
Subsystem: types.MetricsSubsystemScheduler,
Name: "cluster_cpu_resource_total",
Help: "Cluster cpu resource total",
}, []string{"clusterId"})

ClusterMemoryResouceTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: types.MetricsNamespaceScheduler,
Subsystem: types.MetricsSubsystemScheduler,
Name: "cluster_memory_resource_total",
Help: "Cluster memory resource total",
}, []string{"clusterId"})

StorageOperatorTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: types.MetricsNamespaceScheduler,
Subsystem: types.MetricsSubsystemScheduler,
Expand All @@ -134,7 +148,7 @@ var (
func init() {
prometheus.MustRegister(ObjectResourceInfo, TaskgroupInfo, AgentCpuResourceTotal, AgentMemoryResourceTotal,
StorageOperatorTotal, StorageOperatorLatencyMs, StorageOperatorFailedTotal, AgentCpuResourceRemain, AgentMemoryResourceRemain,
AgentIpResourceRemain, ClusterCpuResouceRemain, ClusterMemoryResouceRemain)
AgentIpResourceRemain, ClusterCpuResouceRemain, ClusterMemoryResouceRemain, ClusterCpuResouceTotal, ClusterMemoryResouceTotal)
}

func ReportObjectResourceInfoMetrics(resource, ns, name, status string) {
Expand Down Expand Up @@ -183,9 +197,11 @@ func ReportAgentInfoMetrics(ip, clusterId string, totalCpu, remainCpu, totalMem,
AgentIpResourceRemain.WithLabelValues(ip, clusterId).Set(remainIp)
}

func ReportClusterInfoMetrics(clusterId string, remainCpu, remainMem float64) {
func ReportClusterInfoMetrics(clusterId string, remainCpu, totalCpu, remainMem, totalMem float64) {
ClusterCpuResouceRemain.WithLabelValues(clusterId).Set(remainCpu)
ClusterMemoryResouceRemain.WithLabelValues(clusterId).Set(remainMem)
ClusterCpuResouceTotal.WithLabelValues(clusterId).Set(totalCpu)
ClusterMemoryResouceTotal.WithLabelValues(clusterId).Set(totalMem)
}

func ReportStorageOperatorMetrics(operator string, started time.Time, failed bool) {
Expand Down
14 changes: 11 additions & 3 deletions bcs-mesos/bcs-scheduler/src/manager/store/zk/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ func (s *managerStore) StartStoreObjectMetrics() {
store.StorageOperatorTotal.Reset()
store.ClusterMemoryResouceRemain.Reset()
store.ClusterCpuResouceRemain.Reset()
store.ClusterMemoryResouceTotal.Reset()
store.ClusterCpuResouceTotal.Reset()

// handle service metrics
services, err := s.ListAllServices()
Expand Down Expand Up @@ -131,8 +133,12 @@ func (s *managerStore) StartStoreObjectMetrics() {
store.ReportObjectResourceInfoMetrics(store.ObjectResourceSecret, secret.NameSpace, secret.Name, "")
}

var clusterCpu float64
var clusterMem float64
var (
clusterCpu float64
clusterMem float64
remainCpu float64
remainMem float64
)
// handle agents metrics
agents, err := s.ListAllAgents()
if err != nil {
Expand Down Expand Up @@ -173,11 +179,13 @@ func (s *managerStore) StartStoreObjectMetrics() {
clusterCpu += info.CpuTotal-info.CpuUsed
clusterMem += info.MemTotal-info.MemUsed
}
clusterCpu += info.CpuTotal
clusterMem += info.MemTotal

store.ReportAgentInfoMetrics(info.IP, s.clusterId, info.CpuTotal, info.CpuTotal-info.CpuUsed,
info.MemTotal, info.MemTotal-info.MemUsed, ipValue)
}
store.ReportClusterInfoMetrics(s.clusterId, clusterCpu, clusterMem)
store.ReportClusterInfoMetrics(s.clusterId, remainCpu, clusterCpu, remainMem, clusterMem)
}
}

Expand Down

0 comments on commit 0844396

Please sign in to comment.