-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics_prom.py
110 lines (91 loc) · 4.21 KB
/
metrics_prom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import time
import requests
import json
import csv
import argparse
from datetime import datetime, timedelta
from oscar_python.client import Client
# Path (including the query-string prefix) appended to the Prometheus base URL.
QUERY_ENDPOINT = "/api/v1/query?query="
# Default range window used inside the rate() selectors, as a Prometheus
# duration string. Overridden by -t/--time below.
TIME = "5d"
# Timestamp of this run; also embedded in the output file name.
END_DATE = datetime.today()


def _window_to_timedelta(window):
    """Convert a Prometheus duration string (e.g. ``5d``, ``1h30m``) to a timedelta.

    Raises:
        ValueError: if *window* is not a sequence of ``<integer><unit>`` parts
            with unit in ms, s, m, h, d, w, y.
    """
    import re  # local import: only needed once, while reading the CLI

    # Unit sizes in milliseconds; Prometheus defines a year as 365 days.
    millis_per_unit = {
        "ms": 1,
        "s": 1000,
        "m": 60 * 1000,
        "h": 3600 * 1000,
        "d": 86400 * 1000,
        "w": 7 * 86400 * 1000,
        "y": 365 * 86400 * 1000,
    }
    parts = re.findall(r"(\d+)(ms|s|m|h|d|w|y)", window)
    # Re-assembling the parts must give back the input, otherwise some
    # characters were not part of a valid <integer><unit> pair.
    if not parts or "".join(amount + unit for amount, unit in parts) != window:
        raise ValueError(f"invalid Prometheus duration: {window!r}")
    total_millis = sum(int(amount) * millis_per_unit[unit] for amount, unit in parts)
    return timedelta(milliseconds=total_millis)


parser = argparse.ArgumentParser(description="Command-line to retreive Prometheus metrics from OSCAR", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("PROMETHEUS_ENDPOINT", action="store", help="Prometheus endpoint")
parser.add_argument("VO", help="VO from which get the metrics")
parser.add_argument("OSCAR_AUTH", help="JSON string with the OSCAR cluster authentication info (BasicAuth/oidc-token)")
# dest must be an attribute *name*; the original passed the value of TIME
# ("5d") as dest, so the option was parsed but never used.
parser.add_argument("-t", "--time", dest="time", default=TIME, help="Prometheus range window for the CPU usage queries (e.g. 12h, 5d, 2w)")
args = parser.parse_args()

PROMETHEUS_ENDPOINT = args.PROMETHEUS_ENDPOINT + QUERY_ENDPOINT
VO = args.VO
TIME = args.time

# Keep the reported start date in step with the window actually queried.
try:
    START_DATE = END_DATE - _window_to_timedelta(TIME)
except (ValueError, OverflowError) as err:
    parser.error(str(err))

try:
    OSCAR_CLUSTER_AUTH = json.loads(args.OSCAR_AUTH)
except (TypeError, ValueError):
    # json.JSONDecodeError is a ValueError; malformed JSON never raises TypeError.
    print("Error parsing OSCAR cluster authentication!")
    raise SystemExit(1)
def get_sync_query(svc_name):
    """Build the PromQL expression for a service's ``oscar-container`` CPU rate.

    The expression sums ``rate(container_cpu_usage_seconds_total[...])`` over
    the module-level ``TIME`` window, for pods whose name matches
    ``<svc_name>.*`` and whose container label is ``oscar-container``.
    """
    selector = "".join(("{pod=~'", svc_name, ".*', container='oscar-container'}"))
    window = "".join(("[", TIME, "]"))
    return "".join(("sum(rate(container_cpu_usage_seconds_total", selector, window, "))"))
def get_exposed_query(svc_name):
    """Build the PromQL expression for a service whose container is named after it.

    The expression sums ``rate(container_cpu_usage_seconds_total[...])`` over
    the module-level ``TIME`` window, for pods whose name matches
    ``<svc_name>.*`` and whose container label equals ``svc_name``.
    """
    selector = "".join(("{pod=~'", svc_name, ".*', container='", svc_name, "'}"))
    window = "".join(("[", TIME, "]"))
    return "".join(("sum(rate(container_cpu_usage_seconds_total", selector, window, "))"))
def get_async_query():
    """Build the PromQL expression for per-pod CPU rate of succeeded pods.

    The expression computes the CPU rate per pod in the ``oscar-svc``
    namespace over the module-level ``TIME`` window and multiplies it, pod by
    pod, with ``kube_pod_status_phase{phase='Succeeded'}``. It does not depend
    on any particular service.
    """
    per_pod_rate = "".join((
        "sum by (pod) (rate(container_cpu_usage_seconds_total{namespace='oscar-svc'}[",
        TIME,
        "]))",
    ))
    succeeded_join = " * on (pod) group_left () kube_pod_status_phase{phase='Succeeded'}"
    return per_pod_rate + succeeded_join
def get_cluster_services(oscar_client):
    """Return the cluster's services, decoded from the client's JSON response.

    Args:
        oscar_client: object exposing ``list_services()``, whose result has a
            ``text`` attribute holding a JSON document.

    Returns:
        Whatever Python value the JSON body of the response decodes to.
    """
    raw_listing = oscar_client.list_services().text
    services = json.loads(raw_listing)
    return services
def has_jobs(svc_name, client=None):
    """Report whether a service has jobs and return the decoded job listing.

    Args:
        svc_name: name of the service to look up.
        client: object exposing ``list_jobs(name)`` whose result has a JSON
            ``text`` attribute. Defaults to the module-level ``oscar_client``
            so existing one-argument callers keep working.

    Returns:
        ``(False, None)`` when the decoded listing is empty, otherwise
        ``(True, listing)``.
    """
    if client is None:
        # Falls back to the global created by the script entry point.
        client = oscar_client
    # Decode once; the original parsed the same body twice.
    jobs = json.loads(client.list_jobs(svc_name).text)
    # Any empty payload ({}, [], null) means "no jobs", not just {}.
    if not jobs:
        return False, None
    return True, jobs
def query(cpu_usage_query, timeout=30):
    """Run a PromQL expression against Prometheus and return the decoded JSON.

    Args:
        cpu_usage_query: PromQL expression appended to ``PROMETHEUS_ENDPOINT``.
        timeout: seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block forever on an unresponsive host.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if Prometheus answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within *timeout*.
    """
    send_url = PROMETHEUS_ENDPOINT + cpu_usage_query
    response = requests.get(send_url, timeout=timeout)
    # Fail here with the HTTP status rather than later with a KeyError on
    # a body that has no "data" member.
    response.raise_for_status()
    return json.loads(response.text)
def generate_file_name():
    """Return the CSV output path, tagged with the module-level ``END_DATE``."""
    path_template = "/app/metrics/prometheus-metrics/metric-{}.csv"
    return path_template.format(END_DATE)
def extract_metrics(cluster_services):
    """Write the CPU usage of every service of the selected VO to a CSV file.

    The file path comes from ``generate_file_name()``. After the header row,
    one row is written per Prometheus sample:

    * Services for which ``has_jobs`` reports jobs: one row for every sample
      of the async query whose pod name contains one of the job names.
    * Other services: one row per sample of the exposed query (when the
      service name contains ``"expose"``) or of the sync query otherwise;
      the service name is used in the ``pod_name`` column.

    Args:
        cluster_services: iterable of service dicts with at least a ``name``
            key and, optionally, a ``vo`` key. Services whose ``vo`` is
            missing, empty, or different from the module-level ``VO`` are
            skipped.
    """
    fields = ["service_name", "pod_name", "cpu_usage_seconds", "vo", "start_date", "end_date"]
    # The async query does not mention the service, so its result is fetched
    # at most once and shared by every service that has jobs.
    async_metrics = None

    with open(generate_file_name(), 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(fields)

        for svc in cluster_services:
            svc_vo = svc.get("vo", "")
            if not svc_vo or svc_vo != VO:
                continue
            svc_name = svc["name"]

            jobs, job_list = has_jobs(svc_name)
            if jobs and job_list is not None:
                if async_metrics is None:
                    async_metrics = query(get_async_query())["data"]["result"]
                for job_name in job_list:
                    for m in async_metrics:
                        pod_name = m["metric"]["pod"]
                        # A sample belongs to the job when the job name
                        # appears in the pod name.
                        if job_name in pod_name:
                            value = m["value"][1]
                            writer.writerow([svc_name, pod_name, value, svc_vo, START_DATE, END_DATE])
            else:
                if "expose" in svc_name:
                    cpu_usage_query = get_exposed_query(svc_name)
                else:
                    cpu_usage_query = get_sync_query(svc_name)
                for m in query(cpu_usage_query)["data"]["result"]:
                    value = m["value"][1]
                    # These queries aggregate over pods, so no pod label is
                    # available; the service name fills the pod_name column.
                    writer.writerow([svc_name, svc_name, value, svc_vo, START_DATE, END_DATE])
######## MAIN ##########
if __name__ == "__main__":
    print("[*] Getting metrics from Prometheus DB")
    try:
        # Must stay a module-level name: has_jobs() reads the global
        # `oscar_client`.
        oscar_client = Client(OSCAR_CLUSTER_AUTH)
    except Exception as err:
        # Report the cause and signal failure to the caller; the original
        # bare `except:` hid the error and exited with status 0.
        print(f"Error creating OSCAR client: {err}")
        raise SystemExit(1)
    cluster_services = get_cluster_services(oscar_client)
    extract_metrics(cluster_services)
    print("Success!")