From ccb3c70ee95759b48e85153a0a57de3e3be9847a Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 4 Dec 2024 11:47:28 +0800 Subject: [PATCH 1/4] fixed: gather obstack result file empty check retry adjustment --- handler/gather/gather_obstack2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 1dd6b8d9..6c80c1fb 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -183,14 +183,14 @@ def __handle_from_node(self, local_stored_path, node): resp["gather_pack_path"] = "{0}/{1}.zip".format(local_stored_path, remote_dir_name) return resp - @Util.retry(5, 2) + @Util.retry(10, 5) def is_ready(self, ssh_client, pid, remote_dir_name): try: self.stdio.verbose("Check whether the directory /tmp/{dir_name} or " "file /tmp/{dir_name}/observer_{pid}_obstack.txt is empty".format(dir_name=remote_dir_name, pid=pid)) is_empty_dir_res = is_empty_dir(ssh_client, "/tmp/{0}".format(remote_dir_name), self.stdio) is_empty_file_res = is_empty_file(ssh_client, "/tmp/{dir_name}/observer_{pid}_obstack.txt".format(dir_name=remote_dir_name, pid=pid), self.stdio) if is_empty_dir_res or is_empty_file_res: - self.stdio.verbose( + self.stdio.warn( "The server {host_ip} directory /tmp/{dir_name} or file /tmp/{dir_name}/observer_{pid}_obstack.txt" " is empty, waiting for the collection to complete".format(host_ip=ssh_client.get_name(), dir_name=remote_dir_name, pid=pid) ) raise From 1a7967038da1c21f99b6727e225439b8b5d20972 Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 4 Dec 2024 14:26:36 +0800 Subject: [PATCH 2/4] Optimization analyze log result presentation --- handler/analyzer/analyze_log.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index 62d693a8..b84ef6ae 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -432,11 +432,11 @@ def __get_overall_summary(node_summary_tuples, is_files=False): :param node_summary_tuple :return: a string indicating the overall summary """ - field_names = ["Node", "Status", "FileName", "ErrorCode", "Message", "Count"] + field_names = ["Node", "Status", "FileName", "First Found Time", "ErrorCode", "Message", "Count"] t = [] t_details = [] field_names_details = field_names - field_names_details.extend(["Cause", "Solution", "First Found Time", "Last Found Time", "Trace_IDS"]) + field_names_details.extend(["Last Found Time", "Cause", "Solution", "Trace_IDS"]) for tup in node_summary_tuples: is_empty = True node = tup[0] @@ -452,24 +452,24 @@ def __get_overall_summary(node_summary_tuples, is_files=False): error_code_info = OB_RET_DICT.get(ret_key, "") if len(error_code_info) > 3: is_empty = False - t.append([node, "Error:" + tup[2] if is_err else "Completed", ret_value["file_name"], ret_key, error_code_info[1], ret_value["count"]]) + t.append([node, "Error:" + tup[2] if is_err else "Completed", ret_value["file_name"], ret_value["first_found_time"], ret_key, error_code_info[1], ret_value["count"]]) t_details.append( [ node, "Error:" + tup[2] if is_err else "Completed", ret_value["file_name"], + ret_value["first_found_time"], ret_key, error_code_info[1], ret_value["count"], + ret_value["last_found_time"], error_code_info[2], error_code_info[3], - ret_value["first_found_time"], - ret_value["last_found_time"], str(ret_value["trace_id_list"]), ] ) if is_empty: - t.append([node, "PASS", None, None, None, None]) + t.append([node, "PASS", None, None, None, None, None]) t_details.append([node, "PASS", None, None, None, None, None, None, None, None, None]) title = "\nAnalyze OceanBase Offline Log Summary:\n" if is_files else "\nAnalyze OceanBase Online Log Summary:\n" t.sort(key=lambda x: (x[0], x[1], x[2], x[3]), reverse=False) From 3dc7187e67c9a55ed35a126cfa72dd8df1fe75ee Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 4 Dec 2024 21:31:09 +0800 Subject: [PATCH 3/4] gather plan_monitor V2 --- handler/gather/gather_plan_monitor.py | 70 ++++++- handler/meta/html_meta.py | 270 +++++++++++++++++++++++++- handler/meta/sql_meta.py | 10 +- 3 files changed, 345 insertions(+), 5 deletions(-) diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 81ec817a..8479df02 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -148,6 +148,8 @@ def handle_plan_monitor_from_ob(cluster_name): sql_plan_monitor_detail_v2 = str(sql_plan_monitor_detail_template).replace("##REPLACE_TRACE_ID##", trace_id).replace("##REPLACE_ORDER_BY##", "PROCESS_NAME ASC, PLAN_LINE_ID ASC, FIRST_REFRESH_TIME ASC") sql_plan_monitor_dfo_op = self.sql_plan_monitor_dfo_op_sql(tenant_id, plan_id, trace_id, svr_ip, svr_port) + sql_ash_top_event = self.sql_ash_top_event_sql(tenant_id, trace_id) + sql_plan_monitor_db_time = self.sql_plan_monitor_db_time_sql(tenant_id, trace_id) full_audit_sql_by_trace_id_sql = self.full_audit_sql_by_trace_id_sql(trace_id) plan_explain_sql = self.plan_explain_sql(tenant_id, plan_id, svr_ip, svr_port) @@ -167,6 +169,9 @@ def handle_plan_monitor_from_ob(cluster_name): # 输出表结构的信息 self.stdio.verbose("[sql plan monitor report task] report table schema") self.report_schema(user_sql, tenant_name) + # ASH 统计 + self.stdio.verbose("[ash report task] report ash, sql: [{0}]".format(sql_ash_top_event)) + self.report_ash_obversion4(sql_ash_top_event) self.init_monitor_stat() # 输出sql_audit的详细信息 self.stdio.verbose("[sql plan monitor report task] report sql_audit details") @@ -174,6 +179,9 @@ def handle_plan_monitor_from_ob(cluster_name): # 输出算子信息 表+图 self.stdio.verbose("[sql plan monitor report task] report sql plan monitor dfo") self.report_sql_plan_monitor_dfo_op(sql_plan_monitor_dfo_op) + # db time + self.stdio.verbose("[db time display task] report db time display") + self.report_db_time_display_op(sql_plan_monitor_db_time) # 输出算子信息按 svr 级汇总 表+图 self.stdio.verbose("[sql plan monitor report task] report sql plan monitor group by server") self.report_sql_plan_monitor_svr_agg(sql_plan_monitor_svr_agg_v1, sql_plan_monitor_svr_agg_v2) @@ -324,7 +332,10 @@ def report_pre(self, s): self.__report(pre) def report_header(self): - header = GlobalHtmlMeta().get_value(key="sql_plan_monitor_report_header") + if self.ob_major_version >= 4: + header = GlobalHtmlMeta().get_value(key="sql_plan_monitor_report_header_obversion4") + else: + header = GlobalHtmlMeta().get_value(key="sql_plan_monitor_report_header") with open(self.report_file_path, 'w') as f: f.write(header) self.stdio.verbose("report header complete") @@ -627,6 +638,7 @@ def report_svr_agg_graph_data_obversion4(self, ident, cursor, title=''): def report_fast_preview(self): content = ''' + + + + + + +

SQL Monitor Report

+ + ''', +) + html_dict.set_value( "sql_plan_monitor_report_footer", ''' diff --git a/handler/meta/sql_meta.py b/handler/meta/sql_meta.py index 1cdd61af..b4053f73 100644 --- a/handler/meta/sql_meta.py +++ b/handler/meta/sql_meta.py @@ -892,7 +892,7 @@ def rm_value(self, key): TRUNCATE(AVG(DB_TIME-USER_IO_WAIT_TIME)/1000000000.0/2.5, 2) MY_CPU_TIME, TRUNCATE(AVG(USER_IO_WAIT_TIME)/1000000000.0/2.5, 2) MY_IO_TIME FROM oceanbase.gv$sql_plan_monitor -WHERE trace_id = '##REPLACE_TRACE_ID##' +WHERE CON_ID = ##REPLACE_TENANT_ID## AND trace_id = '##REPLACE_TRACE_ID##' GROUP BY PLAN_LINE_ID, PLAN_OPERATION, PLAN_DEPTH ORDER BY PLAN_LINE_ID ''', @@ -1432,3 +1432,11 @@ def rm_value(self, key): SELECT tenant_name FROM oceanbase.DBA_OB_TENANTS where TENANT_TYPE != 'META'; ''', ) + +sql_dict.set_value( + "ash_top_event_mysql", + ''' + SELECT SQL_PLAN_LINE_ID OP_ID, IF(EVENT = '', 'CPU + WAIT FOR CPU', EVENT) AS EVENT, WAIT_CLASS, COUNT(1) EVENT_CNT, ROUND(COUNT(1) * 100 / SUM(COUNT(1)) OVER (), 2) AS Percent, MAX(P1TEXT) P1TEXT, MAX(P1) P1, MAX(P2TEXT) P2TEXT, MAX(P2) P2, MAX(P3TEXT) P3TEXT, MAX(P3) P3 + FROM OCEANBASE.GV$ACTIVE_SESSION_HISTORY A WHERE CON_ID = ##REPLACE_TENANT_ID## AND TRACE_ID = '##REPLACE_TRACE_ID##' GROUP BY A.SQL_PLAN_LINE_ID, A.EVENT, A.WAIT_CLASS ORDER BY 1 ASC, 4 DESC + ''', +) From 6eeeb6a1013eb11c6f1e7d7bd174de9536c457e5 Mon Sep 17 00:00:00 2001 From: Teingi Date: Thu, 5 Dec 2024 10:48:42 +0800 Subject: [PATCH 4/4] fix --- handler/gather/gather_plan_monitor.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 8479df02..f856c6b2 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -828,21 +828,13 @@ def sql_plan_monitor_detail_template_sql(self): else: sql = GlobalSqlMeta().get_value(key="sql_plan_monitor_detail_template_oracle") return sql - + def sql_ash_top_event_sql(self, tenant_id, trace_id): - sql = ( - str(GlobalSqlMeta().get_value(key="ash_top_event_mysql")) - .replace("##REPLACE_TENANT_ID##", str(tenant_id)) - .replace("##REPLACE_TRACE_ID##", trace_id) - ) + sql = str(GlobalSqlMeta().get_value(key="ash_top_event_mysql")).replace("##REPLACE_TENANT_ID##", str(tenant_id)).replace("##REPLACE_TRACE_ID##", trace_id) return sql - + def sql_plan_monitor_db_time_sql(self, tenant_id, trace_id): - sql = ( - str(GlobalSqlMeta().get_value(key="sql_plan_monitor_db_time_mysql_template_obversion4")) - .replace("##REPLACE_TENANT_ID##", str(tenant_id)) - .replace("##REPLACE_TRACE_ID##", trace_id) - ) + sql = str(GlobalSqlMeta().get_value(key="sql_plan_monitor_db_time_mysql_template_obversion4")).replace("##REPLACE_TENANT_ID##", str(tenant_id)).replace("##REPLACE_TRACE_ID##", trace_id) return sql # sql audit 细节 @@ -975,7 +967,6 @@ def reportsql_plan_monitor_detail_svr_priority(self, sql): self.report_detail_graph_data("detail_serial_v2", cursor_sql_plan_monitor_detail_v2, '线程优先视图') self.stdio.verbose("report SQL_PLAN_MONITOR details server priority complete") - def report_ash_obversion4(self, ash_top_event_sql): ash_report = "" try: @@ -984,7 +975,7 @@ def report_ash_obversion4(self, ash_top_event_sql): self.stdio.verbose("execute SQL: %s", ash_top_event_sql) s = from_db_cursor(cursor) s.align = 'l' - ash_report = ash_report + "
%s\n%s
" %(ash_top_event_sql, s) + ash_report = ash_report + "
%s\n%s
" % (ash_top_event_sql, s) self.__report("

ASH 信息

") self.stdio.verbose("ash report complete") else: @@ -1008,4 +999,3 @@ def report_db_time_display_obversion4(self, sql_plan_monitor_db_time): self.stdio.exception("DB Time display> %s" % sql_plan_monitor_db_time) self.stdio.exception(repr(e)) pass -