Skip to content

Commit

Permalink
Add metric framework
Browse files Browse the repository at this point in the history
  • Loading branch information
不涸 committed Feb 22, 2022
1 parent 18b374b commit 3aad232
Show file tree
Hide file tree
Showing 33 changed files with 138 additions and 354 deletions.
1 change: 0 additions & 1 deletion ci/requirements-wheel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ scipy==1.7.2; python_version>='3.10'
cython==0.29.26
requests>=2.4.0
cloudpickle>=1.5.0
prometheus-client==0.11.0
4 changes: 0 additions & 4 deletions mars/deploy/oscar/base_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,3 @@ scheduling:
max_workers: 100
scheduler_backlog_timeout: 60
worker_idle_timeout: 120

global_config:
metric:
backend: console
2 changes: 0 additions & 2 deletions mars/deploy/oscar/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ async def _start_supervisor_pool(self):
n_process=self._n_supervisor_process,
modules=supervisor_modules,
subprocess_start_method=self._subprocess_start_method,
_global_config=self._config.get("global_config", {}),
)
self.supervisor_address = self._supervisor_pool.external_address

Expand All @@ -217,7 +216,6 @@ async def _start_worker_pools(self):
band_to_slot,
modules=worker_modules,
subprocess_start_method=self._subprocess_start_method,
_global_config=self._config.get("global_config", {}),
)
self._worker_pools.append(worker_pool)

Expand Down
10 changes: 1 addition & 9 deletions mars/deploy/oscar/ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,7 @@ async def new_worker(self, worker_address, band_to_slot=None):
start_time = time.time()
band_to_slot = band_to_slot or self._band_to_slot
worker_pool = await create_worker_actor_pool(
worker_address,
self._band_to_slot,
modules=self._worker_modules,
_global_config=self._config.get("global_config", {}),
worker_address, self._band_to_slot, modules=self._worker_modules
)
logger.info(
"Create worker node %s succeeds in %.4f seconds.",
Expand Down Expand Up @@ -405,10 +402,6 @@ def __init__(
self.web_address = None

async def start(self):
# init metrics to guarantee metrics use in driver
from mars.metric import init_metrics

init_metrics(self._config.get("global_config", {}))
address_to_resources = dict()
supervisor_standalone = (
self._config.get("cluster", {})
Expand Down Expand Up @@ -482,7 +475,6 @@ async def start(self):
main_pool_cpus=0,
sub_pool_cpus=0,
modules=supervisor_modules,
_global_config=self._config.get("global_config", {}),
)
logger.info("Create supervisor on node %s succeeds.", self.supervisor_address)
self._cluster_backend = await RayClusterBackend.create(
Expand Down
4 changes: 0 additions & 4 deletions mars/deploy/oscar/rayconfig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,3 @@ scheduling:
enabled: false
scheduler_backlog_timeout: 20
worker_idle_timeout: 40

global_config:
metric:
backend: ray
20 changes: 0 additions & 20 deletions mars/deploy/oscar/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
new_isolation,
stop_isolation,
)
from ...metric import Metrics
from ...services.cluster import AbstractClusterAPI, ClusterAPI
from ...services.lifecycle import AbstractLifecycleAPI, LifecycleAPI
from ...services.meta import MetaAPI, AbstractMetaAPI
Expand Down Expand Up @@ -756,13 +755,6 @@ def __init__(
register_asyncio_task_timeout_detector()
)

# metrics
self._tileable_graph_gen_time = Metrics.gauge(
"mars.tileable_graph_gen_time_secs",
"Time consuming in seconds to generate a tileable graph",
("address", "session_id"),
)

@classmethod
async def _init(
cls, address: str, session_id: str, new: bool = True, timeout: float = None
Expand Down Expand Up @@ -932,19 +924,7 @@ async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
]

# build tileable graph
start_time = time.time()
tileable_graph = gen_submit_tileable_graph(self, tileables)
cost_time_secs = time.time() - start_time
logger.info(
"Time consuming to generate a tileable graph is %ss with address "
"%s, session id %s",
cost_time_secs,
self.address,
self._session_id,
)
self._tileable_graph_gen_time.record(
cost_time_secs, {"address": self.address, "session_id": self._session_id}
)

# submit task
task_id = await self._task_api.submit_tileable_graph(
Expand Down
1 change: 0 additions & 1 deletion mars/deploy/oscar/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ async def create_actor_pool(self):
modules=self.args.load_modules,
logging_conf=self.logging_conf,
subprocess_start_method="forkserver" if os.name != "nt" else "spawn",
_global_config=self.config.get("global_config", {}),
)

async def start_services(self):
Expand Down
1 change: 0 additions & 1 deletion mars/deploy/oscar/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ async def create_actor_pool(self):
logging_conf=self.logging_conf,
cuda_devices=self.cuda_devices,
subprocess_start_method="forkserver" if os.name != "nt" else "spawn",
_global_config=self.config.get("global_config", {}),
)

async def start_services(self):
Expand Down
14 changes: 14 additions & 0 deletions mars/metric/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,16 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .api import Metrics
from .api import init_metrics
18 changes: 14 additions & 4 deletions mars/metric/api.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from typing import Dict, Any, Optional, Tuple
from .backends.console import console_metric
from .backends.prometheus import prometheus_metric
from .backends.ray import ray_metric

logger = logging.getLogger(__name__)

_metric_backend = "console"
_backends_cls = {
"console": console_metric,
"prometheus": prometheus_metric,
"ray": ray_metric,
}


Expand Down
13 changes: 13 additions & 0 deletions mars/metric/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions mars/metric/backends/console/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 changes: 14 additions & 0 deletions mars/metric/backends/console/console_metric.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from typing import Optional, Dict, Tuple

Expand Down
13 changes: 13 additions & 0 deletions mars/metric/backends/console/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 changes: 14 additions & 0 deletions mars/metric/backends/console/tests/test_console_metric.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ..console_metric import CounterImpl, GaugeImpl, MeterImpl, HistogramImpl


Expand Down
14 changes: 14 additions & 0 deletions mars/metric/backends/metric.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time

from abc import ABC, abstractmethod
Expand Down
Empty file.
31 changes: 0 additions & 31 deletions mars/metric/backends/prometheus/prometheus_metric.py

This file was deleted.

Empty file.
70 changes: 0 additions & 70 deletions mars/metric/backends/prometheus/tests/test_prometheus_metric.py

This file was deleted.

Empty file.
Loading

0 comments on commit 3aad232

Please sign in to comment.