Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CPU share estimation (JupyterLab 3.x) #8

Merged
merged 3 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Build

on:
push:
branches: [main]
branches: [main, v0.1.x]
pull_request:
branches: '*'

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check-release.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Check Release
on:
push:
branches: [master, main]
branches: [master, main, v0.1.x]
pull_request:
branches:
- '*'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Lint

on:
push:
branches: [main]
branches: [main, v0.1.x]
pull_request:
branches: [main]
branches: [main, v0.1.x]

jobs:
lint:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ repos:
- id: reorder-python-imports
language_version: python3
- repo: https://github.com/psf/black
rev: 23.9.1
rev: 24.1.1
hooks:
- id: black
- repo: https://github.com/PyCQA/flake8
Expand Down
13 changes: 0 additions & 13 deletions jupyter_power_usage/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
# Copyright 2023 IDRIS / jupyter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from jupyter_server.utils import url_path_join as ujoin

from ._version import __version__
Expand Down
17 changes: 0 additions & 17 deletions jupyter_power_usage/config.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,4 @@
# Copyright 2023 IDRIS / jupyter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

from traitlets import Enum
from traitlets import Int
from traitlets import validate
from traitlets.config import Configurable

# Minimum measurement period in millisec.
Expand Down
74 changes: 58 additions & 16 deletions jupyter_power_usage/metrics.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
# Copyright 2023 IDRIS / jupyter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import time

Expand All @@ -30,6 +17,9 @@
# Source: https://arxiv.org/pdf/2306.08323.pdf
DEFAULT_DRAM_CONSUMPTION = 0.375

# Minimum share of procs in current scope
CPU_SHARE_THRESHOLD = 0.001


class CpuPowerUsage:
"""Extract CPU power usage using RAPL metrics"""
Expand Down Expand Up @@ -61,6 +51,8 @@ def __init__(self, server_app: ServerApp):

# Setup first readings
self.rapl_readings_t = counters
self.total_cpu_time_t = self.get_total_cpu_time(psutil.cpu_times())
self.procs_cpu_time_t = self.total_cpu_time_t
self.time_t = time.time()

def power_usage_available(self):
Expand All @@ -71,6 +63,34 @@ def get_power_limit(self):
"""Get CPU power limit"""
return self._power_limit

@staticmethod
def get_total_cpu_time(cpu_times, proc=False):
    """Get total busy CPU time from a psutil ``cpu_times`` style tuple

    Time spent in idle, iowait and steal is excluded: the CPU is doing
    nothing in these modes.

    ``guest`` and ``guest_nice`` are deliberately NOT added to the sum. On
    Linux they are already accounted for in ``user`` and ``nice``
    respectively, so adding them again would count guest time twice.

    If proc=True, sum only user and system times. The remaining fields are
    not available on a process's cpu_times().

    Example of a system-wide reading:
        user=146611.61, nice=37933.89, system=74662.47, idle=3519011.28,
        iowait=58120.7, irq=0.0, softirq=2281.55, steal=0.0, guest=0.0,
        guest_nice=0.0

    Args:
        cpu_times: object exposing CPU times as attributes (``user`` and
            ``system`` are required).
        proc: True when ``cpu_times`` comes from a single process.

    Returns:
        Sum of the busy CPU times, in the same unit as the input fields.
    """
    if proc:
        return cpu_times.user + cpu_times.system

    # Fields that a given platform does not report are counted as zero
    # instead of raising AttributeError.
    return (
        cpu_times.user
        + getattr(cpu_times, 'nice', 0.0)
        + cpu_times.system
        + getattr(cpu_times, 'irq', 0.0)
        + getattr(cpu_times, 'softirq', 0.0)
    )

@staticmethod
def read_energy_counter(path):
"""Read energy counter file and return value"""
Expand All @@ -96,10 +116,31 @@ def get_cpu_share(self, pids):
if self.config.measurement_scope == 'system':
return 1, 1

# CPU share is sum of all process's cpu percents
cpu_share = sum(
[psutil.Process(pid=p).cpu_percent(interval=0.05) / 100 for p in pids]
# CPU share of current scope is rate(procs_cpu_time) / rate(total_cpu_time)
# This will give the share of cpu time of processes in current scope to TOTAL
# cpu time. We don't need to account for number of CPUs as it is a ratio
#
# Total CPU time of all processes in the current scope
procs_cpu_times = sum(
[
self.get_total_cpu_time(psutil.Process(pid=p).cpu_times(), proc=True)
for p in pids
]
)
# Total CPU time of the host excluding times in IOwait, idle, steal
total_cpu_time = self.get_total_cpu_time(psutil.cpu_times())
cpu_share = (procs_cpu_times - self.procs_cpu_time_t) / (
total_cpu_time - self.total_cpu_time_t
)

# cpu_share can go negative: when there is a lot of CPU activity and all of that
# activity suddenly disappears, the computed share tends to drop below zero. Use a
# threshold to avoid negative values
cpu_share = max(cpu_share, CPU_SHARE_THRESHOLD)

# Update the times at t which will be used in next cycle
self.procs_cpu_time_t = procs_cpu_times
self.total_cpu_time_t = total_cpu_time

# Memory share is the sum of all process's memory / total memory consumption
# We choose RSS here to estimate the share. Sum of all RSS will be more than
Expand Down Expand Up @@ -160,6 +201,7 @@ def get_power_usage(self, pids):
# import random
# cpu_power_usage = random.uniform(20, 30)
# dram_power_usage = random.uniform(5, 10)

cpu_share, mem_share = self.get_cpu_share(pids)

# Set current measurements as previous measurements for next reading
Expand Down
19 changes: 3 additions & 16 deletions jupyter_power_usage/utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
# Copyright 2023 IDRIS / jupyter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import subprocess
Expand Down Expand Up @@ -179,9 +166,9 @@ def filter_rapl_domains():
filtered_domains_power_limits[unq_dom_name] = read_power_limit_uw_counter(
dom_dict['energy_uj']
)
filtered_domains_overflow_counters[
unq_dom_name
] = read_max_energy_uj_counter(dom_dict['energy_uj'])
filtered_domains_overflow_counters[unq_dom_name] = (
read_max_energy_uj_counter(dom_dict['energy_uj'])
)
return (
filtered_domains,
filtered_domains_power_limits,
Expand Down
Loading