Skip to content

Commit

Permalink
Merge pull request #97 from ORNL/amd
Browse files Browse the repository at this point in the history
Amd
  • Loading branch information
renan-souza authored Feb 14, 2024
2 parents fd4b4a6 + 79858c3 commit 9bf2b2f
Show file tree
Hide file tree
Showing 10 changed files with 220 additions and 111 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ R. Souza, T. Skluzacek, S. Wilkinson, M. Ziatdinov, and R. da Silva
```

## Disclaimer & Get in Touch

Please note that this is research software. We encourage you to give it a try and use it with your own stack. We
are continuously working on improving the documentation and adding more examples and notebooks, but we are still far from
having good documentation that covers the whole system. If you are interested in working with FlowCept in your own scientific
project, we can give you a jump start if you reach out to us. Feel free to [create an issue](https://github.com/ORNL/flowcept/issues/new),
[create a new discussion thread](https://github.com/ORNL/flowcept/discussions/new/choose) or drop us an email (we trust you'll find a way to reach out to us :wink: ).

## Acknowledgement

This research uses resources of the Oak Ridge Leadership Computing Facility
Expand Down
1 change: 1 addition & 0 deletions extra_requirements/amd-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pyamdgpuinfo==2.1.6
1 change: 0 additions & 1 deletion extra_requirements/analytics-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
seaborn==0.13.2
h2o==3.44.0.3
plotly==5.18.0
scipy==1.10.1
1 change: 1 addition & 0 deletions extra_requirements/data_augmentation-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
h2o==3.44.0.3
7 changes: 3 additions & 4 deletions flowcept/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
model_explainer,
model_profiler,
)
except Exception as _exp:
flowcept.commons.logger.exception(_exp)

except:
pass

if Vocabulary.Settings.ZAMBEZE_KIND in flowcept.configs.ADAPTERS:
try:
Expand Down Expand Up @@ -50,7 +49,7 @@
MLFlowInterceptor,
)
except Exception as _exp:
flowcept.commons.loggerr.error(
flowcept.commons.logger.error(
flowcept.commons._get_adapter_exception_msg(
Vocabulary.Settings.MLFLOW_KIND
)
Expand Down
31 changes: 16 additions & 15 deletions flowcept/commons/flowcept_dataclasses/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,26 @@ class Process:
executable: str
cmd_line: List[str]

@dataclass(init=False)
class GPU:
@dataclass
class GPUMetrics:
total: int
free: int
used: int
usage_percent: float
temperature: float
power_usage: float

gpu_sums: GPUMetrics
per_gpu: Dict[int, GPUMetrics] = None
# @dataclass(init=False)
# class GPU:
# @dataclass
# class GPUMetrics:
# total: int
# free: int
# used: int
# usage_percent: float
# temperature: float
# power_usage: float
#
# gpu_sums: GPUMetrics
# per_gpu: Dict[int, GPUMetrics] = None

cpu: CPU = None
process: Process = None
memory: Memory = None
disk: Disk = None
network: Network = None
gpu: GPU = None
gpu: Dict = None # TODO: use dataclasses

def to_dict(self):
ret = {}
Expand All @@ -87,6 +87,7 @@ def to_dict(self):
if self.network is not None:
ret["network"] = self.network.__dict__
if self.gpu is not None:
ret["gpu"] = asdict(self.gpu, dict_factory=remove_none_values)
# ret["gpu"] = asdict(self.gpu, dict_factory=remove_none_values)
ret["gpu"] = self.gpu

return ret
5 changes: 3 additions & 2 deletions flowcept/commons/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,9 @@ def default(self, obj):
def _get_adapter_exception_msg(adapter_kind):
    """Build the message prefix logged when an adapter's interceptor fails to import.

    Args:
        adapter_kind: Name of the adapter kind configured in the settings file.

    Returns:
        A single-line string that names the adapter and the settings path,
        suggests how to fix the problem, and ends with ``Exception:`` so the
        caller can append the exception details.
    """
    return (
        f"You have an adapter for {adapter_kind} in"
        f" {SETTINGS_PATH} but we couldn't import its interceptor."
        f" Consider fixing the following exception (e.g., try installing the"
        f" adapter requirements -- see the README file) or remove that adapter"
        f" from the settings."
        f" Exception:"
    )
Expand Down
20 changes: 20 additions & 0 deletions flowcept/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,26 @@

TELEMETRY_CAPTURE = settings["project"].get("telemetry_capture", None)


##################################
# GPU TELEMETRY CAPTURE SETTINGS #
#################################

# Number of GPUs detected per vendor, keyed by "nvidia" and/or "amd". A vendor
# key is present only when its library imported and the query succeeded.
N_GPUS = {}
# TELEMETRY_CAPTURE is None when the settings have no "telemetry_capture"
# entry, so guard before calling .get() on it.
if TELEMETRY_CAPTURE is not None and TELEMETRY_CAPTURE.get("gpu", False):
    # The vendor libraries are optional dependencies: a missing package or
    # driver simply means no GPUs of that vendor are reported. Catch
    # Exception (not a bare except) so KeyboardInterrupt/SystemExit propagate.
    try:
        from pynvml import nvmlDeviceGetCount, nvmlInit

        # NVML must be initialized before any other NVML call is made.
        nvmlInit()
        N_GPUS["nvidia"] = nvmlDeviceGetCount()
    except Exception:
        pass
    try:
        import pyamdgpuinfo

        N_GPUS["amd"] = pyamdgpuinfo.detect_gpus()
    except Exception:
        pass

######################
# SYS METADATA #
######################
Expand Down
Loading

0 comments on commit 9bf2b2f

Please sign in to comment.