Skip to content

Commit

Permalink
Support bytes server values. Prefer decimal prefixes for memory (#2752)
Browse files Browse the repository at this point in the history
* Use bytes properties from server

* Add changelog

Co-authored-by: Andrew Svetlov <andrew.svetlov@gmail.com>
  • Loading branch information
romasku and asvetlov authored Jul 1, 2022
1 parent 4b6ebc2 commit a69ed38
Show file tree
Hide file tree
Showing 32 changed files with 347 additions and 328 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.D/2752.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Updated code to use byte-based values from the server instead of megabytes.
2 changes: 1 addition & 1 deletion CLI.md
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ Name | Description|
|_\--credits-per-hour AMOUNT_|Price of running job of this preset for an hour in credits \[default: 0]|
|_\-g, --gpu NUMBER_|Number of GPUs|
|_\--gpu-model MODEL_|GPU model|
|_\-m, --memory AMOUNT_|Memory amount \[default: 1G]|
|_\-m, --memory AMOUNT_|Memory amount \[default: 1GB]|
|_\--preemptible-node / --non-preemptible-node_|Use a lower\-cost preemptible instance \[default: non-preemptible-node]|
|_\-p, --scheduler / -P, --no-scheduler_|Use round robin scheduler for jobs \[default: no-scheduler]|
|_\--tpu-sw-version VERSION_|TPU software version|
Expand Down
2 changes: 1 addition & 1 deletion neuro-cli/docs/admin.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ Add new resource preset
| _--credits-per-hour AMOUNT_ | Price of running job of this preset for an hour in credits _\[default: 0\]_ |
| _-g, --gpu NUMBER_ | Number of GPUs |
| _--gpu-model MODEL_ | GPU model |
| _-m, --memory AMOUNT_ | Memory amount _\[default: 1G\]_ |
| _-m, --memory AMOUNT_ | Memory amount _\[default: 1GB\]_ |
| _--preemptible-node / --non-preemptible-node_ | Use a lower-cost preemptible instance _\[default: non-preemptible-node\]_ |
| _-p, --scheduler / -P, --no-scheduler_ | Use round robin scheduler for jobs _\[default: no-scheduler\]_ |
| _--tpu-sw-version VERSION_ | TPU software version |
Expand Down
32 changes: 15 additions & 17 deletions neuro-cli/src/neuro_cli/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from neuro_cli.formatters.config import BalanceFormatter

from .click_types import MEGABYTE
from .click_types import MEMORY
from .defaults import JOB_CPU_NUMBER, JOB_MEMORY_AMOUNT, PRESET_PRICE
from .formatters.admin import (
ClustersFormatter,
Expand Down Expand Up @@ -398,7 +398,7 @@ async def generate_gcp(session: PromptSession[str]) -> str:
storage:
id: premium_lrs
instances:
- size_mb: {file_share_size_mb}
- size: {file_share_size}
"""


Expand All @@ -417,10 +417,8 @@ async def generate_azure(session: PromptSession[str]) -> str:
"Azure client secret: ", default=os.environ.get("AZURE_CLIENT_SECRET", "")
)
args["resource_group"] = await session.prompt_async("Azure resource group: ")
args["file_share_size_gb"] = await session.prompt_async(
"Azure Files storage size (Gib): "
)
args["file_share_size_mb"] = args["file_share_size_gb"] * 1024
file_share_size_gb = await session.prompt_async("Azure Files storage size (Gb): ")
args["file_share_size"] = file_share_size_gb * 10**9
return AZURE_TEMPLATE.format_map(args)


Expand All @@ -443,19 +441,19 @@ async def generate_azure(session: PromptSession[str]) -> str:
min_size: 3
max_size: 3
disk_type: {storage_profile_name}
disk_size_gb: 40
disk_size: 40_000_000_000
- id: {platform_node_pool_id}
role: platform
name: platform
min_size: 3
max_size: 3
disk_type: {storage_profile_name}
disk_size_gb: 100
disk_size: 100_000_000_000
storage:
profile_name: {storage_profile_name}
size_gib: {storage_size_gb}
size: {storage_size}
instances:
- size_mb: {storage_size_mb}
- size: {storage_size}
"""


Expand Down Expand Up @@ -510,8 +508,8 @@ async def generate_vcd(root: Root, session: PromptSession[str]) -> str:
"Storage profile: ",
default=(cloud_provider.get("storage_profile_names") or [""])[0],
)
args["storage_size_gb"] = await session.prompt_async("Storage size (Gib): ")
args["storage_size_mb"] = args["storage_size_gb"] * 1024
storage_size_gb = await session.prompt_async("Storage size (Gb): ")
args["storage_size"] = storage_size_gb * 10**9
args["kubernetes_node_pool_id"] = cloud_provider["kubernetes_node_pool_id"]
args["platform_node_pool_id"] = cloud_provider["platform_node_pool_id"]
return VCD_TEMPLATE.format_map(args)
Expand Down Expand Up @@ -931,7 +929,7 @@ async def _sync_local_config() -> None:
"-m",
"--memory",
metavar="AMOUNT",
type=MEGABYTE,
type=MEMORY,
help="Memory amount",
default=JOB_MEMORY_AMOUNT,
show_default=True,
Expand Down Expand Up @@ -991,7 +989,7 @@ async def add_resource_preset(
presets[preset_name] = Preset(
credits_per_hour=_parse_finite_decimal(credits_per_hour),
cpu=cpu,
memory_mb=memory,
memory=memory,
gpu=gpu,
gpu_model=gpu_model,
tpu_type=tpu_type,
Expand Down Expand Up @@ -1027,7 +1025,7 @@ async def add_resource_preset(
"-m",
"--memory",
metavar="AMOUNT",
type=MEGABYTE,
type=MEMORY,
help="Memory amount",
)
@option(
Expand Down Expand Up @@ -1086,7 +1084,7 @@ async def update_resource_preset(
if credits_per_hour is not None
else None,
"cpu": cpu,
"memory_mb": memory,
"memory": memory,
"gpu": gpu,
"gpu_model": gpu_model,
"tpu_type": tpu_type,
Expand Down Expand Up @@ -1306,7 +1304,7 @@ async def get_org_clusters(root: Root, cluster_name: str) -> None:
@option(
"--storage-size",
metavar="AMOUNT",
type=MEGABYTE,
type=MEMORY,
help="Storage size, ignored for storage types with elastic storage size",
)
async def add_org_cluster(
Expand Down
12 changes: 6 additions & 6 deletions neuro-cli/src/neuro_cli/click_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
from .asyncio_utils import asyncgeneratorcontextmanager
from .parse_utils import (
JobTableFormat,
parse_memory,
parse_ps_columns,
parse_top_columns,
to_megabytes,
)
from .root import Root

Expand Down Expand Up @@ -253,18 +253,18 @@ def convert(
LOCAL_REMOTE_PORT = LocalRemotePortParamType()


class MemoryType(click.ParamType):
    """Click parameter type for memory amounts, normalized to bytes.

    Accepts either a raw ``int`` (treated as a byte count, e.g. a
    programmatic default) or a string with an optional binary
    (``Ki``/``Mi``/``Gi``/…) or decimal (``k``/``M``/``G``/…) prefix and an
    optional ``b``/``B`` suffix, e.g. ``"1GB"`` or ``"512Mi"``.
    """

    name = "memory_amount"

    def convert(
        self, value: str, param: Optional[click.Parameter], ctx: Optional[click.Context]
    ) -> int:
        # Click may pass an already-converted default back through convert();
        # an int is assumed to already be a byte count.
        if isinstance(value, int):
            return int(value)
        # Delegate string parsing ("1GB", "512Mi", ...) to parse_utils.parse_memory,
        # which returns bytes.
        return parse_memory(value)


MEMORY = MemoryType()


class JobNameType(click.ParamType):
Expand Down
2 changes: 1 addition & 1 deletion neuro-cli/src/neuro_cli/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
JOB_GPU_MODEL = "nvidia-tesla-k80"
JOB_CPU_NUMBER = 0.1
PRESET_PRICE = 0
JOB_MEMORY_AMOUNT = "1G"
JOB_MEMORY_AMOUNT = "1GB"
JOB_DEBUG_LOCAL_PORT = 31234
JOB_SSH_USER = "root"

Expand Down
13 changes: 6 additions & 7 deletions neuro-cli/src/neuro_cli/formatters/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,14 @@ def _format_node_pools(node_pools: Iterable[_NodePool]) -> Table:
row = [
node_pool.machine_type,
str(node_pool.available_cpu),
format_size(node_pool.available_memory_mb * 1024**2),
format_size(node_pool.available_memory),
]
if node_pool.disk_type:
row.append(
f"{format_size(node_pool.disk_size_gb * 1024 ** 3)} "
f"{node_pool.disk_type.upper()}"
f"{format_size(node_pool.disk_size)} " f"{node_pool.disk_type.upper()}"
)
else:
row.append(format_size(node_pool.disk_size_gb * 1024**3))
row.append(format_size(node_pool.disk_size))
if has_preemptible:
row.append("√" if node_pool.is_preemptible else "×")
row.append(_gpu(node_pool))
Expand All @@ -275,7 +274,7 @@ def _format_storage(storage: _Storage) -> Table:
table.add_column("Name", style="bold", justify="left")
table.add_column("Type", style="bold", justify="left")
for instance in storage.instances:
if instance.size_mb is not None:
if instance.size is not None:
table.add_column("Size", style="bold", justify="left")
has_size = True
break
Expand All @@ -284,10 +283,10 @@ def _format_storage(storage: _Storage) -> Table:
for instance in storage.instances:
row = [instance.name or "<default>", storage.description]
if has_size:
if instance.size_mb is None:
if instance.size is None:
row.append("")
else:
row.append(format_size(instance.size_mb * 1024**2))
row.append(format_size(instance.size))
table.add_row(*row)
return table

Expand Down
2 changes: 1 addition & 1 deletion neuro-cli/src/neuro_cli/formatters/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def _format_presets(
row = [
name,
str(preset.cpu),
format_size(preset.memory_mb * 1024**2),
format_size(preset.memory),
"√" if preset.scheduler_enabled else "×",
"√" if preset.preemptible_node else "×",
gpu,
Expand Down
4 changes: 1 addition & 3 deletions neuro-cli/src/neuro_cli/formatters/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,7 @@ def __call__(self, job_status: JobDescription) -> RenderableType:
resources = Table(box=None, show_header=False, show_edge=False)
resources.add_column()
resources.add_column(style="bold", justify="right")
resources.add_row(
"Memory", format_size(job_status.container.resources.memory_mb * 1024**2)
)
resources.add_row("Memory", format_size(job_status.container.resources.memory))
resources.add_row("CPU", f"{job_status.container.resources.cpu:0.1f}")
if job_status.container.resources.gpu:
resources.add_row(
Expand Down
43 changes: 21 additions & 22 deletions neuro-cli/src/neuro_cli/parse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,32 @@ def parse_memory(memory: str) -> int:
returns value in bytes"""

# Mega, Giga, Tera, etc
prefixes = "MGTPEZY"

prefix_to_factor = {
"": 1,
# Binary
"Ki": 2**10,
"Mi": 2**20,
"Gi": 2**30,
"Ti": 2**40,
"Pi": 2**50,
# Decimal
"k": 10**3,
"M": 10**6,
"G": 10**9,
"T": 10**12,
"P": 10**15,
}

value_error = ValueError(f"Unable parse value: {memory}")

if not memory:
raise value_error

pattern = rf"""^
(?P<value>\d+)
(?P<units>(kB|kb|K|k)|((?P<prefix>[{prefixes}])
(?P<unit>[bB]?)))
(?P<prefix>{"|".join(prefix_to_factor.keys())})?
[bB]?
$"""
regex = re.compile(pattern, re.VERBOSE)
match = regex.fullmatch(memory)
Expand All @@ -48,26 +64,9 @@ def parse_memory(memory: str) -> int:
groups = match.groupdict()

value = int(groups["value"])
unit = groups["unit"]
prefix = groups["prefix"]
units = groups["units"]

if units == "kB" or units == "kb":
return value * 1000

if units == "K" or units == "k":
return value * 1024

# Our prefix string starts with Mega
# so for index 0 the power should be 2
power = 2 + prefixes.index(prefix)
multiple = 1000 if unit else 1024

return value * multiple**power

prefix = groups["prefix"] or ""

def to_megabytes(value: str) -> int:
return int(parse_memory(value) / (1024**2))
return value * prefix_to_factor[prefix]


@dataclasses.dataclass(frozen=True)
Expand Down
18 changes: 9 additions & 9 deletions neuro-cli/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,27 +64,27 @@ def cluster_config() -> Cluster:
"gpu-small": Preset(
credits_per_hour=Decimal("10"),
cpu=7,
memory_mb=30 * 1024,
memory=30 * 2**30,
gpu=1,
gpu_model="nvidia-tesla-k80",
),
"gpu-large": Preset(
credits_per_hour=Decimal("10"),
cpu=7,
memory_mb=60 * 1024,
memory=60 * 2**30,
gpu=1,
gpu_model="nvidia-tesla-v100",
),
"cpu-small": Preset(
credits_per_hour=Decimal("10"), cpu=7, memory_mb=2 * 1024
credits_per_hour=Decimal("10"), cpu=7, memory=2 * 2**30
),
"cpu-large": Preset(
credits_per_hour=Decimal("10"), cpu=7, memory_mb=14 * 1024
credits_per_hour=Decimal("10"), cpu=7, memory=14 * 2**30
),
"cpu-large-p": Preset(
credits_per_hour=Decimal("10"),
cpu=7,
memory_mb=14 * 1024,
memory=14 * 2**30,
scheduler_enabled=True,
preemptible_node=True,
),
Expand Down Expand Up @@ -122,22 +122,22 @@ def go(
"gpu-small": Preset(
credits_per_hour=Decimal("10"),
cpu=7,
memory_mb=30 * 1024,
memory=30 * 2**30,
gpu=1,
gpu_model="nvidia-tesla-k80",
),
"gpu-large": Preset(
credits_per_hour=Decimal("10"),
cpu=7,
memory_mb=60 * 1024,
memory=60 * 2**30,
gpu=1,
gpu_model="nvidia-tesla-v100",
),
"cpu-small": Preset(
credits_per_hour=Decimal("10"), cpu=7, memory_mb=2 * 1024
credits_per_hour=Decimal("10"), cpu=7, memory=2 * 2**30
),
"cpu-large": Preset(
credits_per_hour=Decimal("10"), cpu=7, memory_mb=14 * 1024
credits_per_hour=Decimal("10"), cpu=7, memory=14 * 2**30
),
},
name="default",
Expand Down
2 changes: 1 addition & 1 deletion neuro-cli/tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ async def arun_job_and_wait_state(
env = {}
async with api_get(timeout=CLIENT_TIMEOUT, path=self._nmrc_path) as client:
preset = client.presets["cpu-micro"]
resources = Resources(memory_mb=preset.memory_mb, cpu=preset.cpu)
resources = Resources(memory=preset.memory, cpu=preset.cpu)
container = Container(
image=client.parse.remote_image(image),
command=command,
Expand Down
Loading

0 comments on commit a69ed38

Please sign in to comment.