Merge pull request #114 from ClarkSource/sanitization

[RFC] feat(resource sanitization): add filters
ClarkSource · May 11, 2022 · 7870227 · 7870227
2 parents 0f0ef80 + d7d1a2a
commit 7870227
Show file tree

Hide file tree

Showing 7 changed files with 163 additions and 35 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,20 +3,20 @@ repos:
     hooks:
       - id: check-hooks-apply
       - id: check-useless-excludes
-  - repo: git://github.com/pre-commit/pre-commit-hooks
-    rev: v3.1.0
+  - repo: git@github.com:pre-commit/pre-commit-hooks
+    rev: v4.2.0
     hooks:
       - id: check-merge-conflict
-  - repo: git://github.com/Lucas-C/pre-commit-hooks.git
-    rev: v1.1.9
+  - repo: git@github.com:Lucas-C/pre-commit-hooks.git
+    rev: v1.1.13
     hooks:
       - id: insert-license
         files: \.py$
   - repo: git@github.com:thlorenz/doctoc
-    rev: v1.4.0
+    rev: v2.1.0
     hooks:
       - id: doctoc
   - repo: git@github.com:PyCQa/pylint
-    rev: pylint-2.5.3
+    rev: v2.13.7
     hooks:
       - id: pylint
diff --git a/Dockerfile b/Dockerfile
@@ -7,7 +7,7 @@ ARG KUBECTL_SHA="1ab07643807a45e2917072f7ba5f11140b40f19675981b199b810552d6af5c5
 
 # Download and install tools
 RUN apk update && apk upgrade && \
-    apk add --no-cache openssl curl tar gzip bash ca-certificates py3-wheel
+  apk add --no-cache openssl curl tar gzip bash ca-certificates py3-wheel gcc musl-dev
 
 RUN \
   echo -e "${KUBECTL_SHA}  /tmp/kubectl" >> /tmp/CHECKSUMS && \
@@ -32,7 +32,7 @@ RUN \
   which python && \
   pip install --use-feature=in-tree-build /app && \
   which k8t && \
-  apk del git && \
+  apk del git gcc musl-dev && \
   rm -rf /app /var/cache/apk
 
 USER 65534

diff --git a/k8t/engine.py b/k8t/engine.py
@@ -14,7 +14,7 @@
 from jinja2 import Environment, DictLoader, FileSystemLoader, StrictUndefined
 
 from k8t.filters import (b64decode, b64encode, envvar, get_secret, hashf,
-                         random_password, sanitize_label, to_bool)
+                         random_password, sanitize_label, sanitize_cpu, sanitize_memory, standardize_cpu, standardize_memory, to_bool)
 from k8t.project import find_files
 from k8t.util import read_file
 
@@ -43,6 +43,10 @@ def build(path: str, cluster: str, environment: str, template_overrides: List[st
     env.filters["hash"] = hashf
     env.filters["bool"] = to_bool
     env.filters["sanitize_label"] = sanitize_label
+    env.filters["sanitize_cpu"] = sanitize_cpu
+    env.filters["sanitize_memory"] = sanitize_memory
+    env.filters["standardize_cpu"] = standardize_cpu
+    env.filters["standardize_memory"] = standardize_memory
 
     # Global functions
     env.globals["random_password"] = random_password

diff --git a/k8t/filters.py b/k8t/filters.py
@@ -18,7 +18,7 @@
 import string
 from typing import Any, Optional
 
-from k8t import config, secret_providers
+from k8t import config, secret_providers, util
 
 try:
     from secrets import choice
@@ -29,12 +29,10 @@
 
 
 def random_password(length: int) -> str:
-    return "".join(
-        choice(string.ascii_lowercase + string.digits) for _ in range(length)
-    )
+    return "".join(choice(string.ascii_lowercase + string.digits) for _ in range(length))
 
 
-def envvar(key: str, default=None) -> str:
+def envvar(key: str, default: Any = None) -> str:
     return os.environ.get(key, default)
 
 
@@ -48,7 +46,7 @@ def b64encode(value: Any) -> str:
     elif isinstance(value, bytes):
         result = base64.b64encode(value).decode()
     else:
-        raise TypeError("invalid input: {}".format(value))
+        raise TypeError(f"invalid input: {value}")
 
     return result
 
@@ -61,7 +59,7 @@ def b64decode(value: Any) -> str:
     elif isinstance(value, bytes):
         result = base64.b64decode(value).decode()
     else:
-        raise TypeError("invalid input: {}".format(value))
+        raise TypeError(f"invalid input: {value}")
 
     return result
 
@@ -70,19 +68,19 @@ def hashf(value, method="sha256"):
     try:
         hash_method = getattr(hashlib, method)()
     except AttributeError as no_hash_method:
-        raise RuntimeError("No such hash method: {}".format(method)) from no_hash_method
+        raise RuntimeError(f"No such hash method: {method}") from no_hash_method
 
     if isinstance(value, str):
         hash_method.update(value.encode())
     elif isinstance(value, bytes):
         hash_method.update(value)
     else:
-        raise TypeError("invalid input: {}".format(value))
+        raise TypeError(f"invalid input: {value}")
 
     return hash_method.hexdigest()
 
 
-def get_secret(key: str, length: int = None) -> str:
+def get_secret(key: str, length: Optional[int] = None) -> str:
     provider_name = config.CONFIG.get("secrets", {}).get("provider")
 
     if not provider_name:
@@ -92,7 +90,7 @@ def get_secret(key: str, length: int = None) -> str:
     try:
         provider = getattr(secret_providers, provider_name)
     except AttributeError as no_secret_provider:
-        raise NotImplementedError("secret provider {} does not exist.".format(provider_name)) from no_secret_provider
+        raise NotImplementedError(f"secret provider {provider_name} does not exist.") from no_secret_provider
 
     return provider(key, length)
 
@@ -104,7 +102,7 @@ def to_bool(value: Any) -> Optional[bool]:
     if isinstance(value, str):
         value = value.lower()
 
-    if value in ('yes', 'on', '1', 'true', 1):
+    if value in ("yes", "on", "1", "true", 1):
         return True
 
     return False
@@ -117,4 +115,62 @@ def sanitize_label(value: str) -> str:
     TODO i'm sure there is a smarter way to do this.
     """
 
-    return re.sub(r'(^[^a-z0-9A-Z]|[^a-z0-9A-Z]$|[^a-z0-9A-Z_.-])', 'X', value[:63])
+    return re.sub(r"(^[^a-z0-9A-Z]|[^a-z0-9A-Z]$|[^a-z0-9A-Z_.-])", "X", value[:63])
+
+
+def sanitize_cpu(value: str) -> str:
+    """
+    render a cpu resource value as a millicore quantity string, e.g. "500m".
+
+    parsing and validation are delegated to standardize_cpu, so any
+    ValueError raised there propagates to the caller.
+    """
+    millicores = standardize_cpu(value)
+
+    return f"{millicores}m"
+
+
+def sanitize_memory(value: str) -> str:
+    """
+    render a memory resource value as a megabyte quantity string, e.g. "200M".
+
+    parsing and validation are delegated to standardize_memory, so any
+    ValueError raised there propagates to the caller.
+    """
+    megabytes = standardize_memory(value)
+
+    return f"{megabytes}M"
+
+
+def standardize_cpu(value: str) -> int:
+    """
+    standardize cpu values to millicores.
+
+    two notations are accepted: a core count with an optional fraction
+    ("2", "0.5") and an integer millicore quantity ("250m"). fractions finer
+    than one millicore are truncated.
+
+    raises ValueError if the value matches neither notation, or if the
+    result is smaller than one millicore.
+    """
+    # imported locally so this block does not rely on the module's import list
+    from decimal import Decimal
+
+    value_millis: int
+
+    if re.fullmatch(r"^[0-9]+(\.[0-9]+)?$", value):
+        # Decimal keeps the scaling exact. with float arithmetic "1.001" * 1000
+        # evaluates to 1000.9999999999999 and would truncate to 1000.
+        value_millis = int(Decimal(value) * 1000)
+    elif re.fullmatch(r"^[0-9]+m$", value):
+        value_millis = int(value[:-1])
+    else:
+        raise ValueError(f"invalid cpu value: {value}")
+
+    if value_millis < 1:
+        raise ValueError(f"invalid cpu value: {value_millis} is less than 1")
+
+    return value_millis
+
+
+def standardize_memory(value: str) -> int:
+    """
+    standardize memory values to whole megabytes.
+
+    https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory
+
+    accepted notations: plain bytes ("128974848"), bytes with a decimal or
+    binary suffix ("129M", "123Mi", "64k", "64Ki"), millibytes
+    ("128974848000m") and exponent notation ("129e6").
+
+    raises ValueError if the value matches none of these notations, or if it
+    amounts to less than one megabyte.
+    """
+
+    value_mb: int
+
+    # kubernetes spells kilo as "k" in decimal and "Ki" in binary notation, so
+    # the kilo suffixes are listed separately from the "<letter>i?" pattern.
+    # NOTE(review): relies on bitmath exposing both kB and KiB units - confirm.
+    if re.fullmatch(r"^[0-9]+([EPTGM]i?|k|Ki)?$", value):
+        value_mb = util.memory_to_mb(f"{value}B")
+    elif re.fullmatch(r"^[0-9]+m$", value):
+        # the "m" suffix denotes thousandths of a byte
+        value_mb = util.memory_to_mb(f"{int(value[:-1]) / 1000}B")
+    elif re.fullmatch(r"^[0-9]+e[0-9]+$", value):
+        value_mb = util.memory_to_mb(f"{float(value)}B")
+    else:
+        raise ValueError(f"invalid memory value: {value}")
+
+    if value_mb < 1:
+        raise ValueError(f"invalid memory value: {value_mb} is less than one MB")
+
+    return value_mb
diff --git a/k8t/util.py b/k8t/util.py
@@ -10,14 +10,15 @@
 import copy
 import json
 import logging
+import math
+import bitmath
 import os
 import shutil
 from functools import reduce
 from typing import Any, Dict, List, Tuple
 
-from ruamel.yaml import YAML # pylint: disable=E0401
 from click import secho  # pylint: disable=E0401
-
+from ruamel.yaml import YAML  # pylint: disable=E0401
 from simple_tools.interaction import confirm  # pylint: disable=E0401
 
 LOGGER = logging.getLogger(__name__)
@@ -98,16 +99,14 @@ def deep_merge(*dicts, method="ltr"):
     if not dicts:
         return {}
 
-    return reduce(
-        lambda a, b: merge(a, b, method=method) if b is not None else a, dicts
-    )
+    return reduce(lambda a, b: merge(a, b, method=method) if b is not None else a, dicts)
 
 
 def load_yaml(path: str) -> dict:
     LOGGER.debug("loading values file: %s", path)
 
     with open(path, "r") as stream:
-        yaml = YAML(typ='safe', pure=True)
+        yaml = YAML(typ="safe", pure=True)
         return yaml.load(stream) or dict()
 
 
@@ -127,7 +126,7 @@ def to_json(input: dict) -> str:
 def to_yaml(input: dict) -> str:
     yaml = YAML()
     yaml.scalarstring.walk_tree(input)
-    return yaml.round_trip_dump(input, default_flow_style = False, allow_unicode = True, explicit_start=True)
+    return yaml.round_trip_dump(input, default_flow_style=False, allow_unicode=True, explicit_start=True)
 
 
 def envvalues() -> Dict:
@@ -141,9 +140,7 @@ def envvalues() -> Dict:
     return values
 
 
-def list_files(
-    directory: str, include_files=False, include_directories=False
-) -> List[str]:
+def list_files(directory: str, include_files=False, include_directories=False) -> List[str]:
     result = []
 
     for _, dirs, files in os.walk(directory):
@@ -157,6 +154,13 @@ def list_files(
 
     return result
 
+
 def read_file(path: str) -> str:
-    with open(path, 'rb') as stream:
+    with open(path, "rb") as stream:
         return stream.read().decode()
+
+
+def memory_to_mb(value: str) -> int:
+    """
+    parse a size string with bitmath and return it converted to megabytes.
+
+    the converted size is passed through int(), so the result is a whole
+    number of megabytes.
+    """
+    # all memory values are in bytes
+    megabytes = bitmath.parse_string(value).to_MB()
+
+    return int(megabytes)
diff --git a/setup.cfg b/setup.cfg
@@ -22,6 +22,7 @@ install_requires =
   click
   coloredlogs
   simple_tools
+  bitmath
 
 [options.entry_points]
 console_scripts =

diff --git a/tests/filters.py b/tests/filters.py
@@ -12,13 +12,23 @@
 # Author: Aljosha Friemann <aljosha.friemann@clark.de>
 
 import random
+import bitmath
 
 import pytest  # pylint: disable=E0401
 from mock import patch  # pylint: disable=E0401
 
 from k8t import config, secret_providers
-from k8t.filters import (b64decode, b64encode, get_secret, hashf,
-                         random_password, sanitize_label, to_bool)
+from k8t.filters import (
+    b64decode,
+    b64encode,
+    get_secret,
+    hashf,
+    random_password,
+    sanitize_cpu,
+    sanitize_label,
+    sanitize_memory,
+    to_bool,
+)
 
 
 def test_b64encode():
@@ -107,3 +117,56 @@ def test_sanitize_label():
 
     # check length
     assert len(sanitize_label("x" * 65)) == 63
+
+
+def test_sanitize_cpu():
+    # millicore notation passes through unchanged
+    assert sanitize_cpu("200m") == "200m"
+    assert sanitize_cpu("3000m") == "3000m"
+
+    # core notation is converted to millicores
+    assert sanitize_cpu("0.5") == "500m"
+    assert sanitize_cpu("1") == "1000m"
+    assert sanitize_cpu("92") == "92000m"
+    assert sanitize_cpu("1.8") == "1800m"
+
+    assert sanitize_cpu("0.1") == "100m"
+    assert sanitize_cpu("0.01") == "10m"
+    assert sanitize_cpu("0.001") == "1m"
+
+    # anything below one millicore is rejected. the call itself raises, so
+    # there is no return value to compare against.
+    with pytest.raises(ValueError):
+        sanitize_cpu("0.0001")
+
+    with pytest.raises(ValueError):
+        sanitize_cpu("0m")
+
+    # values matching neither notation are rejected
+    with pytest.raises(ValueError):
+        sanitize_cpu("abc")
+
+
+def test_sanitize_memory():
+    def compare(size: str, value: int, precision: float = 0.008) -> bool:
+        # pytest.approx takes a relative tolerance; 0.008 of the ~129 used
+        # below allows roughly (value +- 1.0e+00)
+        return int(bitmath.parse_string(f"{size}B")) == pytest.approx(value, precision)
+
+    assert sanitize_memory("200M") == "200M"
+
+    # 100000000m is 0.1MB, below the one megabyte minimum. the call itself
+    # raises, so there is no return value to compare against.
+    with pytest.raises(ValueError):
+        sanitize_memory("100000000m")
+
+    assert sanitize_memory("1289748") == "1M"
+    with pytest.raises(ValueError):
+        sanitize_memory("128974")
+
+    # precision can vary between systems, values should be approximately 129MB
+    assert compare(sanitize_memory("129e6"), 129)
+    assert compare(sanitize_memory("129M"), 129)
+    assert compare(sanitize_memory("128974848"), 129)
+    assert compare(sanitize_memory("128974848000m"), 129)
+    assert compare(sanitize_memory("123Mi"), 129)
+
+    # millibyte notation
+    assert sanitize_memory("300000000000m") == "300M"
+    assert sanitize_memory("20000000000m") == "20M"
+    assert sanitize_memory("20005000000m") == "20M"
+    assert sanitize_memory("1000000000m") == "1M"
+
+    # decimal suffixes
+    assert sanitize_memory("2M") == "2M"
+    assert sanitize_memory("2G") == "2000M"
+    assert sanitize_memory("2T") == "2000000M"
+    assert sanitize_memory("2P") == "2000000000M"
+
+    # binary suffixes, converted to whole decimal megabytes
+    assert sanitize_memory("2Mi") == "2M"
+    assert sanitize_memory("2Gi") == "2147M"
+    assert sanitize_memory("2Ti") == "2199023M"
+    assert sanitize_memory("2Pi") == "2251799813M"