Skip to content

Commit

Permalink
Merge with Master, fixed conflicts after merge of #189
Browse files Browse the repository at this point in the history
  • Loading branch information
fjwillemsen committed Oct 3, 2023
2 parents 91d2f57 + 2d5bff2 commit a7fdedd
Show file tree
Hide file tree
Showing 13 changed files with 623 additions and 42 deletions.
64 changes: 64 additions & 0 deletions examples/cuda/accuracy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env python

import numpy
from pprint import pprint
from kernel_tuner import tune_kernel
from kernel_tuner.accuracy import TunablePrecision, AccuracyObserver


def tune():
    """Tune a templated CUDA vector-add kernel over block size and precision.

    Two random float64 input vectors and a zero-filled output vector are
    wrapped in ``TunablePrecision`` so that the tunable parameter
    ``float_type`` selects the precision of all three arrays. Two
    ``AccuracyObserver`` instances report the RMSE and the mean relative
    error against the float64 reference sum. The tuning results are
    pretty-printed.
    """
    kernel_source = """
#include <cuda_fp16.h>
using half = __half;
template <typename T>
__global__ void vector_add(int n, const T* left, const T* right, T* output) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
output[i] = left[i] + right[i];
}
}
"""

    num_elements = 100000000

    # Host data is generated in double precision; TunablePrecision derives
    # the lower-precision copies from it.
    left = numpy.random.randn(num_elements).astype(numpy.float64)
    right = numpy.random.randn(num_elements).astype(numpy.float64)
    output = numpy.zeros_like(right)

    # All three arrays are keyed on the same tunable parameter, so they
    # always share one precision within a configuration.
    kernel_args = [numpy.int32(num_elements)]
    kernel_args.extend(
        TunablePrecision("float_type", array) for array in (left, right, output)
    )

    # Only the last argument (the output vector) has a reference answer.
    reference = [None, None, None, left + right]

    search_space = {
        "block_size_x": [32, 64, 128, 256, 512, 1024],
        "float_type": ["float", "double", "half"],
    }

    accuracy_observers = [
        AccuracyObserver("RMSE", "error_rmse"),
        AccuracyObserver("MRE", "error_relative"),
    ]

    results, _env = tune_kernel(
        "vector_add<float_type>",
        kernel_source,
        num_elements,
        kernel_args,
        search_space,
        answer=reference,
        observers=accuracy_observers,
        lang="CUDA",
    )

    pprint(results)


if __name__ == "__main__":
    tune()
308 changes: 308 additions & 0 deletions kernel_tuner/accuracy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
from collections import UserDict
from typing import Dict
import numpy as np
import logging
import re

from kernel_tuner.observers import OutputObserver


class Tunable(UserDict):
    def __init__(self, param_key: str, arrays: Dict):
        """The ``Tunable`` object can be used as an input argument when tuning
        kernels. It is a container that holds several arrays internally and
        selects one array during benchmarking based on the value of a tunable
        parameter.

        Example
        -------
        Consider this example::

            arg = Tunable("matrix_layout", dict(c=matrix, f=matrix.transpose()))

        In this example, we create a Tunable object that selects either matrix
        or matrix.transpose() for benchmarking, depending on the value of the
        tunable parameter "matrix_layout". The first argument is the name of
        the tunable parameter. The second argument is a dictionary that maps
        the tunable parameter values "c" and "f" to the arrays ``matrix`` and
        ``matrix.transpose()``, respectively. During benchmarking, the Tunable
        object selects the array passed to the kernel based on the value of
        "matrix_layout".

        :param param_key: The tunable parameter used to select the array for
            benchmarking. It may also be a callable that receives the
            configuration and returns the option, or a Python expression
            (string) that is evaluated with the tunable parameters as names.
        :param arrays: A dictionary that maps the value of that tunable
            parameter to options. A list or tuple is also accepted; its items
            are then keyed by their index.
        """
        if isinstance(arrays, (tuple, list)):
            arrays = dict(enumerate(arrays))

        super().__init__(arrays)
        self.param_key = param_key

    def select_for_configuration(self, params):
        """Return the option that belongs to the configuration ``params``.

        :param params: Dictionary mapping tunable parameter names to the
            values of the configuration being benchmarked. It is not modified.
        :returns: The stored option selected by ``param_key``.
        :raises KeyError: If the selected value is not one of the stored keys.
        """
        if callable(self.param_key):
            option = self.param_key(params)
        elif self.param_key in params:
            option = params[self.param_key]
        else:
            # ``param_key`` is an expression written by the user in the tuning
            # script (trusted input). eval() inserts "__builtins__" into the
            # globals mapping it is given, so evaluate against a copy to keep
            # the caller's configuration dictionary untouched.
            option = eval(self.param_key, dict(params))

        if option not in self.data:
            valid_options = ", ".join(map(str, self.data.keys()))
            raise KeyError(
                f"'{option}' is not a valid parameter value, should be one of: {valid_options}"
            )

        return self.data[option]

    def __call__(self, params):
        """Shorthand for :meth:`select_for_configuration`."""
        return self.select_for_configuration(params)


def _find_bfloat16_if_available():
# Try to get bfloat16 if available.
try:
from bfloat16 import bfloat16
return bfloat16
except ImportError:
pass

try:
from tensorflow import bfloat16
return bfloat16.as_numpy_dtype
except ImportError:
pass

logging.warning(
"could not find `bfloat16` data type for numpy, "
+ "please install either the package `bfloat16` or `tensorflow`"
)

return None


def _to_float_dtype(x: str) -> np.dtype:
"""Convert a string to a numpy data type (``dtype``). This function recognizes
common names (such as ``f16`` or ``kfloat``), and uses ``np.dtype(x)`` as a
fallback.
"""
if isinstance(x, str):
x = x.lower()

if x in ("bfloat16", "bf16", "kbfloat16", "__nv_bfloat16"):
result = _find_bfloat16_if_available()
if result is not None:
return result

if x in ("half", "f16", "float16", "__half", "khalf", 16):
return np.half
if x in ("float", "single", "f32", "float32", "kfloat", 32):
return np.float32
if x in ("double", "f64", "float64", "kdouble", 64):
return np.float64

return np.dtype(x)


class TunablePrecision(Tunable):
    def __init__(
        self, param_key: str, array: np.ndarray, dtypes: Dict[str, np.dtype] = None
    ):
        """The ``TunablePrecision`` object can be used as an input argument
        when tuning kernels. It is a container that internally holds several
        arrays containing the same data, but stored using different levels of
        precision. During benchmarking, one array is selected based on the
        value of the tunable parameter called ``param_key``.

        Example
        -------
        Consider this example::

            arg = TunablePrecision("matrix_type", matrix)

        This creates a ``TunablePrecision`` argument that selects the required
        floating-point precision for ``matrix`` based on the tunable parameter
        ``"matrix_type"``.

        :param param_key: The tunable parameter used to select the level of
            precision.
        :param array: The input array. It will automatically be converted to
            all data types given by ``dtypes``. Numpy scalars are kept as
            scalars; any other value is converted to a ``np.ndarray`` first.
        :param dtypes: Dictionary that maps names to numpy data types, or a
            list/tuple of type names that are resolved with
            ``_to_float_dtype``. The default types are ``double``, ``float``,
            and ``half``, plus ``bfloat16`` when such a type is available.
        """
        # If no dtypes are given, generate a default list
        if not dtypes:
            dtypes = dict(half=np.half, float=np.single, double=np.double)

            bfloat16 = _find_bfloat16_if_available()
            if bfloat16 is not None:
                dtypes["bfloat16"] = bfloat16

        # If dtypes is a list, convert it to a dictionary
        if isinstance(dtypes, (list, tuple)):
            dtypes = {name: _to_float_dtype(name) for name in dtypes}

        # Numpy scalars must stay scalars; everything else becomes an ndarray.
        # This is done once, outside the loop, and with ``asarray`` so that an
        # existing ndarray is not copied: ``astype`` below already creates an
        # independent copy for every precision.
        if not isinstance(array, np.generic):
            array = np.asarray(array)

        arrays = {
            precision: array.astype(dtype) for precision, dtype in dtypes.items()
        }

        super().__init__(param_key, arrays)


def error_metric_from_name(user_key, EPS=1e-8):
    """Find the error metric function for the given name.

    Returns a function that takes two parameters (the ground-truth and the
    estimated values) as numpy arrays and returns the error between the two
    according to the given error metric. Both arrays are cast to ``float64``
    before the metric is evaluated.

    Valid values for ``user_key`` are:

    * MSE (mean square error)
    * RMSE (root mean square error)
    * NRMSE (normalized root mean square error)
    * RMSRE (root mean square relative error)
    * RMSLE (root mean square log error)
    * MAE (mean absolute error)
    * MRE (mean relative error)
    * MALE (mean absolute log error)
    * max (maximum absolute error)
    * max rel (maximum relative error)

    Names are matched case-insensitively; the word "error" is optional,
    words may be separated by whitespace, underscores or dashes, and common
    abbreviations such as ``avg``, ``sq``, ``abs``, ``rel``, ``max`` and
    ``log`` are accepted.

    :param user_key: Name of the error metric.
    :param EPS: Lower bound on the magnitude of the reference values in
        relative metrics to prevent division by zero. It is also added to
        both inputs before taking the logarithm in the log metrics.
    :returns: Function ``f(reference, estimate)`` that returns the error.
    :raises ValueError: If ``user_key`` does not name a known metric.
    """

    # Preprocess the provided name:
    # - convert to lowercase
    # - turn whitespace, underscores and dashes into single spaces
    # - remove the word "error"
    # - collapse and strip the remaining whitespace
    # - replace common abbreviations
    #
    # Separators are normalized BEFORE removing "error": "_" is a word
    # character for ``\b``, so "mean_squared_error" would otherwise keep it.
    key = user_key.lower()
    key = re.sub(r"[\s_-]+", " ", key)
    key = re.sub(r"\berror\b", " ", key)
    key = re.sub(r"\s+", " ", key).strip()

    replacements = {
        "average": "mean",
        "avg": "mean",
        "square": "squared",
        "sq": "squared",
        "max": "maximum",
        "rel": "relative",
        "abs": "absolute",
        "log": "logarithmic",
    }

    for pattern, replacement in replacements.items():
        key = re.sub(rf"\b{pattern}\b", replacement, key)

    def relative_scale(a):
        # Denominator for relative errors: the magnitude of the reference,
        # bounded from below by EPS. The absolute value matters; without it
        # every negative reference value would be clamped to EPS.
        return np.maximum(np.abs(a), EPS)

    # Select the right metric
    if key in ("mse", "mean squared"):

        def metric(a, b):
            return np.average(np.square(a - b))

    elif key in ("rmse", "root mean squared"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(a - b)))

    elif key in ("nrmse", "normalized root mean squared"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(a - b)) / np.average(np.square(a)))

    elif key in ("mae", "absolute", "mean absolute"):

        def metric(a, b):
            return np.average(np.abs(a - b))

    elif key in ("mre", "relative", "mean relative"):

        def metric(a, b):
            return np.average(np.abs(a - b) / relative_scale(a))

    elif key in ("rmsre", "root mean squared relative"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(a - b) / np.square(relative_scale(a))))

    elif key in ("male", "mean absolute logarithmic"):

        def metric(a, b):
            return np.average(np.abs(np.log10(a + EPS) - np.log10(b + EPS)))

    elif key in ("rmsle", "root mean squared logarithmic"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(np.log10(a + EPS) - np.log10(b + EPS))))

    elif key in ("maximum absolute", "maximum"):

        def metric(a, b):
            return np.amax(np.abs(a - b))

    elif key in ("maximum relative",):

        def metric(a, b):
            return np.amax(np.abs(a - b) / relative_scale(a))

    else:
        raise ValueError(f"invalid error metric provided: {user_key}")

    # cast both arguments to f64 before passing them to the metric
    return lambda a, b: metric(
        a.astype(np.float64, copy=False), b.astype(np.float64, copy=False)
    )


class AccuracyObserver(OutputObserver):
    """``AccuracyObserver`` measures the error on the output produced by a kernel
    by comparing the output against a reference output.

    By default, it uses the root mean-squared error (RMSE) and uses the
    metric name ``"error"``.
    """

    def __init__(self, metric=None, key="error", *, atol=1e-8):
        """Create a new ``AccuracyObserver``.

        :param metric: The error metric. This should be a string that is
            accepted by ``error_metric_from_name`` such as ``"absolute error"``
            or ``"relative error"``. Alternatively, it can be a
            function that accepts two numpy arrays as arguments
            (the reference output and the kernel output)
        :param key: The name of this metric in the results.
        :param atol: The tolerance passed to ``error_metric_from_name`` when
            ``metric`` is a string (or omitted), where it prevents division
            by zero in relative metrics. It is not used when ``metric`` is a
            function.
        """

        # Default metric is RMSE
        if not metric:
            metric = "rmse"

        # If it is a string, convert it to a function
        if isinstance(metric, str):
            metric = error_metric_from_name(metric, atol)

        self.key = key
        self.metric = metric
        self.result = None

    def process_output(self, answers, outputs):
        """Compute the error of ``outputs`` with respect to ``answers``.

        Entries of ``answers`` that are ``None`` are skipped. The largest
        error over the remaining (answer, output) pairs is stored as the
        result.

        :raises ValueError: If every entry of ``answers`` is ``None``, since
            there is nothing to compare against.
        """
        errors = [
            self.metric(answer, output)
            for answer, output in zip(answers, outputs)
            if answer is not None
        ]

        if not errors:
            raise ValueError(
                "AccuracyObserver requires at least one reference answer that is not None"
            )

        self.result = max(errors)

    def get_results(self):
        """Return the last computed error as ``{key: error}``."""
        return {self.key: self.result}
6 changes: 4 additions & 2 deletions kernel_tuner/backends/c.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,22 @@
class CFunctions(CompilerBackend):
"""Class that groups the code for running and compiling C functions"""

def __init__(self, iterations=7, compiler_options=None, compiler=None):
def __init__(self, iterations=7, compiler_options=None, compiler=None, observers=None):
"""instantiate CFunctions object used for interacting with C code
:param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
:type iterations: int
"""
self.observers = observers or []
self.observers.append(CRuntimeObserver(self))

self.iterations = iterations
self.max_threads = 1024
self.compiler_options = compiler_options
# if no compiler is specified, use g++ by default
self.compiler = compiler or "g++"
self.lib = None
self.using_openmp = False
self.observers = [CRuntimeObserver(self)]
self.last_result = None

try:
Expand Down
Loading

0 comments on commit a7fdedd

Please sign in to comment.