
Minor bugfix #219

Merged · 9 commits · Oct 4, 2023
47 changes: 21 additions & 26 deletions kernel_tuner/core.py
@@ -1,28 +1,27 @@
""" Module for grouping the core functionality needed by most runners """

import time
from collections import namedtuple
import logging
import re
import numpy as np
import time
from collections import namedtuple

import numpy as np

try:
import cupy as cp
except ImportError:
cp = np

import kernel_tuner.util as util
from kernel_tuner.accuracy import Tunable
from kernel_tuner.observers.nvml import NVMLObserver
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver
from kernel_tuner.backends.c import CFunctions
from kernel_tuner.backends.cupy import CupyFunctions
from kernel_tuner.backends.pycuda import PyCudaFunctions
from kernel_tuner.backends.hip import HipFunctions
from kernel_tuner.backends.nvcuda import CudaFunctions
from kernel_tuner.backends.opencl import OpenCLFunctions
from kernel_tuner.backends.c import CFunctions
from kernel_tuner.backends.opencl import OpenCLFunctions
from kernel_tuner.backends.hip import HipFunctions
import kernel_tuner.util as util
from kernel_tuner.backends.pycuda import PyCudaFunctions
from kernel_tuner.observers.nvml import NVMLObserver
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver

try:
import torch
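
Reviewer-style note: the hunk above reorders the imports into isort-style groups (standard library, third-party, then first-party kernel_tuner modules) and drops a duplicated `from kernel_tuner.backends.opencl import OpenCLFunctions` line. A minimal sketch of reproducing that grouping with isort's Python API, assuming isort 5.x is installed (the snippet and its import list are illustrative, not part of this PR):

```python
import isort

# The pre-PR ordering mixed stdlib and third-party imports.
messy = (
    "import time\n"
    "from collections import namedtuple\n"
    "import logging\n"
    "import re\n"
    "import numpy as np\n"
)

# isort.code returns the source with imports grouped and sorted,
# matching the layout this diff applies to kernel_tuner/core.py.
print(isort.code(messy))

# isort.check_code reports whether a snippet is already sorted.
assert isort.check_code(isort.code(messy), show_diff=False)
```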
@@ -245,7 +244,7 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         elif lang.upper() == "HIP":
             dev = HipFunctions(device, compiler_options=compiler_options, iterations=iterations, observers=observers)
         else:
-            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
+            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
 
         #look for NVMLObserver in observers, if present, enable special tunable parameters through nvml
         self.use_nvml = False
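
For context, the `lang` dispatch this error message belongs to is reached through the public `tune_kernel` entry point. A minimal sketch of selecting the HIP backend, assuming a working HIP setup (the kernel source and tuning values are illustrative, not from this PR):

```python
import numpy as np
import kernel_tuner

# Illustrative vector-add kernel; HIP accepts CUDA-style syntax here.
kernel_string = """
__global__ void vector_add(float *c, const float *a, const float *b, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}
"""

n = 1 << 20
a = np.random.randn(n).astype(np.float32)
b = np.random.randn(n).astype(np.float32)
c = np.zeros_like(a)

# lang="HIP" makes DeviceInterface pick HipFunctions, per the elif above;
# an unrecognized value raises the ValueError this hunk rewords.
results, env = kernel_tuner.tune_kernel(
    "vector_add",
    kernel_string,
    n,
    [c, a, b, np.int32(n)],
    {"block_size_x": [128, 256, 512]},
    lang="HIP",
)
```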
@@ -529,26 +528,22 @@ def compile_kernel(self, instance, verbose):
             raise e
         return func
 
+    @staticmethod
+    def preprocess_gpu_arguments(old_arguments, params):
+        """ Get a flat list of arguments based on the configuration given by `params` """
+        return _preprocess_gpu_arguments(old_arguments, params)
+
     def copy_shared_memory_args(self, smem_args):
-        """adds shared memory arguments to the most recently compiled module, if using CUDA"""
-        if self.lang == "CUDA":
-            self.dev.copy_shared_memory_args(smem_args)
-        else:
-            raise RuntimeError("Error cannot copy shared memory arguments when language is not CUDA")
+        """adds shared memory arguments to the most recently compiled module"""
+        self.dev.copy_shared_memory_args(smem_args)
 
     def copy_constant_memory_args(self, cmem_args):
-        """adds constant memory arguments to the most recently compiled module, if using CUDA"""
-        if self.lang == "CUDA":
-            self.dev.copy_constant_memory_args(cmem_args)
-        else:
-            raise RuntimeError("Error cannot copy constant memory arguments when language is not CUDA")
+        """adds constant memory arguments to the most recently compiled module"""
+        self.dev.copy_constant_memory_args(cmem_args)
 
     def copy_texture_memory_args(self, texmem_args):
-        """adds texture memory arguments to the most recently compiled module, if using CUDA"""
-        if self.lang == "CUDA":
-            self.dev.copy_texture_memory_args(texmem_args)
-        else:
-            raise RuntimeError("Error cannot copy texture memory arguments when language is not CUDA")
+        """adds texture memory arguments to the most recently compiled module"""
+        self.dev.copy_texture_memory_args(texmem_args)
 
     def create_kernel_instance(self, kernel_source, kernel_options, params, verbose):
         """create kernel instance from kernel source, parameters, problem size, grid divisors, and so on"""
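
The net effect of the last hunk: shared, constant, and texture memory arguments are now delegated to whichever backend is active rather than being rejected outright for every language except CUDA. A minimal sketch of how such arguments reach these methods via `tune_kernel`, assuming the default CUDA backend (the kernel and parameter values are illustrative, not from this PR):

```python
import numpy as np
import kernel_tuner

# Illustrative kernel using constant memory and dynamic shared memory.
kernel_string = """
__constant__ float coeff[4];

__global__ void scale(float *out, const float *in, int n) {
    extern __shared__ float tile[];
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        tile[threadIdx.x] = in[i];
        out[i] = coeff[0] * tile[threadIdx.x];
    }
}
"""

n = 1 << 20
inp = np.random.randn(n).astype(np.float32)
out = np.zeros_like(inp)

# smem_args and cmem_args are forwarded to the copy_*_memory_args methods
# simplified in this diff; each backend decides how to honor them.
results, env = kernel_tuner.tune_kernel(
    "scale",
    kernel_string,
    n,
    [out, inp, np.int32(n)],
    {"block_size_x": [128, 256]},
    smem_args={"size": 256 * 4},  # bytes of dynamic shared memory per block
    cmem_args={"coeff": np.ones(4, dtype=np.float32)},
)
```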