Skip to content

Commit

Permalink
Generate LB GPU PackInfo kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
jngrad committed Oct 30, 2024
1 parent 1f672a0 commit 999eb1b
Show file tree
Hide file tree
Showing 25 changed files with 3,920 additions and 91 deletions.
19 changes: 13 additions & 6 deletions maintainer/benchmarks/lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
parser.add_argument("--particles_per_core", metavar="N", action="store",
type=int, default=125, required=False,
help="Number of particles per core")
parser.add_argument("--box_l", action="store",
parser.add_argument("--box_l", action="store", nargs="+",
type=int, default=argparse.SUPPRESS, required=False,
help="Box length (cubic box)")
parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store",
Expand All @@ -45,6 +45,8 @@
help="Using single-precision floating point accuracy")
parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
default=False, required=False, help="Use GPU implementation")
parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction,
default=False, required=False, help="Use multi-GPU implementation")
parser.add_argument("--output", metavar="FILEPATH", action="store",
type=str, required=False, default="benchmarks.csv",
help="Output file (default: benchmarks.csv)")
Expand Down Expand Up @@ -83,9 +85,9 @@
n_proc = system.cell_system.get_state()["n_nodes"]
n_part = n_proc * args.particles_per_core
if n_part == 0:
box_l = args.box_l
box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
agrid = 1.
lb_grid = args.box_l
lb_grid = box_l
measurement_steps = 80
else:
# volume of N spheres with radius r: N * (4/3*pi*r^3)
Expand All @@ -96,13 +98,16 @@
agrid = box_l / lb_grid
measurement_steps = max(50, int(120**3 / lb_grid**3))
measurement_steps = 40
lb_grid = 3 * [lb_grid]
box_l = 3 * [box_l]

print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]")
print(f"box length: {box_l}")
print(f"LB shape: {lb_grid}")
print(f"LB agrid: {agrid:.3f}")

# System
#############################################################
system.box_l = 3 * (box_l,)
system.box_l = box_l

# Integration parameters
#############################################################
Expand Down Expand Up @@ -135,8 +140,10 @@
# LB fluid setup
#############################################################
lb_class = espressomd.lb.LBFluidWalberla
if args.gpu:
if args.gpu or args.multi_gpu:
lb_class = espressomd.lb.LBFluidWalberlaGPU
if args.multi_gpu:
system.cuda_init_handle.call_method("set_device_id_per_rank")
lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1.,
density=1., single_precision=args.single_precision)
system.lb = lbf
Expand Down
1 change: 0 additions & 1 deletion maintainer/walberla_kernels/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ generate_lb_kernels --single-precision
generate_lb_kernels --gpu
generate_lb_kernels --gpu --single-precision
format_lb_kernels
git diff src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_*CUDA*.cu # verify pragmas

# EK kernels
cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/
Expand Down
45 changes: 45 additions & 0 deletions maintainer/walberla_kernels/custom_additional_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,3 +349,48 @@ def generate_kernel_selector(
"templates/ReactionKernelSelector.tmpl.h").render(**context)

generation_context.write_file(f"{class_name}_all.h", header)


def generate_device_preprocessor(kernel, defines=()):
"""
Generate device preprocessor directives.
"""
pragmas = {
"packinfo": {
"nvcc": ["diag_suppress 177 // unused variable"],
"clang_host": ["-Wunused-variable"],
"clang_dev": ["-Wunused-variable"],
"gcc": ["-Wunused-variable"],
},
"ubb_boundary": {
"nvcc": ["diag_suppress 177 // unused variable"],
"clang_host": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"], # nopep8
"clang_dev": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"], # nopep8
"gcc": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion"],
},
}

defines_table = {
"nvcc": {"RESTRICT": "__restrict__", "FUNC_PREFIX": "__global__"},
"msvc": {"RESTRICT": "__restrict", "FUNC_PREFIX": ""},
"clang_host": {"RESTRICT": "__restrict__", "FUNC_PREFIX": ""},
"clang_dev": {"RESTRICT": "__restrict__", "FUNC_PREFIX": "__global__"},
"gcc": {"RESTRICT": "__restrict__", "FUNC_PREFIX": ""},
"other": {"RESTRICT": "", "FUNC_PREFIX": ""},
}

context = {
"pragmas": pragmas[kernel],
"defines_table": defines_table,
"defines": defines,
}

custom_env = jinja2.Environment(
loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent),
undefined=jinja2.StrictUndefined
)

content = custom_env.get_template(
"templates/preprocessor.tmpl.cuh").render(**context)

return content.split("\n/* section */\n")[1:]
120 changes: 94 additions & 26 deletions maintainer/walberla_kernels/generate_lb_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import relaxation_rates
import walberla_lbm_generation
import code_generation_context
import custom_additional_extensions

parser = argparse.ArgumentParser(description="Generate the waLBerla kernels.")
parser.add_argument("--single-precision", action="store_true", required=False,
Expand All @@ -65,6 +66,24 @@ def paramlist(parameters, keys):
yield parameters[key]


def get_ext_header(target_suffix):
return {"CUDA": "h"}.get(target_suffix, "h")


def get_ext_source(target_suffix):
return {"CUDA": "cu"}.get(target_suffix, "cpp")


def patch_file(class_name, extension, target_suffix, patch):
with open(f"{class_name}.{extension}", "r+") as f:
old_content = f.read()
new_content = patch(old_content, target_suffix)
if new_content != old_content:
f.seek(0)
f.truncate()
f.write(new_content)


with code_generation_context.CodeGeneration() as ctx:
ctx.double_accuracy = not args.single_precision
if target == ps.Target.GPU:
Expand Down Expand Up @@ -196,44 +215,93 @@ def paramlist(parameters, keys):
# generate PackInfo
assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments(
fields, streaming_pattern="pull")
spec = pystencils_espresso.generate_pack_info_vector_field_specifications(
spec = pystencils_espresso.generate_pack_info_field_specifications(
config, stencil, force_field.layout)
for params, target_suffix in paramlist(parameters, ["CPU"]):

def patch_packinfo_header(content, target_suffix):
import re
if target_suffix in ["", "AVX"]:
token = "\n //TODO: optimize by generating kernel for this case\n" # nopep8
assert token in content
content = content.replace(token, "\n")
ft = "float" if "SinglePrecision" in content else "double"
token = " pack(dir, outBuffer.forward(dataSize)"
assert token in content
content = content.replace(token, f"{token[:-1]} + sizeof({ft}))")
token = " unpack(dir, buffer.skip(dataSize)"
assert token in content
content = content.replace(token, f"{token[:-1]} + sizeof({ft}))")
elif target_suffix in ["CUDA"]:
token = "#define FUNC_PREFIX __global__"
assert token in content
content = content.replace(token, "")
content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif\n\n", "", content)
return content

def patch_packinfo_kernel(content, target_suffix):
import re
# add header includes
token = '#include "PackInfo'
assert token in content
content = content.replace(token, f"#include <cstdint>\n{token}")
if target_suffix in ["", "AVX"]:
# fix MPI buffer
m = re.search("(float|double) *\* *buffer = reinterpret_cast<(?:float|double) *\*>\(byte_buffer\);\n", content) # nopep8
assert m is not None
content = content.replace(m.group(0), f"byte_buffer += sizeof({m.group(1)}) - (reinterpret_cast<std::size_t>(byte_buffer) - (reinterpret_cast<std::size_t>(byte_buffer) / sizeof({m.group(1)})) * sizeof({m.group(1)}));\n {m.group(0)}") # nopep8
if target_suffix in ["CUDA"]:
token = "#define FUNC_PREFIX __global__"
assert token in content
push, _ = custom_additional_extensions.generate_device_preprocessor(
"packinfo", defines=("RESTRICT",))
content = content.replace(token, f"{token}\n{push}")
return content

for params, target_suffix in paramlist(parameters, ["CPU", "GPU"]):
pystencils_walberla.generate_pack_info_from_kernel(
ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments,
kind="pull", **params)
pystencils_walberla.generate_pack_info(
ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params)
if target_suffix == "CUDA":
continue
token = "\n //TODO: optimize by generating kernel for this case\n"
for field_suffix in ["Pdf", "Vec"]:
class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}" # nopep8
with open(f"{class_name}.h", "r+") as f:
content = f.read()
assert token in content
content = content.replace(token, "\n")
f.seek(0)
f.truncate()
f.write(content)
for suffix in ["Pdf", "Vec"]:
class_name = f"PackInfo{suffix}{precision_prefix}{target_suffix}"
patch_file(class_name, get_ext_header(target_suffix),
target_suffix, patch_packinfo_header)
patch_file(class_name, get_ext_source(target_suffix),
target_suffix, patch_packinfo_kernel)

# boundary conditions
ubb_dynamic = lbmpy_espresso.UBB(
lambda *args: None, dim=3, data_type=config.data_type.default_factory())
ubb_data_handler = lbmpy_espresso.BounceBackSlipVelocityUBB(
method.stencil, ubb_dynamic)

for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
# pylint: disable=unused-argument
def patch_boundary_header(content, target_suffix):
# patch for floating point accuracy
content = content.replace("real_t",
config.data_type.default_factory().c_name)
return content

def patch_boundary_kernel(content, target_suffix):
import re
if target_suffix in ["CUDA"]:
push, pop = custom_additional_extensions.generate_device_preprocessor(
"ubb_boundary", defines=("RESTRICT",))
content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif(?=\n\n|\n//)", "", content) # nopep8
content = re.sub(r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)", push, content, 1) # nopep8
content = re.sub(r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)", pop, content, 1) # nopep8
assert push in content
assert pop in content
return content

for _, target_suffix in paramlist(parameters, ("CPU", "GPU")):
class_name = f"Dynamic_UBB_{precision_suffix}{target_suffix}"
lbmpy_walberla.generate_boundary(
ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic,
method, additional_data_handler=ubb_data_handler,
ctx, class_name, ubb_dynamic, method,
additional_data_handler=ubb_data_handler,
streaming_pattern=streaming_pattern, target=target)

with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f:
content = f.read()
f.seek(0)
f.truncate(0)
# patch for floating point accuracy
content = content.replace("real_t",
config.data_type.default_factory().c_name)
f.write(content)
patch_file(class_name, get_ext_header(target_suffix),
target_suffix, patch_boundary_header)
patch_file(class_name, get_ext_source(target_suffix),
target_suffix, patch_boundary_kernel)
7 changes: 4 additions & 3 deletions maintainer/walberla_kernels/pystencils_espresso.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern):
return lbm_update_rule.all_assignments


def generate_pack_info_vector_field_specifications(config, stencil, layout):
def generate_pack_info_field_specifications(
config, stencil, layout, vec_len=3):
import collections
import itertools
field = ps.Field.create_generic(
Expand All @@ -248,7 +249,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout):
data_type_np[config.data_type.default_factory().c_name],
index_dimensions=1,
layout=layout,
index_shape=(3,)
index_shape=(vec_len,)
)
q = len(stencil)
coord = itertools.product(*[(-1, 0, 1)] * 3)
Expand All @@ -257,7 +258,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout):
else:
dirs = tuple((i, j, k) for i, j, k in coord)
spec = collections.defaultdict(set)
spec[dirs] = {field[0, 0, 0](i) for i in range(3)}
spec[dirs] = {field[0, 0, 0](i) for i in range(vec_len)}
return spec


Expand Down
Loading

0 comments on commit 999eb1b

Please sign in to comment.