Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Betsy to speed up BC6 compression #91535

Merged
merged 1 commit into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions core/io/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "image.h"

#include "core/config/project_settings.h"
#include "core/error/error_list.h"
#include "core/error/error_macros.h"
#include "core/io/image_loader.h"
Expand Down Expand Up @@ -2734,6 +2735,27 @@ Error Image::compress(CompressMode p_mode, CompressSource p_source, ASTCFormat p
Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels, ASTCFormat p_astc_format) {
ERR_FAIL_COND_V(data.is_empty(), ERR_INVALID_DATA);

// RenderingDevice only.
if (GLOBAL_GET("rendering/textures/vram_compression/compress_with_gpu")) {
switch (p_mode) {
case COMPRESS_BPTC: {
// BC7 is unsupported currently.
if ((format >= FORMAT_RF && format <= FORMAT_RGBE9995) && _image_compress_bptc_rd_func) {
Error result = _image_compress_bptc_rd_func(this, p_channels);

// If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme.
if (result == OK) {
return OK;
}
}

} break;

default: {
}
}
}

switch (p_mode) {
case COMPRESS_S3TC: {
ERR_FAIL_NULL_V(_image_compress_bc_func, ERR_UNAVAILABLE);
Expand Down Expand Up @@ -3115,6 +3137,7 @@ void (*Image::_image_compress_bptc_func)(Image *, Image::UsedChannels) = nullptr
void (*Image::_image_compress_etc1_func)(Image *) = nullptr;
void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr;
Error (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_decompress_bc)(Image *) = nullptr;
void (*Image::_image_decompress_bptc)(Image *) = nullptr;
void (*Image::_image_decompress_etc1)(Image *) = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions core/io/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ class Image : public Resource {
static void (*_image_compress_etc2_func)(Image *, UsedChannels p_channels);
static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format);

static Error (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels);

static void (*_image_decompress_bc)(Image *);
static void (*_image_decompress_bptc)(Image *);
static void (*_image_decompress_etc1)(Image *);
Expand Down
5 changes: 5 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2875,6 +2875,11 @@
<member name="rendering/textures/lossless_compression/force_png" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP.
</member>
<member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true">
If [code]true[/code], the texture importer will utilize the GPU for compressing textures, which makes large textures import significantly faster.
[b]Note:[/b] This setting requires either Vulkan or D3D12 to be available as a rendering backend.
[b]Note:[/b] Currently, this only affects BC6H compression, which is used on desktop and console platforms for HDR images.
</member>
<member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size).
[b]Note:[/b] This setting is an override. The texture importer will always import the format the host platform needs, even if this is set to [code]false[/code].
Expand Down
2 changes: 0 additions & 2 deletions editor/import/resource_importer_layered_texture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
}

if (compress_mode == COMPRESS_VRAM_COMPRESSED) {
mipmaps = true;

//if using video ram, optimize
if (channel_pack == 0) {
//remove alpha if not needed, so compression is more efficient
Expand Down
76 changes: 76 additions & 0 deletions modules/betsy/CrossPlatformSettings_piece_all.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@

// Three-operand min/max built from the two-operand GLSL built-ins.
#define min3(a, b, c) min(a, min(b, c))
#define max3(a, b, c) max(a, max(b, c))

// Vector type aliases: lets shader code written with floatN/intN/uintN
// names compile as GLSL by mapping them onto vecN/ivecN/uvecN.
#define float2 vec2
#define float3 vec3
#define float4 vec4

#define int2 ivec2
#define int3 ivec3
#define int4 ivec4

#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4

// Square matrix aliases. ogre_float4x3 maps to mat3x4 (GLSL spells
// non-square matrices as mat<columns>x<rows>).
#define float2x2 mat2
#define float3x3 mat3
#define float4x4 mat4
#define ogre_float4x3 mat3x4

// ushort types are aliased to the regular uint types here; uint3/uint4 are
// themselves macros above and expand further to uvec3/uvec4.
#define ushort uint
#define ushort3 uint3
#define ushort4 uint4

// "Read" short/int types: used for read operations. They are plain ints in
// GLSL and HLSL, and ushort in Metal (per the upstream note). In this GLSL
// variant they all resolve to int / ivec2.
#define rshort int
#define rshort2 int2
#define rint int
// "Write" short types: used for write operations. They are ints in GLSL,
// and ushort in HLSL and Metal (per the upstream note). Here they resolve
// to ivec2 / ivec3.
#define wshort2 int2
#define wshort3 int3

// 3x3 matrix construction helpers; both forward straight to the mat3
// constructor. NOTE(review): the parameters are named row0..row2, but the
// GLSL mat3(a, b, c) constructor takes columns - confirm callers account
// for this.
#define toFloat3x3(x) mat3(x)
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)

// Function-name aliases onto GLSL operators and built-ins.
// mul() is a plain operator* with both operands parenthesized;
// saturate() clamps to the [0.0, 1.0] range.
#define mul(x, y) ((x) * (y))
#define saturate(x) clamp((x), 0.0, 1.0)
#define lerp mix
#define rsqrt inversesqrt
// INLINE and NO_INTERPOLATION_SUFFIX expand to nothing in this GLSL
// variant; NO_INTERPOLATION_PREFIX expands to the `flat` qualifier.
#define INLINE
#define NO_INTERPOLATION_PREFIX flat
#define NO_INTERPOLATION_SUFFIX

// Extra-parameter hooks; both expand to nothing here.
#define PARAMS_ARG_DECL
#define PARAMS_ARG

#define reversebits bitfieldReverse

// Texture sampling wrappers. Every macro accepts a `sampler` argument but
// does not use it in the expansion: only `tex` is passed to the GLSL
// texture function. Array variants append `arrayIdx` as the last
// coordinate component.
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
// Screen-space derivative aliases.
#define OGRE_ddx(val) dFdx(val)
#define OGRE_ddy(val) dFdy(val)
// Texel fetch wrappers, all built on texelFetch. The 2DMS variant passes
// the subsample index in the position where the others pass the LOD.
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)

// 3D fetch: the coordinate is converted to ivec3 before the fetch.
#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)

// Gather wrappers: the trailing integer selects component 0, 1 or 2 for
// the Red, Green and Blue variants respectively. `sampler` is unused.
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)

// Fetches element `idx` from `buffer` and returns only its .x component.
#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x

// Sampler argument hooks; both expand to nothing in this GLSL variant, so
// any sampler name passed to them disappears from the generated code.
#define OGRE_SAMPLER_ARG_DECL(samplerName)
#define OGRE_SAMPLER_ARG(samplerName)

// 3D float4 textures are declared as sampler3D.
#define OGRE_Texture3D_float4 sampler3D
// Output / in-out reference parameters map to the GLSL `out` and `inout`
// parameter qualifiers.
#define OGRE_OUT_REF(declType, variableName) out declType variableName
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName
24 changes: 24 additions & 0 deletions modules/betsy/SCsub
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env python
# SCons build script for the betsy module: registers the bc6h.glsl shader
# header and adds the module's C++ sources to the modules build.
Import("env")
Import("env_modules")

# Module-specific environment cloned from the shared modules environment.
env_betsy = env_modules.Clone()
# Register bc6h.glsl with the GLSL_HEADER builder (presumably producing the
# matching *.glsl.gen.h file - confirm against glsl_builders.py).
env_betsy.GLSL_HEADER("bc6h.glsl")
# Make the generated headers depend on the builder script itself.
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])

# Thirdparty source files
# NOTE(review): thirdparty_obj stays empty in this script - no thirdparty
# sources are added to it; only the include path below is set up.
thirdparty_obj = []
thirdparty_dir = "#thirdparty/betsy/"
env_betsy.Prepend(CPPPATH=[thirdparty_dir])

# Clone with warnings disabled for thirdparty code.
# NOTE(review): env_thirdparty is not used to compile anything in this script.
env_thirdparty = env_betsy.Clone()
env_thirdparty.disable_warnings()
env.modules_sources += thirdparty_obj

# Godot source files
module_obj = []
env_betsy.add_source_files(module_obj, "*.cpp")
env.modules_sources += module_obj

# Needed to force rebuilding the module files when the thirdparty library is updated.
env.Depends(module_obj, thirdparty_obj)
17 changes: 17 additions & 0 deletions modules/betsy/UavCrossPlatform_piece_all.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

// Image load wrappers: the coordinate is converted to int2 / int3 before
// being passed to imageLoad.
#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv))
#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw))

// 2D image store wrappers. The numeric suffix reflects how many components
// `value` supplies: the 1- and 2-component variants pad the value with
// zeros up to a float4, and the 4-component variant stores it unchanged.
#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0))
#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0))
#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value)

#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv))

// 3D and 2D-array store wrappers. Unlike OGRE_imageWrite2D1, the "1"
// variants here pass `value` through without padding it to a float4.
// NOTE(review): callers of the *1 variants presumably must supply a
// 4-component value themselves - confirm this is intended.
#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value)

#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
#define OGRE_imageWrite2DArray4(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)

// Disabled: shared-memory barrier helper, left commented out.
//#define sharedOnlyBarrier memoryBarrierShared();barrier();
Loading
Loading