diff --git a/build2/kram.xcodeproj/project.pbxproj b/build2/kram.xcodeproj/project.pbxproj index e57e4e9..4e3a0c5 100644 --- a/build2/kram.xcodeproj/project.pbxproj +++ b/build2/kram.xcodeproj/project.pbxproj @@ -19,8 +19,6 @@ 706EEF8926D1595D001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDBF26D1583E001C950E /* EtcBlock4x4Encoding_ETC1.cpp */; }; 706EEF8A26D1595D001C950E /* EtcBlock4x4Encoding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC526D1583E001C950E /* EtcBlock4x4Encoding.cpp */; }; 706EEF8B26D1595D001C950E /* EtcBlock4x4.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC626D1583E001C950E /* EtcBlock4x4.cpp */; }; - 706EEF8C26D1595D001C950E /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDCE26D1583E001C950E /* bc7decomp.cpp */; }; - 706EEF8D26D1595D001C950E /* bc7enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDD026D1583E001C950E /* bc7enc.cpp */; }; 706EEFA726D1595D001C950E /* basisu_transcoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE0426D1583F001C950E /* basisu_transcoder.cpp */; }; 706EEFA826D1595D001C950E /* miniz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1126D1583F001C950E /* miniz.cpp */; }; 706EEFA926D1595D001C950E /* hedistance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1426D1583F001C950E /* hedistance.cpp */; }; @@ -68,10 +66,6 @@ 706EEFE026D15984001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC426D1583E001C950E /* EtcBlock4x4Encoding_RGBA8.h */; }; 706EEFE126D15984001C950E /* EtcColorFloatRGBA.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC726D1583E001C950E /* EtcColorFloatRGBA.h */; }; 706EEFE226D15984001C950E /* EtcBlock4x4Encoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC826D1583E001C950E /* EtcBlock4x4Encoding.h */; }; - 706EEFE326D15984001C950E /* rgbcx.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCB26D1583E001C950E /* 
rgbcx.h */; }; - 706EEFE426D15984001C950E /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCC26D1583E001C950E /* bc7enc.h */; }; - 706EEFE526D15984001C950E /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCD26D1583E001C950E /* bc7decomp.h */; }; - 706EEFE626D15984001C950E /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDD126D1583E001C950E /* rgbcx_table4.h */; }; 706EEFF226D15984001C950E /* ateencoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFA26D1583E001C950E /* ateencoder.h */; }; 706EEFF326D15984001C950E /* basisu_transcoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFC26D1583E001C950E /* basisu_transcoder.h */; }; 706EEFF426D15984001C950E /* basisu_containers.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFD26D1583E001C950E /* basisu_containers.h */; }; @@ -134,10 +128,6 @@ 706EF15A26D166C5001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC426D1583E001C950E /* EtcBlock4x4Encoding_RGBA8.h */; }; 706EF15B26D166C5001C950E /* EtcColorFloatRGBA.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC726D1583E001C950E /* EtcColorFloatRGBA.h */; }; 706EF15C26D166C5001C950E /* EtcBlock4x4Encoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC826D1583E001C950E /* EtcBlock4x4Encoding.h */; }; - 706EF15D26D166C5001C950E /* rgbcx.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCB26D1583E001C950E /* rgbcx.h */; }; - 706EF15E26D166C5001C950E /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCC26D1583E001C950E /* bc7enc.h */; }; - 706EF15F26D166C5001C950E /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCD26D1583E001C950E /* bc7decomp.h */; }; - 706EF16026D166C5001C950E /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDD126D1583E001C950E /* rgbcx_table4.h */; }; 706EF16C26D166C5001C950E /* ateencoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFA26D1583E001C950E 
/* ateencoder.h */; }; 706EF16D26D166C5001C950E /* basisu_transcoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFC26D1583E001C950E /* basisu_transcoder.h */; }; 706EF16E26D166C5001C950E /* basisu_containers.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFD26D1583E001C950E /* basisu_containers.h */; }; @@ -193,8 +183,6 @@ 706EF1A126D166C5001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDBF26D1583E001C950E /* EtcBlock4x4Encoding_ETC1.cpp */; }; 706EF1A226D166C5001C950E /* EtcBlock4x4Encoding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC526D1583E001C950E /* EtcBlock4x4Encoding.cpp */; }; 706EF1A326D166C5001C950E /* EtcBlock4x4.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC626D1583E001C950E /* EtcBlock4x4.cpp */; }; - 706EF1A426D166C5001C950E /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDCE26D1583E001C950E /* bc7decomp.cpp */; }; - 706EF1A526D166C5001C950E /* bc7enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDD026D1583E001C950E /* bc7enc.cpp */; }; 706EF1BF26D166C5001C950E /* basisu_transcoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE0426D1583F001C950E /* basisu_transcoder.cpp */; }; 706EF1C026D166C5001C950E /* miniz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1126D1583F001C950E /* miniz.cpp */; }; 706EF1C126D166C5001C950E /* hedistance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1426D1583F001C950E /* hedistance.cpp */; }; @@ -245,6 +233,36 @@ 706EFF8426D34740001C950E /* red_black_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5F26D3473F001C950E /* red_black_tree.cpp */; }; 706EFF8526D34740001C950E /* fixed_pool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD6026D3473F001C950E /* fixed_pool.cpp */; }; 706EFF8626D34740001C950E /* fixed_pool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD6026D3473F001C950E /* fixed_pool.cpp */; }; + 707789D52881BA81008A51BC /* bc7enc.cpp 
in Sources */ = {isa = PBXBuildFile; fileRef = 707789C62881BA81008A51BC /* bc7enc.cpp */; }; + 707789D62881BA81008A51BC /* bc7enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789C62881BA81008A51BC /* bc7enc.cpp */; }; + 707789D72881BA81008A51BC /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C72881BA81008A51BC /* bc7enc.h */; }; + 707789D82881BA81008A51BC /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C72881BA81008A51BC /* bc7enc.h */; }; + 707789D92881BA81008A51BC /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C82881BA81008A51BC /* bc7decomp.h */; }; + 707789DA2881BA81008A51BC /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C82881BA81008A51BC /* bc7decomp.h */; }; + 707789DB2881BA81008A51BC /* ert.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C92881BA81008A51BC /* ert.h */; }; + 707789DC2881BA81008A51BC /* ert.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C92881BA81008A51BC /* ert.h */; }; + 707789DD2881BA81008A51BC /* rgbcx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CA2881BA81008A51BC /* rgbcx.cpp */; }; + 707789DE2881BA81008A51BC /* rgbcx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CA2881BA81008A51BC /* rgbcx.cpp */; }; + 707789DF2881BA81008A51BC /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CB2881BA81008A51BC /* rgbcx_table4.h */; }; + 707789E02881BA81008A51BC /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CB2881BA81008A51BC /* rgbcx_table4.h */; }; + 707789E12881BA81008A51BC /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CC2881BA81008A51BC /* utils.cpp */; }; + 707789E22881BA81008A51BC /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CC2881BA81008A51BC /* utils.cpp */; }; + 707789E32881BA81008A51BC /* rgbcx_table4_small.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */; }; + 707789E42881BA81008A51BC 
/* rgbcx_table4_small.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */; }; + 707789E52881BA81008A51BC /* ert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CE2881BA81008A51BC /* ert.cpp */; }; + 707789E62881BA81008A51BC /* ert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CE2881BA81008A51BC /* ert.cpp */; }; + 707789E72881BA81008A51BC /* rgbcx.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CF2881BA81008A51BC /* rgbcx.h */; }; + 707789E82881BA81008A51BC /* rgbcx.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CF2881BA81008A51BC /* rgbcx.h */; }; + 707789E92881BA81008A51BC /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789D02881BA81008A51BC /* bc7decomp.cpp */; }; + 707789EA2881BA81008A51BC /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789D02881BA81008A51BC /* bc7decomp.cpp */; }; + 707789EB2881BA81008A51BC /* utils.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789D22881BA81008A51BC /* utils.h */; }; + 707789EC2881BA81008A51BC /* utils.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789D22881BA81008A51BC /* utils.h */; }; + 707789ED2881BA81008A51BC /* bc7decomp_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */; }; + 707789EE2881BA81008A51BC /* bc7decomp_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */; }; + 707789F12881BCE2008A51BC /* rdo_bc_encoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */; }; + 707789F22881BCE2008A51BC /* rdo_bc_encoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */; }; + 707789F32881BCE2008A51BC /* rdo_bc_encoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */; }; + 707789F42881BCE2008A51BC /* rdo_bc_encoder.h in Headers */ = {isa = 
PBXBuildFile; fileRef = 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */; }; 70871DC927DDDBCD00D0B9E1 /* astcenc_vecmathlib_common_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DA727DDDBCC00D0B9E1 /* astcenc_vecmathlib_common_4.h */; }; 70871DCA27DDDBCD00D0B9E1 /* astcenc_vecmathlib_common_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DA727DDDBCC00D0B9E1 /* astcenc_vecmathlib_common_4.h */; }; 70871DCB27DDDBCD00D0B9E1 /* astcenc_image.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DA827DDDBCC00D0B9E1 /* astcenc_image.cpp */; }; @@ -365,14 +383,6 @@ 706EEDC626D1583E001C950E /* EtcBlock4x4.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = EtcBlock4x4.cpp; sourceTree = ""; }; 706EEDC726D1583E001C950E /* EtcColorFloatRGBA.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = EtcColorFloatRGBA.h; sourceTree = ""; }; 706EEDC826D1583E001C950E /* EtcBlock4x4Encoding.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = EtcBlock4x4Encoding.h; sourceTree = ""; }; - 706EEDCA26D1583E001C950E /* LICENSE */ = {isa = PBXFileReference; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; - 706EEDCB26D1583E001C950E /* rgbcx.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = rgbcx.h; sourceTree = ""; }; - 706EEDCC26D1583E001C950E /* bc7enc.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bc7enc.h; sourceTree = ""; }; - 706EEDCD26D1583E001C950E /* bc7decomp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bc7decomp.h; sourceTree = ""; }; - 706EEDCE26D1583E001C950E /* bc7decomp.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bc7decomp.cpp; sourceTree = ""; }; - 706EEDCF26D1583E001C950E /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; - 706EEDD026D1583E001C950E /* bc7enc.cpp */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bc7enc.cpp; sourceTree = ""; }; - 706EEDD126D1583E001C950E /* rgbcx_table4.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = rgbcx_table4.h; sourceTree = ""; }; 706EEDF926D1583E001C950E /* ateencoder.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ateencoder.mm; sourceTree = ""; }; 706EEDFA26D1583E001C950E /* ateencoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ateencoder.h; sourceTree = ""; }; 706EEDFC26D1583E001C950E /* basisu_transcoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = basisu_transcoder.h; sourceTree = ""; }; @@ -598,6 +608,23 @@ 706EFD5E26D3473F001C950E /* hashtable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hashtable.cpp; sourceTree = ""; }; 706EFD5F26D3473F001C950E /* red_black_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = red_black_tree.cpp; sourceTree = ""; }; 706EFD6026D3473F001C950E /* fixed_pool.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fixed_pool.cpp; sourceTree = ""; }; + 707789C62881BA81008A51BC /* bc7enc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bc7enc.cpp; sourceTree = ""; }; + 707789C72881BA81008A51BC /* bc7enc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc7enc.h; sourceTree = ""; }; + 707789C82881BA81008A51BC /* bc7decomp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc7decomp.h; sourceTree = ""; }; + 707789C92881BA81008A51BC /* ert.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ert.h; sourceTree = ""; }; + 707789CA2881BA81008A51BC /* rgbcx.cpp */ = {isa = PBXFileReference; fileEncoding = 
4; lastKnownFileType = sourcecode.cpp.cpp; path = rgbcx.cpp; sourceTree = ""; }; + 707789CB2881BA81008A51BC /* rgbcx_table4.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rgbcx_table4.h; sourceTree = ""; }; + 707789CC2881BA81008A51BC /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = utils.cpp; sourceTree = ""; }; + 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rgbcx_table4_small.h; sourceTree = ""; }; + 707789CE2881BA81008A51BC /* ert.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ert.cpp; sourceTree = ""; }; + 707789CF2881BA81008A51BC /* rgbcx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rgbcx.h; sourceTree = ""; }; + 707789D02881BA81008A51BC /* bc7decomp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bc7decomp.cpp; sourceTree = ""; }; + 707789D12881BA81008A51BC /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + 707789D22881BA81008A51BC /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utils.h; sourceTree = ""; }; + 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bc7decomp_ref.cpp; sourceTree = ""; }; + 707789D42881BA81008A51BC /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; + 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rdo_bc_encoder.cpp; sourceTree = ""; }; + 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */ = 
{isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rdo_bc_encoder.h; sourceTree = ""; }; 70871DA727DDDBCC00D0B9E1 /* astcenc_vecmathlib_common_4.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = astcenc_vecmathlib_common_4.h; sourceTree = ""; }; 70871DA827DDDBCC00D0B9E1 /* astcenc_image.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = astcenc_image.cpp; sourceTree = ""; }; 70871DA927DDDBCC00D0B9E1 /* astcenc_find_best_partitioning.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = astcenc_find_best_partitioning.cpp; sourceTree = ""; }; @@ -744,14 +771,23 @@ 706EEDC926D1583E001C950E /* bc7enc */ = { isa = PBXGroup; children = ( - 706EEDCA26D1583E001C950E /* LICENSE */, - 706EEDCB26D1583E001C950E /* rgbcx.h */, - 706EEDCC26D1583E001C950E /* bc7enc.h */, - 706EEDCD26D1583E001C950E /* bc7decomp.h */, - 706EEDCE26D1583E001C950E /* bc7decomp.cpp */, - 706EEDCF26D1583E001C950E /* README.md */, - 706EEDD026D1583E001C950E /* bc7enc.cpp */, - 706EEDD126D1583E001C950E /* rgbcx_table4.h */, + 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */, + 707789D02881BA81008A51BC /* bc7decomp.cpp */, + 707789C82881BA81008A51BC /* bc7decomp.h */, + 707789C62881BA81008A51BC /* bc7enc.cpp */, + 707789C72881BA81008A51BC /* bc7enc.h */, + 707789CE2881BA81008A51BC /* ert.cpp */, + 707789C92881BA81008A51BC /* ert.h */, + 707789D42881BA81008A51BC /* LICENSE */, + 707789D12881BA81008A51BC /* README.md */, + 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */, + 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */, + 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */, + 707789CB2881BA81008A51BC /* rgbcx_table4.h */, + 707789CA2881BA81008A51BC /* rgbcx.cpp */, + 707789CF2881BA81008A51BC /* rgbcx.h */, + 707789CC2881BA81008A51BC /* utils.cpp */, + 707789D22881BA81008A51BC /* utils.h */, ); path = bc7enc; sourceTree = ""; @@ -1205,7 
+1241,9 @@ 706EEFDA26D15984001C950E /* EtcBlock4x4EncodingBits.h in Headers */, 706EEFDB26D15984001C950E /* EtcBlock4x4Encoding_RGB8A1.h in Headers */, 706EEFDC26D15984001C950E /* EtcBlock4x4.h in Headers */, + 707789E72881BA81008A51BC /* rgbcx.h in Headers */, 706EEFDD26D15984001C950E /* Etc.h in Headers */, + 707789D72881BA81008A51BC /* bc7enc.h in Headers */, 706EEFDE26D15984001C950E /* EtcImage.h in Headers */, 70CDB65027A1382700A546C1 /* KramDDSHelper.h in Headers */, 708A6A9C2708CE4700BA5410 /* bc6h_encode.h in Headers */, @@ -1213,10 +1251,6 @@ 706EEFE026D15984001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */, 706EEFE126D15984001C950E /* EtcColorFloatRGBA.h in Headers */, 706EEFE226D15984001C950E /* EtcBlock4x4Encoding.h in Headers */, - 706EEFE326D15984001C950E /* rgbcx.h in Headers */, - 706EEFE426D15984001C950E /* bc7enc.h in Headers */, - 706EEFE526D15984001C950E /* bc7decomp.h in Headers */, - 706EEFE626D15984001C950E /* rgbcx_table4.h in Headers */, 706EEFF226D15984001C950E /* ateencoder.h in Headers */, 706EEFF326D15984001C950E /* basisu_transcoder.h in Headers */, 70A7BD3227092A1200DBCCF7 /* hdr_encode.h in Headers */, @@ -1224,12 +1258,14 @@ 706EEFF426D15984001C950E /* basisu_containers.h in Headers */, 70871DD527DDDBCD00D0B9E1 /* astcenc.h in Headers */, 706EEFF526D15985001C950E /* basisu_containers_impl.h in Headers */, + 707789EB2881BA81008A51BC /* utils.h in Headers */, 706EEFF626D15985001C950E /* basisu_transcoder_internal.h in Headers */, 70871DF927DDDBCD00D0B9E1 /* astcenc_vecmathlib_avx2_8.h in Headers */, 70871DFB27DDDBCD00D0B9E1 /* astcenc_vecmathlib_none_4.h in Headers */, 706EEFF726D15985001C950E /* basisu_global_selector_cb.h in Headers */, 706EEFF826D15985001C950E /* basisu_transcoder_uastc.h in Headers */, 706EEFF926D15985001C950E /* basisu_global_selector_palette.h in Headers */, + 707789E32881BA81008A51BC /* rgbcx_table4_small.h in Headers */, 706EEFFA26D15985001C950E /* basisu.h in Headers */, 706EEFFB26D15985001C950E /* 
basisu_file_headers.h in Headers */, 706EEFFC26D15985001C950E /* miniz.h in Headers */, @@ -1246,10 +1282,13 @@ 706EF00626D15985001C950E /* KramImage.h in Headers */, 706EF00726D15985001C950E /* win_mmap.h in Headers */, 70871DDD27DDDBCD00D0B9E1 /* astcenc_vecmathlib_sse_4.h in Headers */, + 707789D92881BA81008A51BC /* bc7decomp.h in Headers */, 706EF00826D15985001C950E /* Kram.h in Headers */, 70871DED27DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.h in Headers */, + 707789DB2881BA81008A51BC /* ert.h in Headers */, 706EF00926D15985001C950E /* KTXImage.h in Headers */, 706EF00A26D15985001C950E /* KramImageInfo.h in Headers */, + 707789DF2881BA81008A51BC /* rgbcx_table4.h in Headers */, 70871DF727DDDBCD00D0B9E1 /* astcenc_vecmathlib_neon_4.h in Headers */, 706EF00B26D15985001C950E /* KramTimer.h in Headers */, 706EF00C26D15985001C950E /* KramMmapHelper.h in Headers */, @@ -1265,6 +1304,7 @@ 708A6A982708CE4700BA5410 /* bc6h_decode.h in Headers */, 706EF01526D15985001C950E /* singlecolourfit.h in Headers */, 706EF01626D15985001C950E /* maths.h in Headers */, + 707789F32881BCE2008A51BC /* rdo_bc_encoder.h in Headers */, 706EF01726D15985001C950E /* colourset.h in Headers */, 708A6AA42708CE4700BA5410 /* bc6h_utils.h in Headers */, 706EF01826D15985001C950E /* colourblock.h in Headers */, @@ -1293,7 +1333,9 @@ 706EF15426D166C5001C950E /* EtcBlock4x4EncodingBits.h in Headers */, 706EF15526D166C5001C950E /* EtcBlock4x4Encoding_RGB8A1.h in Headers */, 706EF15626D166C5001C950E /* EtcBlock4x4.h in Headers */, + 707789E82881BA81008A51BC /* rgbcx.h in Headers */, 706EF15726D166C5001C950E /* Etc.h in Headers */, + 707789D82881BA81008A51BC /* bc7enc.h in Headers */, 706EF15826D166C5001C950E /* EtcImage.h in Headers */, 70CDB65127A1382700A546C1 /* KramDDSHelper.h in Headers */, 708A6A9D2708CE4700BA5410 /* bc6h_encode.h in Headers */, @@ -1301,10 +1343,6 @@ 706EF15A26D166C5001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */, 706EF15B26D166C5001C950E /* EtcColorFloatRGBA.h in Headers 
*/, 706EF15C26D166C5001C950E /* EtcBlock4x4Encoding.h in Headers */, - 706EF15D26D166C5001C950E /* rgbcx.h in Headers */, - 706EF15E26D166C5001C950E /* bc7enc.h in Headers */, - 706EF15F26D166C5001C950E /* bc7decomp.h in Headers */, - 706EF16026D166C5001C950E /* rgbcx_table4.h in Headers */, 706EF16C26D166C5001C950E /* ateencoder.h in Headers */, 706EF16D26D166C5001C950E /* basisu_transcoder.h in Headers */, 70A7BD3327092A1200DBCCF7 /* hdr_encode.h in Headers */, @@ -1312,12 +1350,14 @@ 706EF16E26D166C5001C950E /* basisu_containers.h in Headers */, 70871DD627DDDBCD00D0B9E1 /* astcenc.h in Headers */, 706EF16F26D166C5001C950E /* basisu_containers_impl.h in Headers */, + 707789EC2881BA81008A51BC /* utils.h in Headers */, 706EF17026D166C5001C950E /* basisu_transcoder_internal.h in Headers */, 70871DFA27DDDBCD00D0B9E1 /* astcenc_vecmathlib_avx2_8.h in Headers */, 70871DFC27DDDBCD00D0B9E1 /* astcenc_vecmathlib_none_4.h in Headers */, 706EF17126D166C5001C950E /* basisu_global_selector_cb.h in Headers */, 706EF17226D166C5001C950E /* basisu_transcoder_uastc.h in Headers */, 706EF17326D166C5001C950E /* basisu_global_selector_palette.h in Headers */, + 707789E42881BA81008A51BC /* rgbcx_table4_small.h in Headers */, 706EF17426D166C5001C950E /* basisu.h in Headers */, 706EF17526D166C5001C950E /* basisu_file_headers.h in Headers */, 706EF17626D166C5001C950E /* miniz.h in Headers */, @@ -1334,10 +1374,13 @@ 706EF18026D166C5001C950E /* KramImage.h in Headers */, 706EF18126D166C5001C950E /* win_mmap.h in Headers */, 70871DDE27DDDBCD00D0B9E1 /* astcenc_vecmathlib_sse_4.h in Headers */, + 707789DA2881BA81008A51BC /* bc7decomp.h in Headers */, 706EF18226D166C5001C950E /* Kram.h in Headers */, 70871DEE27DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.h in Headers */, + 707789DC2881BA81008A51BC /* ert.h in Headers */, 706EF18326D166C5001C950E /* KTXImage.h in Headers */, 706EF18426D166C5001C950E /* KramImageInfo.h in Headers */, + 707789E02881BA81008A51BC /* rgbcx_table4.h in Headers */, 
70871DF827DDDBCD00D0B9E1 /* astcenc_vecmathlib_neon_4.h in Headers */, 706EF18526D166C5001C950E /* KramTimer.h in Headers */, 706EF18626D166C5001C950E /* KramMmapHelper.h in Headers */, @@ -1353,6 +1396,7 @@ 708A6A992708CE4700BA5410 /* bc6h_decode.h in Headers */, 706EF18F26D166C5001C950E /* singlecolourfit.h in Headers */, 706EF19026D166C5001C950E /* maths.h in Headers */, + 707789F42881BCE2008A51BC /* rdo_bc_encoder.h in Headers */, 706EF19126D166C5001C950E /* colourset.h in Headers */, 708A6AA52708CE4700BA5410 /* bc6h_utils.h in Headers */, 706EF19226D166C5001C950E /* colourblock.h in Headers */, @@ -1441,10 +1485,12 @@ buildActionMask = 2147483647; files = ( 70871DD727DDDBCD00D0B9E1 /* astcenc_quantization.cpp in Sources */, + 707789E52881BA81008A51BC /* ert.cpp in Sources */, 70871E0327DDDBCD00D0B9E1 /* astcenc_color_unquantize.cpp in Sources */, 70871DD127DDDBCD00D0B9E1 /* astcenc_averages_and_directions.cpp in Sources */, 70871DDF27DDDBCD00D0B9E1 /* astcenc_mathlib_softfloat.cpp in Sources */, 706EF26426D17DCC001C950E /* ateencoder.mm in Sources */, + 707789ED2881BA81008A51BC /* bc7decomp_ref.cpp in Sources */, 706EEF7F26D1595D001C950E /* EtcBlock4x4Encoding_RGB8.cpp in Sources */, 70871DCD27DDDBCD00D0B9E1 /* astcenc_find_best_partitioning.cpp in Sources */, 70CDB65227A1382700A546C1 /* KramDDSHelper.cpp in Sources */, @@ -1456,15 +1502,15 @@ 706EEF8426D1595D001C950E /* EtcBlock4x4Encoding_RG11.cpp in Sources */, 706EEF8526D1595D001C950E /* EtcBlock4x4Encoding_RGB8A1.cpp in Sources */, 706EEF8726D1595D001C950E /* EtcIndividualTrys.cpp in Sources */, + 707789DD2881BA81008A51BC /* rgbcx.cpp in Sources */, 706EEF8826D1595D001C950E /* EtcBlock4x4Encoding_R11.cpp in Sources */, + 707789F12881BCE2008A51BC /* rdo_bc_encoder.cpp in Sources */, 70871DF527DDDBCD00D0B9E1 /* astcenc_color_quantize.cpp in Sources */, 706EEF8926D1595D001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */, 706EEF8A26D1595D001C950E /* EtcBlock4x4Encoding.cpp in Sources */, 
706EEF8B26D1595D001C950E /* EtcBlock4x4.cpp in Sources */, 70871DDB27DDDBCD00D0B9E1 /* astcenc_percentile_tables.cpp in Sources */, - 706EEF8C26D1595D001C950E /* bc7decomp.cpp in Sources */, 70871DE127DDDBCD00D0B9E1 /* astcenc_mathlib.cpp in Sources */, - 706EEF8D26D1595D001C950E /* bc7enc.cpp in Sources */, 708A6A9A2708CE4700BA5410 /* bc6h_encode.cpp in Sources */, 70A7BD3027092A1200DBCCF7 /* hdr_encode.cpp in Sources */, 706EFF7726D34740001C950E /* string.cpp in Sources */, @@ -1476,7 +1522,9 @@ 70871DE327DDDBCD00D0B9E1 /* astcenc_decompress_symbolic.cpp in Sources */, 70871E0727DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp in Sources */, 70871E0527DDDBCD00D0B9E1 /* astcenc_platform_isa_detection.cpp in Sources */, + 707789D52881BA81008A51BC /* bc7enc.cpp in Sources */, 706EFF7F26D34740001C950E /* intrusive_list.cpp in Sources */, + 707789E92881BA81008A51BC /* bc7decomp.cpp in Sources */, 706EEFA826D1595D001C950E /* miniz.cpp in Sources */, 70871DE527DDDBCD00D0B9E1 /* astcenc_compress_symbolic.cpp in Sources */, 706EEFA926D1595D001C950E /* hedistance.cpp in Sources */, @@ -1518,6 +1566,7 @@ 706EEFC226D1595E001C950E /* zstd.cpp in Sources */, 706EEFC326D1595E001C950E /* zstddeclib.cpp in Sources */, 706EEFC426D1595E001C950E /* lodepng.cpp in Sources */, + 707789E12881BA81008A51BC /* utils.cpp in Sources */, 706EEFC526D1595E001C950E /* tmpfileplus.cpp in Sources */, 70871E0127DDDBCD00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp in Sources */, ); @@ -1528,10 +1577,12 @@ buildActionMask = 2147483647; files = ( 70871DD827DDDBCD00D0B9E1 /* astcenc_quantization.cpp in Sources */, + 707789E62881BA81008A51BC /* ert.cpp in Sources */, 70871E0427DDDBCD00D0B9E1 /* astcenc_color_unquantize.cpp in Sources */, 70871DD227DDDBCD00D0B9E1 /* astcenc_averages_and_directions.cpp in Sources */, 70871DE027DDDBCD00D0B9E1 /* astcenc_mathlib_softfloat.cpp in Sources */, 706EFC2426D1C39B001C950E /* ateencoder.mm in Sources */, + 707789EE2881BA81008A51BC /* bc7decomp_ref.cpp in Sources 
*/, 706EF19826D166C5001C950E /* EtcBlock4x4Encoding_RGB8.cpp in Sources */, 70871DCE27DDDBCD00D0B9E1 /* astcenc_find_best_partitioning.cpp in Sources */, 70CDB65327A1382700A546C1 /* KramDDSHelper.cpp in Sources */, @@ -1543,15 +1594,15 @@ 706EF19D26D166C5001C950E /* EtcBlock4x4Encoding_RG11.cpp in Sources */, 706EF19E26D166C5001C950E /* EtcBlock4x4Encoding_RGB8A1.cpp in Sources */, 706EF19F26D166C5001C950E /* EtcIndividualTrys.cpp in Sources */, + 707789DE2881BA81008A51BC /* rgbcx.cpp in Sources */, 706EF1A026D166C5001C950E /* EtcBlock4x4Encoding_R11.cpp in Sources */, + 707789F22881BCE2008A51BC /* rdo_bc_encoder.cpp in Sources */, 70871DF627DDDBCD00D0B9E1 /* astcenc_color_quantize.cpp in Sources */, 706EF1A126D166C5001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */, 706EF1A226D166C5001C950E /* EtcBlock4x4Encoding.cpp in Sources */, 706EF1A326D166C5001C950E /* EtcBlock4x4.cpp in Sources */, 70871DDC27DDDBCD00D0B9E1 /* astcenc_percentile_tables.cpp in Sources */, - 706EF1A426D166C5001C950E /* bc7decomp.cpp in Sources */, 70871DE227DDDBCD00D0B9E1 /* astcenc_mathlib.cpp in Sources */, - 706EF1A526D166C5001C950E /* bc7enc.cpp in Sources */, 708A6A9B2708CE4700BA5410 /* bc6h_encode.cpp in Sources */, 70A7BD3127092A1200DBCCF7 /* hdr_encode.cpp in Sources */, 706EFF7826D34740001C950E /* string.cpp in Sources */, @@ -1563,7 +1614,9 @@ 70871DE427DDDBCD00D0B9E1 /* astcenc_decompress_symbolic.cpp in Sources */, 70871E0827DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp in Sources */, 70871E0627DDDBCD00D0B9E1 /* astcenc_platform_isa_detection.cpp in Sources */, + 707789D62881BA81008A51BC /* bc7enc.cpp in Sources */, 706EFF8026D34740001C950E /* intrusive_list.cpp in Sources */, + 707789EA2881BA81008A51BC /* bc7decomp.cpp in Sources */, 706EF1C026D166C5001C950E /* miniz.cpp in Sources */, 70871DE627DDDBCD00D0B9E1 /* astcenc_compress_symbolic.cpp in Sources */, 706EF1C126D166C5001C950E /* hedistance.cpp in Sources */, @@ -1605,6 +1658,7 @@ 706EF1D826D166C5001C950E /* zstd.cpp in 
Sources */, 706EF1D926D166C5001C950E /* zstddeclib.cpp in Sources */, 706EF1DA26D166C5001C950E /* lodepng.cpp in Sources */, + 707789E22881BA81008A51BC /* utils.cpp in Sources */, 706EF1DB26D166C5001C950E /* tmpfileplus.cpp in Sources */, 70871E0227DDDBCD00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp in Sources */, ); diff --git a/libkram/bc7enc/LICENSE b/libkram/bc7enc/LICENSE index 71e10da..b3b1f69 100644 --- a/libkram/bc7enc/LICENSE +++ b/libkram/bc7enc/LICENSE @@ -1,10 +1,12 @@ -The following source code files are available under 2 licenses -- choose whichever you prefer: -rgbcx.h -bc7decomp.cpp/h -bc7enc.c +If you use this software in a product, attribution / credits is requested but not required. + +bc7e.ispc uses the Apache 2.0 license and is Copyright (C) 2018-2021 Binomial LLC. +LodePNG is Copyright (c) 2005-2016 Lode Vandevenne. See LodePNG.cpp for its license. + +All other source code files in this repo are available under 2 licenses -- choose whichever you prefer. ALTERNATIVE A - MIT License -Copyright(c) 2020 Richard Geldreich, Jr. +Copyright(c) 2020-2021 Richard Geldreich, Jr. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal in the Software without restriction, including without limitation the rights to diff --git a/libkram/bc7enc/README.md b/libkram/bc7enc/README.md index 8f762b2..b86777f 100644 --- a/libkram/bc7enc/README.md +++ b/libkram/bc7enc/README.md @@ -1,146 +1,163 @@ -bc7enc - Fast, single source file BC1-5 and BC7/BPTC GPU texture encoders. 
+bc7enc - Fast BC1-7 GPU texture encoders with Rate Distortion Optimization (RDO) -Features: -- BC1/3 encoder (in [rgbcx.h](https://github.com/richgel999/bc7enc/blob/master/rgbcx.h)) uses a new algorithm (which we've named "prioritized cluster fit") which is 3-4x faster than traditional cluster fit (as implemented in [libsquish](https://github.com/svn2github/libsquish) with SSE2) at the same or slightly higher average quality using scalar CPU instructions. This algorithm is suitable for GPU encoder implementations. +This repo contains fast texture encoders for BC1-7. All formats support a simple post-processing transform on the encoded texture data designed to trade off quality for smaller compressed file sizes using LZ compression. Significant (10-50%) size reductions are possible. The BC7 encoder also supports a "reduced entropy" mode using the -e option which causes the output to be biased/weighted in various ways which minimally impact quality, which results in 5-10% smaller file sizes with no slowdowns in encoding time. -The BC1/BC3 encoder also implements [Castano's optimal endpoint rounding improvement](https://gist.github.com/castano/c92c7626f288f9e99e158520b14a61cf). +Currently, the entropy reduction transform is tuned for Deflate, LZHAM, or LZMA. The method used to control the rate-distortion tradeoff is the classic Lagrangian multiplier RDO method, modified to favor MSE on very smooth blocks. Rate is approximated using a fixed Deflate model. The post-processing transform applied to the encoded texture data tries to introduce the longest match it can into every encoded output block. It also tries to continue matches between blocks and (specifically for codecs like LZHAM/LZMA/Zstd) it tries to utilize REP0 (repeat) matches. -rgbcx's BC1 encoder is faster than both AMD Compressonator and libsquish at the same average quality. 
+You can see examples of the RDO BC7 encoder's current output [here](https://richg42.blogspot.com/2021/02/more-rdo-bc7-encoding.html). Some examples on how to use the command line tool are on my blog, [here](https://richg42.blogspot.com/2021/02/how-to-use-bc7encrdo.html). -- BC7 encoder (in bc7enc.c/.h) has perceptual colorspace metric support, and is very fast compared to ispc_texcomp (see below) for RGB textures. Important: The BC7 encoder included in this repo is still a work in progress. I took bc7enc16 and added more modes for better alpha support, but it needs more testing and development. +This repo contains both [bc7e.ispc](https://github.com/BinomialLLC/bc7e) and its distantly related but weaker 4 mode only non-ispc variant, bc7enc.cpp. By default, if you set SUPPORT_BC7E=TRUE when running cmake, you get bc7e.ispc, otherwise you get bc7enc.cpp. (The -C option forces bc7enc.cpp.) bc7e supports all BC7 modes and features, but doesn't yet support reduced entropy BC7 encoding. bc7enc.cpp supports optional reduced entropy encoding (using -e with the command line tool). RDO BC7 is supported when using either encoder, however. -- Full decoders for BC1-5/7. BC7 decoder is in bc7decomp.cpp/.h, BC1-5 decoders in rgbcx.h. +The next major focus will be improving the default smooth block handling and improving rate distortion performance. -This project is basically a demo of some of the techniques we use in Basis BC7, -which is Binomial's state of the art vectorized BC7 encoder. Basis BC7 is the -highest quality and fastest CPU BC7 encoder available (2-3x faster than -ispc_texcomp). It supports all modes and linear/perceptual colorspace metrics. -Licensees get full ISPC source code so they can customize the codec as needed. +This repo was originally derived from [bc7enc](https://github.com/richgel999/bc7enc) and [bc7e](https://github.com/BinomialLLC/bc7e). Note this repo contains the latest version of bc7e.ispc, which has a determinism bug fix.
-bc7enc currently only supports modes 1 and 6 for RGB, and modes 1, 5, 6, and 7 for alpha. The plan is to add all the modes. See the [bc7enc16](https://github.com/richgel999/bc7enc16) project for the previous version (which only supports modes 1 and 6). Note this readme still refers to "bc7enc16", but bc7enc is the same encoder but with more alpha modes. +**Note: If you use this software in a product, attribution / credits is requested but not required. Thanks!** -This codec supports a perceptual mode when encoding BC7, where it computes colorspace error in -weighted YCbCr space (like etc2comp), and it also supports weighted RGBA -metrics. It's particular strong in perceptual mode, beating the current state of -the art CPU encoder (Intel's ispc_texcomp) by a wide margin when measured by -Luma PSNR, even though it only supports 2 modes and isn't vectorized. +### Compiling -Why only modes 1 and 6 for opaque BC7? -Because with these two modes you have a complete encoder that supports both -opaque and transparent textures in a small amount (~1400 lines) of -understandable plain C code. Mode 6 excels on smooth blocks, and mode 1 is -strong with complex blocks, and a strong encoder that combines both modes can be -quite high quality. Fast mode 6-only encoders will have noticeable block -artifacts which this codec avoids by fully supporting mode 1. +This build has been tested with MSVC 2019 x64 and clang 6.0.0 under Ubuntu v18.04. -Modes 1 and 6 are typically the most used modes on many textures using other -encoders. Mode 1 has two subsets, 64 possible partitions, and 3-bit indices, -while mode 6 has large 4-bit indices and high precision 7777.1 endpoints. This -codec produces output that is far higher quality than any BC1 encoder, and -approaches (or in perceptual mode exceeds!) the quality of other full BC7 -encoders. 
+To compile with bc7e.ispc (on Linux this requires [Intel's ISPC compiler](https://ispc.github.io/downloads.html) to be in your path - recommended): -Why is bc7enc16 so fast in perceptual mode? -Computing error in YCbCr space is more expensive than in RGB space, yet bc7enc16 -in perceptual mode is stronger than ispc_texcomp (see the benchmark below) - -even without SSE/AVX vectorization and with only 2 modes to work with! +``` +cmake -D SUPPORT_BC7E=TRUE . +make +``` + +To compile without BC7E: + +``` +cmake . +make +``` -Most BC7 encoders only support linear RGB colorspace metrics, which is a -fundamental weakness. Some support weighted RGB metrics, which is better. With -linear RGB metrics, encoding error is roughly balanced between each channel, and -encoders have to work *very* hard (examining large amounts of RGB search space) -to get overall quality up. With perceptual colorspace metrics, RGB error tends -to become a bit unbalanced, with green quality favored more highly than red and -blue, and blue quality favored the least. A perceptual encoder is tuned to -prefer exploring solutions along the luma axis, where it's much less work to find -solutions with less luma error. bc7enc16 is, as far as I know, the first BC7 -codec to support computing error in weighted YCbCr colorspace. +Note the MSVC and Linux builds enable OpenMP for faster compression. -Note: Most of the timings here (except for the ispc_texcomp "fast" mode timings at the very bottom) -are for the *original* release, before I added several more optimizations. The latest version of -bc7enc16.c is around 8-27% faster than the initial release at same quality (when mode 1 is enabled - -there's no change with just mode 6). +### Examples -Some benchmarks across 31 images (kodim corpus+others): +The [.DDS](https://docs.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide) output files can be loaded/viewed using tools like [AMD Compressonator](https://gpuopen.com/compressonator/). 
-Perceptual (average REC709 Luma PSNR - higher is better quality): +To encode to non-RDO BC7 using BC7E, highest quality, linear RGB(A) metrics: + +``` +./bc7enc blah.png ``` -iscp_texcomp slow vs. bc7enc16 uber4/max_partitions 64 -iscp_texcomp: 355.4 secs 48.6 dB -bc7enc16: 122.6 secs 50.0 dB -iscp_texcomp slow vs. bc7enc16 uber0/max_partitions 64 -iscp_texcomp: 355.4 secs 48.6 dB -bc7enc16: 38.3 secs 49.6 dB +To encode to non-RDO BC7 using BC7E, highest quality, using perceptual (scaled YCbCr) colorspace error metrics: -iscp_texcomp basic vs. bc7enc16 uber0/max_partitions 16 -ispc_texcomp: 100.2 secs 48.3 dB -bc7enc16: 20.8 secs 49.3 dB +``` +./bc7enc blah.png -s +``` -iscp_texcomp fast vs. bc7enc16 uber0/max_partitions 16 -iscp_texcomp: 41.5 secs 48.0 dB -bc7enc16: 20.8 secs 49.3 dB +To encode to RDO BC7 using BC7E, highest quality, lambda=.5, linear metrics (perceptual colorspace metrics are always automatically disabled when -z is specified), with a balance of encoding performance vs. RDO efficiency: -iscp_texcomp ultrafast vs. bc7enc16 uber0/max_partitions 0 -iscp_texcomp: 1.9 secs 46.2 dB -bc7enc16: 8.9 secs 48.4 dB +``` +./bc7enc blah.png -z.5 +``` -Non-perceptual (average RGB PSNR): +To encode to RDO BC7 using BC7E, lower baseline quality (-u4) for faster encoding, lambda=.5, and with faster encoding (only inject one match vs two, with a tiny RDO lookback window size of 16 bytes): -iscp_texcomp slow vs. bc7enc16 uber4/max_partitions 64 -iscp_texcomp: 355.4 secs 46.8 dB -bc7enc16: 51 secs 46.1 dB +``` +./bc7enc blah.png -u4 -z.5 -ze -zc16 +``` -iscp_texcomp slow vs. bc7enc16 uber0/max_partitions 64 -iscp_texcomp: 355.4 secs 46.8 dB -bc7enc16: 29.3 secs 45.8 dB +To encode to non-RDO BC7 using entropy reduced or quantized/weighted BC7 (no slowdown vs. non-RDO bc7enc.cpp for BC7, slightly reduced quality, but 5-10% better LZ compression, only uses 2 or 4 BC7 modes): -iscp_texcomp basic vs. 
bc7enc16 uber4/max_partitions 64 -iscp_texcomp: 99.9 secs 46.5 dB -bc7enc16: 51 secs 46.1 dB +``` +./bc7enc blah.png -C -e +``` -iscp_texcomp fast vs. bc7enc16 uber1/max_partitions 16 -ispc_texcomp: 41.5 secs 46.1 dB -bc7enc16: 19.8 secs 45.5 dB +To encode to RDO BC7 using the entropy reduction transform combined with reduced entropy BC7 encoding, with a slightly larger window size than the default which is 128 bytes: -iscp_texcomp fast vs. bc7enc16 uber0/max_partitions 8 -ispc_texcomp: 41.5 secs 46.1 dB -bc7enc16: 10.46 secs 44.4 dB +``` +./bc7enc -zc256 blah.png -C -e -z1.0 +``` -iscp_texcomp ultrafast vs. bc7enc16 uber0/max_partitions 0 -ispc_texcomp: 1.9 secs 42.7 dB -bc7enc16: 3.8 secs 42.7 dB +Same as before, but higher compression (allow 2 matches per block instead of 1): -DirectXTex CPU in "mode 6 only" mode vs. bc7enc16 uber1/max_partions 0 (mode 6 only), non-perceptual: - -DirectXTex: 466.4 secs 41.9 dB -bc7enc16: 6.7 secs 42.8 dB +``` +./bc7enc -zc256 blah.png -C -e -z1.0 -zn +``` -DirectXTex CPU in (default - no 3 subset modes) vs. bc7enc16 uber1/max_partions 64, non-perceptual: +Same, except disable ultra-smooth block handling: -DirectXTex: 9485.1 secs 45.6 dB -bc7enc16: 36 secs 46.0 dB -``` -(Note this version of DirectXTex has a key pbit bugfix which I've submitted but -is still waiting to be accepted. Non-bugfixed versions will be slightly lower -quality.) +``` +./bc7enc -zc256 blah.png -C -e -z1.0 -zu +``` -UPDATE: To illustrate how strong the mode 1+6 implementation is in bc7enc16, let's compare ispc_texcomp -fast vs. 
the latest version of bc7enc16 uber4/max_partitions 64: +To encode to RDO BC7 using the entropy reduction transform at lower quality, combined with reduced entropy BC7 encoding, with a slightly larger window size than the default which is 128 bytes: -Without filterbank optimizations: ``` - Time RGB PSNR Y PSNR -ispc_texcomp: 41.45 secs 46.09 dB 48.0 dB -bc7enc16: 41.42 secs 46.03 dB 48.2 dB +./bc7enc -zc256 blah.png -C -e -z2.0 +``` + +To encode to RDO BC7 using the entropy reduction transform at higher effectiveness using a larger window size, without using reduced entropy BC7 encoding: -With filterbank optimizations enabled: -bc7enc16: 38.78 secs 45.94 dB 48.12 dB ``` -They both have virtually the same average RGB PSNR with these settings (.06 dB is basically noise), but -bc7enc16 is just as fast as ispc_texcomp fast, even though it's not vectorized. Interestingly, our Y PSNR is better, -although bc7enc16 wasn't using perceptual metrics in these benchmarks. +./bc7enc -zc1024 blah.png -z1.0 +``` + +To encode to RDO BC7 using the entropy reduction transform at higher effectiveness using a larger window size, with a manually specified max smooth block max error scale: + +``` +./bc7enc -zc1024 blah.png -z2.0 -zb30.0 +``` + +To encode to RDO BC7 using the entropy reduction transform at higher effectiveness using a larger window size, using only mode 6 (more block artifacts, but better rate-distortion performance as measured by PSNR): + +``` +./bc7enc -zc1024 blah.png -6 -z1.0 -e +``` + +To encode to BC1: +``` +./bc7enc -1 blah.png +``` + +To encode to BC1 with Rate Distortion Optimization (RDO) at lambda=1.0: +``` +./bc7enc -1 -z1.0 blah.png +``` + +The -z option controls lambda, or the rate vs. distortion tradeoff. 0 = maximum quality, higher values=lower bitrates but lower quality. Try values [.25-8].
+ +To encode to BC1 with RDO, with RDO debug output, to monitor the percentage of blocks impacted: +``` +./bc7enc -1 -z1.0 -zd blah.png +``` + +To encode to BC1 with RDO with a higher than default smooth block scale factor: +``` +./bc7enc -1 -z1.0 -zb40.0 blah.png +``` + +Use -zb1.0 to disable smooth block error scaling completely, which increases RDO performance but can result in noticeable artifacts on smooth/flat blocks at higher lambdas. + +Use -zc# to control the RDO window size in bytes. Good values to try are 16-8192. +Use -zt to disable RDO multithreading. + +To encode to BC1 with RDO at the highest achievable quality/effectiveness (this is extremely slow): + +``` +./bc7enc -1 -z1.0 -zc32768 blah.png +``` + +This sets the window size to 32KB (the highest setting that makes sense for Deflate). Window sizes of 2KB (the default) to 8KB are way faster and in practice are almost as effective. The maximum window size setting supported by the command line tool is 64KB, but this would be very slow. + +For even higher quality per bit (this is incredibly slow): +``` +./bc7enc -1 -z1.0 -zc32768 -zm blah.png +``` + +### Dependencies +There are no 3rd party code or library dependencies. utils.cpp/.h is only needed by the example command line tool. It uses C++11. The individual .cpp files are designed to be easily dropped into other codebases. + +For RDO post-processing of any block-based format: ert.cpp/.h. You provide this function an array of encoded blocks, an array of source/original 32bpp blocks, some parameters, and a pointer to a block decoder function for your format as a callback. It must return false if the passed in block data is invalid. (Make sure you *really* validate the block's data, because the ERT post-processor will inevitably call your callback with invalid blocks.) This transform works on most other texture formats, such as ETC1/2, EAC, and ASTC. The ERT works on block sizes ranging from 1x1 to 12x12.
This file has no other dependencies apart from utils.cpp/h. + +For BC1-5 encoding/decoding: rgbcx.cpp/.h + +For BC7 encoding: bc7enc.cpp/.h + +For BC7 decoding: bc7decomp.cpp/.h -This was a multithreaded benchmark (using OpenMP) on a dual Xeon workstation. -ispc_texcomp was called with 64-blocks at a time and used AVX instructions. -Timings are for encoding only. diff --git a/libkram/bc7enc/bc7decomp.cpp b/libkram/bc7enc/bc7decomp.cpp index 3099ec4..cf1574a 100644 --- a/libkram/bc7enc/bc7decomp.cpp +++ b/libkram/bc7enc/bc7decomp.cpp @@ -1,9 +1,33 @@ // File: bc7decomp.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file) #include "bc7decomp.h" +#include -namespace bc7decomp +#if (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE2__)) +# define BC7DECOMP_USE_SSE2 +#endif + +#ifdef BC7DECOMP_USE_SSE2 +#include +#include +#endif + +namespace bc7decomp { +#ifdef BC7DECOMP_USE_SSE2 + const __m128i g_bc7_weights4_sse2[8] = + { + _mm_set_epi16(4, 4, 4, 4, 0, 0, 0, 0), + _mm_set_epi16(13, 13, 13, 13, 9, 9, 9, 9), + _mm_set_epi16(21, 21, 21, 21, 17, 17, 17, 17), + _mm_set_epi16(30, 30, 30, 30, 26, 26, 26, 26), + _mm_set_epi16(38, 38, 38, 38, 34, 34, 34, 34), + _mm_set_epi16(47, 47, 47, 47, 43, 43, 43, 43), + _mm_set_epi16(55, 55, 55, 55, 51, 51, 51, 51), + _mm_set_epi16(64, 64, 64, 64, 60, 60, 60, 60), + }; +#endif + const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; @@ -44,27 +68,32 @@ const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 }; -inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) +const uint8_t 
g_bc7_first_byte_to_mode[256] = { - assert(codesize <= 32); - uint32_t bits = 0; - uint32_t total_bits = 0; - - while (total_bits < codesize) - { - uint32_t byte_bit_offset = bit_offset & 7; - uint32_t bits_to_read = std::min(codesize - total_bits, 8 - byte_bit_offset); - - uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; - byte_bits &= ((1 << bits_to_read) - 1); - - bits |= (byte_bits << total_bits); + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +}; - total_bits += bits_to_read; - bit_offset += bits_to_read; - } +inline void insert_weight_zero(uint64_t& index_bits, uint32_t bits_per_index, uint32_t offset) +{ + uint64_t LOW_BIT_MASK = (static_cast(1) << ((bits_per_index * (offset + 1)) - 1)) - 1; + uint64_t HIGH_BIT_MASK = ~LOW_BIT_MASK; - return bits; + index_bits = ((index_bits & HIGH_BIT_MASK) << 1) | (index_bits & LOW_BIT_MASK); } // BC7 mode 0-7 decompression. 
@@ -89,51 +118,142 @@ static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t b } return 0; } - -bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + + +#ifdef BC7DECOMP_USE_SSE2 +static inline __m128i bc7_interp_sse2(__m128i l, __m128i h, __m128i w, __m128i iw) +{ + return _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(l, iw), _mm_mullo_epi16(h, w)), _mm_set1_epi16(32)), 6); +} + +static inline void bc7_interp2_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16 = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + + __m128i endpoints_16_swapped = _mm_shuffle_epi32(endpoints_16, _MM_SHUFFLE(1, 0, 3, 2)); + + // Interpolated colors will be color 1 and 2 + __m128i interpolated_colors = bc7_interp_sse2(endpoints_16, endpoints_16_swapped, _mm_set1_epi16(21), _mm_set1_epi16(43)); + + // all_colors will be 1, 2, 0, 3 + __m128i all_colors = _mm_packus_epi16(interpolated_colors, endpoints_16); + + all_colors = _mm_shuffle_epi32(all_colors, _MM_SHUFFLE(3, 1, 0, 2)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors); +} + +static inline void bc7_interp3_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16bit = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + __m128i endpoints_16bit_swapped = _mm_shuffle_epi32(endpoints_16bit, _MM_SHUFFLE(1, 0, 3, 2)); + + __m128i interpolated_16 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set1_epi16(9), _mm_set1_epi16(55)); + __m128i interpolated_23 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(37, 37, 37, 37, 18, 18, 18, 18), _mm_set_epi16(27, 27, 27, 27, 46, 46, 46, 46)); + __m128i interpolated_45 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(18, 18, 18, 18, 37, 37, 37, 37), _mm_set_epi16(46, 
46, 46, 46, 27, 27, 27, 27)); + + __m128i interpolated_01 = _mm_unpacklo_epi64(endpoints_16bit, interpolated_16); + __m128i interpolated_67 = _mm_unpackhi_epi64(interpolated_16, endpoints_16bit); + + __m128i all_colors_0 = _mm_packus_epi16(interpolated_01, interpolated_23); + __m128i all_colors_1 = _mm_packus_epi16(interpolated_45, interpolated_67); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors + 4), all_colors_1); +} +#endif + +bool unpack_bc7_mode0_2(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) { //const uint32_t SUBSETS = 3; const uint32_t ENDPOINTS = 6; const uint32_t COMPS = 3; const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; const uint32_t PBITS = (mode == 0) ? 6 : 0; const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); + const uint32_t PART_BITS = (mode == 0) ? 4 : 6; + const uint32_t PART_MASK = (1 << PART_BITS) - 1; + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + const uint32_t part = (low_chunk >> (mode + 1)) & PART_MASK; - const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 
4 : 6); + uint64_t channel_read_chunks[3] = { 0, 0, 0 }; + + if (mode == 0) + { + channel_read_chunks[0] = low_chunk >> 5; + channel_read_chunks[1] = low_chunk >> 29; + channel_read_chunks[2] = ((low_chunk >> 53) | (high_chunk << 11)); + } + else + { + channel_read_chunks[0] = low_chunk >> 9; + channel_read_chunks[1] = ((low_chunk >> 39) | (high_chunk << 25)); + channel_read_chunks[2] = high_chunk >> 5; + } color_rgba endpoints[ENDPOINTS]; for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } uint32_t pbits[6]; - for (uint32_t p = 0; p < PBITS; p++) - pbits[p] = read_bits32(pBuf, bit_offset, 1); + if (mode == 0) + { + uint8_t p_bits_chunk = static_cast((high_chunk >> 13) & 0xff); + + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = (p_bits_chunk >> p) & 1; + } + + uint64_t weights_read_chunk = high_chunk >> (67 - 16 * WEIGHT_BITS); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, std::min(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, std::max(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); uint32_t weights[16]; for (uint32_t i = 0; i < 16; i++) - weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_third_subset_1[part]) || (i == g_bc7_table_anchor_index_third_subset_2[part])) ? 
(WEIGHT_BITS - 1) : WEIGHT_BITS); - - assert(bit_offset == 128); + { + weights[i] = static_cast(weights_read_chunk & WEIGHT_MASK); + weights_read_chunk >>= WEIGHT_BITS; + } for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + endpoints[e][c] = static_cast((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); color_rgba block_colors[3][8]; + +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 3; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else for (uint32_t s = 0; s < 3; s++) for (uint32_t i = 0; i < WEIGHT_VALS; i++) { for (uint32_t c = 0; c < 3; c++) - block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); block_colors[s][i][3] = 255; } +#endif for (uint32_t i = 0; i < 16; i++) pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]]; @@ -141,51 +261,102 @@ bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPix return true; } -bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +bool unpack_bc7_mode1_3_7(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) { //const uint32_t SUBSETS = 2; const uint32_t ENDPOINTS = 4; const uint32_t COMPS = (mode == 7) ? 4 : 3; const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; const uint32_t PBITS = (mode == 1) ? 
2 : 4; const uint32_t SHARED_PBITS = (mode == 1) ? true : false; const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; - const uint32_t part = read_bits32(pBuf, bit_offset, 6); + const uint32_t part = ((low_chunk >> (mode + 1)) & 0x3f); color_rgba endpoints[ENDPOINTS]; + + uint64_t channel_read_chunks[4] = { 0, 0, 0, 0 }; + uint64_t p_read_chunk = 0; + channel_read_chunks[0] = (low_chunk >> (mode + 7)); + uint64_t weight_read_chunk; + + switch (mode) + { + case 1: + channel_read_chunks[1] = (low_chunk >> 32); + channel_read_chunks[2] = ((low_chunk >> 56) | (high_chunk << 8)); + p_read_chunk = high_chunk >> 16; + weight_read_chunk = high_chunk >> 18; + break; + case 3: + channel_read_chunks[1] = ((low_chunk >> 38) | (high_chunk << 26)); + channel_read_chunks[2] = high_chunk >> 2; + p_read_chunk = high_chunk >> 30; + weight_read_chunk = high_chunk >> 34; + break; + case 7: + channel_read_chunks[1] = low_chunk >> 34; + channel_read_chunks[2] = ((low_chunk >> 54) | (high_chunk << 10)); + channel_read_chunks[3] = high_chunk >> 10; + p_read_chunk = (high_chunk >> 30); + weight_read_chunk = (high_chunk >> 34); + break; + default: + return false; + }; + for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } uint32_t pbits[4]; for (uint32_t p = 0; p < PBITS; p++) - pbits[p] = read_bits32(pBuf, bit_offset, 1); - + pbits[p] = (p_read_chunk >> p) & 1; + + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, 
g_bc7_table_anchor_index_second_subset[part]); + uint32_t weights[16]; for (uint32_t i = 0; i < 16; i++) - weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); - - assert(bit_offset == 128); + { + weights[i] = static_cast(weight_read_chunk & WEIGHT_MASK); + weight_read_chunk >>= WEIGHT_BITS; + } for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + endpoints[e][c] = static_cast((mode != 7U && c == 3U) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); color_rgba block_colors[2][8]; +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 2; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else for (uint32_t s = 0; s < 2; s++) for (uint32_t i = 0; i < WEIGHT_VALS; i++) { for (uint32_t c = 0; c < COMPS; c++) - block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); block_colors[s][i][3] = (COMPS == 3) ? 
255 : block_colors[s][i][3]; } +#endif for (uint32_t i = 0; i < 16; i++) pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]]; @@ -193,53 +364,101 @@ bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pP return true; } -bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +bool unpack_bc7_mode4_5(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) { const uint32_t ENDPOINTS = 2; - const uint32_t COMPS = 4; + //const uint32_t COMPS = 4; const uint32_t WEIGHT_BITS = 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t A_WEIGHT_MASK = (1 << A_WEIGHT_BITS) - 1; const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; + const uint32_t A_ENDPOINT_MASK = (1 << A_ENDPOINT_BITS) - 1; //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + const uint32_t comp_rot = (low_chunk >> (mode + 1)) & 0x3; + const uint32_t index_mode = (mode == 4) ? static_cast((low_chunk >> 7) & 1) : 0; - const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); - const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + uint64_t color_read_bits = low_chunk >> 8; color_rgba endpoints[ENDPOINTS]; - for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t c = 0; c < 3; c++) + { for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); - + { + endpoints[e][c] = static_cast(color_read_bits & ENDPOINT_MASK); + color_read_bits >>= ENDPOINT_BITS; + } + } + + endpoints[0][3] = static_cast(color_read_bits & ENDPOINT_MASK); + + uint64_t rgb_weights_chunk; + uint64_t a_weights_chunk; + if (mode == 4) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast((color_read_bits >> A_ENDPOINT_BITS) & A_ENDPOINT_MASK); + rgb_weights_chunk = ((low_chunk >> 50) | (high_chunk << 14)); + a_weights_chunk = high_chunk >> 17; + } + else if (mode == 5) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast(((low_chunk >> 58) | (high_chunk << 6)) & A_ENDPOINT_MASK); + rgb_weights_chunk = high_chunk >> 2; + a_weights_chunk = high_chunk >> 33; + } + else + return false; + + insert_weight_zero(rgb_weights_chunk, WEIGHT_BITS, 0); + insert_weight_zero(a_weights_chunk, A_WEIGHT_BITS, 0); + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; - + const uint32_t weight_mask[2] = { index_mode ? A_WEIGHT_MASK : WEIGHT_MASK, index_mode ? WEIGHT_MASK : A_WEIGHT_MASK }; + uint32_t weights[16], a_weights[16]; - - for (uint32_t i = 0; i < 16; i++) - (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + if (index_mode) + std::swap(rgb_weights_chunk, a_weights_chunk); for (uint32_t i = 0; i < 16; i++) - (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 
1 : 0)); + { + weights[i] = (rgb_weights_chunk & weight_mask[0]); + rgb_weights_chunk >>= weight_bits[0]; + } - assert(bit_offset == 128); + for (uint32_t i = 0; i < 16; i++) + { + a_weights[i] = (a_weights_chunk & weight_mask[1]); + a_weights_chunk >>= weight_bits[1]; + } for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + endpoints[e][c] = static_cast(bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS)); color_rgba block_colors[8]; +#ifdef BC7DECOMP_USE_SSE2 + if (weight_bits[0] == 3) + bc7_interp3_sse2(endpoints, block_colors); + else + bc7_interp2_sse2(endpoints, block_colors); +#else for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) for (uint32_t c = 0; c < 3; c++) - block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + block_colors[i][c] = static_cast(bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0])); +#endif for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) - block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + block_colors[i][3] = static_cast(bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1])); for (uint32_t i = 0; i < 16; i++) { @@ -308,26 +527,46 @@ bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) if (block.m_lo.m_mode != (1 << 6)) return false; - const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); - const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); - const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); - const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); - const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); - const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); - const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); - 
const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + const uint32_t r0 = static_cast((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = static_cast((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = static_cast((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = static_cast((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = static_cast((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); + const uint32_t g1 = static_cast((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = static_cast((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = static_cast((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); color_rgba vals[16]; +#ifdef BC7DECOMP_USE_SSE2 + __m128i vep0 = _mm_set_epi16((short)a0, (short)b0, (short)g0, (short)r0, (short)a0, (short)b0, (short)g0, (short)r0); + __m128i vep1 = _mm_set_epi16((short)a1, (short)b1, (short)g1, (short)r1, (short)a1, (short)b1, (short)g1, (short)r1); + + for (uint32_t i = 0; i < 16; i += 4) + { + const __m128i w0 = g_bc7_weights4_sse2[i / 4 * 2 + 0]; + const __m128i w1 = g_bc7_weights4_sse2[i / 4 * 2 + 1]; + + const __m128i iw0 = _mm_sub_epi16(_mm_set1_epi16(64), w0); + const __m128i iw1 = _mm_sub_epi16(_mm_set1_epi16(64), w1); + + __m128i first_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw0), _mm_mullo_epi16(vep1, w0)), _mm_set1_epi16(32)), 6); + __m128i second_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw1), _mm_mullo_epi16(vep1, w1)), _mm_set1_epi16(32)), 6); + __m128i combined = _mm_packus_epi16(first_half, second_half); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(vals + i), combined); + } +#else for (uint32_t i = 0; i < 16; i++) { const uint32_t w = g_bc7_weights4[i]; const uint32_t iw = 64 - w; - vals[i].set_noclamp_rgba( - (r0 * iw + r1 * w + 32) >> 6, - (g0 * iw + g1 * w + 32) >> 6, - (b0 * iw + b1 * w + 32) >> 6, + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw 
+ g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, (a0 * iw + a1 * w + 32) >> 6); } +#endif pPixels[0] = vals[block.m_hi.m_s00]; pPixels[1] = vals[block.m_hi.m_s10]; @@ -338,7 +577,7 @@ bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) pPixels[5] = vals[block.m_hi.m_s11]; pPixels[6] = vals[block.m_hi.m_s21]; pPixels[7] = vals[block.m_hi.m_s31]; - + pPixels[8] = vals[block.m_hi.m_s02]; pPixels[9] = vals[block.m_hi.m_s12]; pPixels[10] = vals[block.m_hi.m_s22]; @@ -354,32 +593,43 @@ bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) bool unpack_bc7(const void *pBlock, color_rgba *pPixels) { - const uint32_t first_byte = static_cast(pBlock)[0]; + const uint8_t *block_bytes = static_cast(pBlock); + uint8_t mode = g_bc7_first_byte_to_mode[block_bytes[0]]; - for (uint32_t mode = 0; mode <= 7; mode++) + uint64_t data_chunks[2]; + + uint64_t endian_check = 1; + if (*reinterpret_cast(&endian_check) == 1) + memcpy(data_chunks, pBlock, 16); + else { - if (first_byte & (1U << mode)) + data_chunks[0] = data_chunks[1] = 0; + for (int chunk_index = 0; chunk_index < 2; chunk_index++) { - switch (mode) - { - case 0: - case 2: - return unpack_bc7_mode0_2(mode, pBlock, pPixels); - case 1: - case 3: - case 7: - return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); - case 4: - case 5: - return unpack_bc7_mode4_5(mode, pBlock, pPixels); - case 6: - return unpack_bc7_mode6(pBlock, pPixels); - default: - break; - } + for (int byte_index = 0; byte_index < 8; byte_index++) + data_chunks[chunk_index] |= static_cast(block_bytes[chunk_index * 8 + byte_index]) << (byte_index * 8); } } + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, data_chunks, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, data_chunks, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, data_chunks, pPixels); + case 6: + return unpack_bc7_mode6(data_chunks, pPixels); + default: + memset(pPixels, 0, sizeof(color_rgba) * 16); + 
break; + } + return false; } diff --git a/libkram/bc7enc/bc7decomp.h b/libkram/bc7enc/bc7decomp.h index cccdf50..49dc934 100644 --- a/libkram/bc7enc/bc7decomp.h +++ b/libkram/bc7enc/bc7decomp.h @@ -1,8 +1,13 @@ #pragma once +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) // nonstandard extension used: nameless struct/union +#endif + #include #include -//#include +#include #include #include @@ -163,3 +168,13 @@ class color_rgba bool unpack_bc7(const void *pBlock, color_rgba *pPixels); } // namespace bc7decomp + +namespace bc7decomp_ref +{ + bool unpack_bc7(const void* pBlock, bc7decomp::color_rgba* pPixels); +} // namespace bc7decomp_ref + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + diff --git a/libkram/bc7enc/bc7decomp_ref.cpp b/libkram/bc7enc/bc7decomp_ref.cpp new file mode 100644 index 0000000..8a69e94 --- /dev/null +++ b/libkram/bc7enc/bc7decomp_ref.cpp @@ -0,0 +1,431 @@ +// File: bc7decomp.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file) +#include "bc7decomp.h" + +using namespace bc7decomp; + +namespace bc7decomp_ref +{ + +const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; +const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; +const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + +const uint8_t g_bc7_partition2[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 +}; + +const uint8_t g_bc7_partition3[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 
0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, +}; + +const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + 
+const uint8_t g_bc7_table_anchor_index_third_subset_1[64] = +{ + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 +}; + +const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = +{ + 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 +}; + +inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) +{ + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = std::min(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; +} + +// BC7 mode 0-7 decompression. +// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. 
+ +static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } +static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + +static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - g_bc7_weights2[w]) + h * g_bc7_weights2[w] + 32) >> 6; } +static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - g_bc7_weights3[w]) + h * g_bc7_weights3[w] + 32) >> 6; } +static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - g_bc7_weights4[w]) + h * g_bc7_weights4[w] + 32) >> 6; } +static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) +{ + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; +} + +bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t PBITS = (mode == 0) ? 6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 
4 : 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[6]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_third_subset_1[part]) || (i == g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = 255; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? 
true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +{ + const uint32_t ENDPOINTS = 2; + const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 
6 : 8; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); + const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + + uint32_t weights[16], a_weights[16]; + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); + + color_rgba block_colors[8]; + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; +} + +struct bc7_mode_6 +{ + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; +}; + +bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) +{ + static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); + + const bc7_mode_6 &block = *static_cast(pBlock_bits); + + if (block.m_lo.m_mode != (1 << 6)) + return false; + + const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); + 
const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + + color_rgba vals[16]; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = g_bc7_weights4[i]; + const uint32_t iw = 64 - w; + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, + (a0 * iw + a1 * w + 32) >> 6); + } + + pPixels[0] = vals[block.m_hi.m_s00]; + pPixels[1] = vals[block.m_hi.m_s10]; + pPixels[2] = vals[block.m_hi.m_s20]; + pPixels[3] = vals[block.m_hi.m_s30]; + + pPixels[4] = vals[block.m_hi.m_s01]; + pPixels[5] = vals[block.m_hi.m_s11]; + pPixels[6] = vals[block.m_hi.m_s21]; + pPixels[7] = vals[block.m_hi.m_s31]; + + pPixels[8] = vals[block.m_hi.m_s02]; + pPixels[9] = vals[block.m_hi.m_s12]; + pPixels[10] = vals[block.m_hi.m_s22]; + pPixels[11] = vals[block.m_hi.m_s32]; + + pPixels[12] = vals[block.m_hi.m_s03]; + pPixels[13] = vals[block.m_hi.m_s13]; + pPixels[14] = vals[block.m_hi.m_s23]; + pPixels[15] = vals[block.m_hi.m_s33]; + + return true; +} + +bool unpack_bc7(const void *pBlock, bc7decomp::color_rgba *pPixels) +{ + const uint32_t first_byte = static_cast(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + if (first_byte & (1U << mode)) + { + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, pBlock, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, pBlock, pPixels); + case 6: + return unpack_bc7_mode6(pBlock, pPixels); + default: + break; + } + } + } + + return false; +} + +} // namespace bc7decomp_ref + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright(c) 2020 Richard Geldreich, Jr. +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files(the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions : +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain(www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non - commercial, and by any means. 
+In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain.We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors.We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ + diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index b2403b8..4cbdd55 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -1,82 +1,49 @@ // File: bc7enc.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file) // Currently supports modes 1, 6 for RGB blocks, and modes 5, 6, 7 for RGBA blocks. -// NOTE: This module is still a work in progress as of 3/31/2020. It needs to support mode modes for RGB content. #include "bc7enc.h" #include #include #include #include -#include - -// Make mapping to simd classes more simple. -// Repeated individual ops instead of functions that can be optimized don't result in a speedup. -// The algorithm is already so fast. 
-#define USE_SIMD_BCENC 0 +#include // Helpers static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } -#if !USE_SIMD_BCENC static inline float saturate(float value) { return clampf(value, 0, 1.0f); } -#endif //static inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } +static inline int32_t minimumi(int32_t a, int32_t b) { return (a < b) ? a : b; } static inline uint32_t minimumu(uint32_t a, uint32_t b) { return (a < b) ? a : b; } static inline float minimumf(float a, float b) { return (a < b) ? a : b; } //static inline uint8_t maximumub(uint8_t a, uint8_t b) { return (a > b) ? a : b; } static inline uint32_t maximumu(uint32_t a, uint32_t b) { return (a > b) ? a : b; } +//static inline int32_t maximumi(int32_t a, int32_t b) { return (a > b) ? a : b; } static inline float maximumf(float a, float b) { return (a > b) ? 
a : b; } static inline int squarei(int i) { return i * i; } static inline float squaref(float i) { return i * i; } +template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } static inline int32_t iabs32(int32_t v) { uint32_t msk = v >> 31; return (v ^ msk) - msk; } //static inline void swapub(uint8_t* a, uint8_t* b) { uint8_t t = *a; *a = *b; *b = t; } static inline void swapu(uint32_t* a, uint32_t* b) { uint32_t t = *a; *a = *b; *b = t; } //static inline void swapf(float* a, float* b) { float t = *a; *a = *b; *b = t; } -struct color_quad_u8 { - uint8_t r, g, b, a; - inline const uint8_t& operator[](int index) const { return *(&r + index); } - inline uint8_t& operator[](int index) { return *(&r + index); } -}; - -static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->r = (uint8_t)clampi(r, 0, 255); pRes->g = (uint8_t)clampi(g, 0, 255); pRes->b = (uint8_t)clampi(b, 0, 255); pRes->a = (uint8_t)clampi(a, 0, 255); return pRes; } -static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->r = (uint8_t)r; pRes->g = (uint8_t)g; pRes->b = (uint8_t)b; pRes->a = (uint8_t)a; return pRes; } -static inline bc7enc_bool color_quad_u8_notequals(const color_quad_u8 *pLHS, const color_quad_u8 *pRHS) { return (pLHS->r != pRHS->r) || (pLHS->g != pRHS->g) || (pLHS->b != pRHS->b) || (pLHS->a != pRHS->a); } - -#if USE_SIMD_BCENC -using namespace simd; -using vec4F = float4; - -static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { *pV = vec4F(x); return pV; } -static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { *pV = float4m(x,y,z,w); return pV; } -static inline vec4F *vec4F_saturate_in_place(vec4F *pV) { *pV = saturate(*pV); return pV; } -static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res = saturate(*pV); return res; } - -static inline vec4F 
vec4F_from_color(const color_quad_u8 *pC) { vec4F res = float4m((float)pC->r, (float)pC->g, (float)pC->b, (float)pC->a); return res; } -static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS + *pRHS; return res; } -static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS - *pRHS; return res; } -static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return dot(*pLHS, *pRHS); } -static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res = *pLHS * s; return res; } -static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { *pV = normalize(*pV); return pV; } +struct vec4F { float m_c[4]; }; + +static inline color_rgba *color_quad_u8_set_clamped(color_rgba *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->m_c[0] = (uint8_t)clampi(r, 0, 255); pRes->m_c[1] = (uint8_t)clampi(g, 0, 255); pRes->m_c[2] = (uint8_t)clampi(b, 0, 255); pRes->m_c[3] = (uint8_t)clampi(a, 0, 255); return pRes; } +static inline color_rgba *color_quad_u8_set(color_rgba *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->m_c[0] = (uint8_t)r; pRes->m_c[1] = (uint8_t)g; pRes->m_c[2] = (uint8_t)b; pRes->m_c[3] = (uint8_t)a; return pRes; } +static inline bool color_quad_u8_notequals(const color_rgba *pLHS, const color_rgba *pRHS) { return (pLHS->m_c[0] != pRHS->m_c[0]) || (pLHS->m_c[1] != pRHS->m_c[1]) || (pLHS->m_c[2] != pRHS->m_c[2]) || (pLHS->m_c[3] != pRHS->m_c[3]); } +static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { pV->m_c[0] = x; pV->m_c[1] = x; pV->m_c[2] = x; pV->m_c[3] = x; return pV; } +static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { pV->m_c[0] = x; pV->m_c[1] = y; pV->m_c[2] = z; pV->m_c[3] = w; return pV; } +static inline vec4F *vec4F_saturate_in_place(vec4F *pV) { pV->m_c[0] = saturate(pV->m_c[0]); pV->m_c[1] = saturate(pV->m_c[1]); pV->m_c[2] = saturate(pV->m_c[2]); pV->m_c[3] = saturate(pV->m_c[3]); 
return pV; } +static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res; res.m_c[0] = saturate(pV->m_c[0]); res.m_c[1] = saturate(pV->m_c[1]); res.m_c[2] = saturate(pV->m_c[2]); res.m_c[3] = saturate(pV->m_c[3]); return res; } +static inline vec4F vec4F_from_color(const color_rgba *pC) { vec4F res; vec4F_set(&res, pC->m_c[0], pC->m_c[1], pC->m_c[2], pC->m_c[3]); return res; } +static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->m_c[0] + pRHS->m_c[0], pLHS->m_c[1] + pRHS->m_c[1], pLHS->m_c[2] + pRHS->m_c[2], pLHS->m_c[3] + pRHS->m_c[3]); return res; } +static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->m_c[0] - pRHS->m_c[0], pLHS->m_c[1] - pRHS->m_c[1], pLHS->m_c[2] - pRHS->m_c[2], pLHS->m_c[3] - pRHS->m_c[3]); return res; } +static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return pLHS->m_c[0] * pRHS->m_c[0] + pLHS->m_c[1] * pRHS->m_c[1] + pLHS->m_c[2] * pRHS->m_c[2] + pLHS->m_c[3] * pRHS->m_c[3]; } +static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->m_c[0] * s, pLHS->m_c[1] * s, pLHS->m_c[2] * s, pLHS->m_c[3] * s); return res; } +static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { float s = pV->m_c[0] * pV->m_c[0] + pV->m_c[1] * pV->m_c[1] + pV->m_c[2] * pV->m_c[2] + pV->m_c[3] * pV->m_c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->m_c[0] *= s; pV->m_c[1] *= s; pV->m_c[2] *= s; pV->m_c[3] *= s; } return pV; } -#else -struct vec4F { - float r, g, b, a; - inline const float& operator[](int index) const { return *(&r + index); } - inline float& operator[](int index) { return *(&r + index); } -}; - -static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { pV->r = x; pV->g = x; pV->b = x; pV->a = x; return pV; } -static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { pV->r = x; pV->g = y; pV->b = z; pV->a = w; return pV; } -static inline vec4F 
*vec4F_saturate_in_place(vec4F *pV) { pV->r = saturate(pV->r); pV->g = saturate(pV->g); pV->b = saturate(pV->b); pV->a = saturate(pV->a); return pV; } -static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res; res.r = saturate(pV->r); res.g = saturate(pV->g); res.b = saturate(pV->b); res.a = saturate(pV->a); return res; } -static inline vec4F vec4F_from_color(const color_quad_u8 *pC) { vec4F res; vec4F_set(&res, pC->r, pC->g, pC->b, pC->a); return res; } -static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->r + pRHS->r, pLHS->g + pRHS->g, pLHS->b + pRHS->b, pLHS->a + pRHS->a); return res; } -static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->r - pRHS->r, pLHS->g - pRHS->g, pLHS->b - pRHS->b, pLHS->a - pRHS->a); return res; } -static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return pLHS->r * pRHS->r + pLHS->g * pRHS->g + pLHS->b * pRHS->b + pLHS->a * pRHS->a; } -static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->r * s, pLHS->g * s, pLHS->b * s, pLHS->a * s); return res; } -static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { float s = pV->r * pV->r + pV->g * pV->g + pV->b * pV->b + pV->a * pV->a; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->r *= s; pV->g *= s; pV->b *= s; pV->a *= s; } return pV; } -#endif // Various BC7 tables static const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; static const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; @@ -135,7 +102,7 @@ static const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 }; static const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 }; static const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 }; static const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 }; -static bc7enc_bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || 
(mode == 5); } +static bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || (mode == 5); } typedef struct { uint16_t m_error; uint8_t m_lo; uint8_t m_hi; } endpoint_err; @@ -145,9 +112,105 @@ static const uint32_t BC7ENC_MODE_1_OPTIMAL_INDEX = 2; static endpoint_err g_bc7_mode_7_optimal_endpoints[256][2][2]; // [c][pbit][hp][lp] const uint32_t BC7E_MODE_7_OPTIMAL_INDEX = 1; -// Initialize the lookup table used for optimal single color compression in mode 1. Must be called before encoding. +static float g_mode1_rgba_midpoints[64][2]; +static float g_mode5_rgba_midpoints[128]; +static float g_mode7_rgba_midpoints[32][2]; + +static uint8_t g_mode6_reduced_quant[2048][2]; + +static bool g_initialized; + +// Initialize the lookup table used for optimal single color compression in mode 1/7. Must be called before encoding. void bc7enc_compress_block_init() { + if (g_initialized) + return; + + // Mode 7 endpoint midpoints + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < 32; i++) + { + uint32_t vl = ((i << 1) | p) << 2; + vl |= (vl >> 6); + float lo = vl / 255.0f; + + uint32_t vh = ((minimumi(31, (i + 1)) << 1) | p) << 2; + vh |= (vh >> 6); + float hi = vh / 255.0f; + + //g_mode7_quant_values[i][p] = lo; + if (i == 31) + g_mode7_rgba_midpoints[i][p] = 1.0f; + else + g_mode7_rgba_midpoints[i][p] = (lo + hi) / 2.0f; + } + } + + // Mode 1 endpoint midpoints + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < 64; i++) + { + uint32_t vl = ((i << 1) | p) << 1; + vl |= (vl >> 7); + float lo = vl / 255.0f; + + uint32_t vh = ((minimumi(63, (i + 1)) << 1) | p) << 1; + vh |= (vh >> 7); + float hi = vh / 255.0f; + + //g_mode1_quant_values[i][p] = lo; + if (i == 63) + g_mode1_rgba_midpoints[i][p] = 1.0f; + else + g_mode1_rgba_midpoints[i][p] = (lo + hi) / 2.0f; + } + } + + // Mode 5 endpoint midpoints + for (uint32_t i = 0; i < 128; i++) + { + uint32_t vl = (i << 1); + vl |= (vl >> 7); + float lo = vl / 255.0f; + + uint32_t vh = 
minimumi(127, i + 1) << 1; + vh |= (vh >> 7); + float hi = vh / 255.0f; + + if (i == 127) + g_mode5_rgba_midpoints[i] = 1.0f; + else + g_mode5_rgba_midpoints[i] = (lo + hi) / 2.0f; + } + + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < 2048; i++) + { + float f = i / 2047.0f; + + float best_err = 1e+9f; + int best_index = 0; + for (int j = 0; j < 64; j++) + { + int ik = (j * 127 + 31) / 63; + float k = ((ik << 1) + p) / 255.0f; + + float e = fabsf(k - f); + if (e < best_err) + { + best_err = e; + best_index = ik; + } + } + + g_mode6_reduced_quant[i][p] = (uint8_t)best_index; + } + } // p + + // Mode 1 for (int c = 0; c < 256; c++) { for (uint32_t lp = 0; lp < 2; lp++) @@ -217,9 +280,11 @@ void bc7enc_compress_block_init() } // lp } // c + + g_initialized = true; } -static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_quad_u8 *pColors) +static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_rgba *pColors) { // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
@@ -231,14 +296,14 @@ static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSel for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; - z00 += pSelector_weights[sel][0]; - z10 += pSelector_weights[sel][1]; - z11 += pSelector_weights[sel][2]; - float w = pSelector_weights[sel][3]; - q00_r += w * pColors[i].r; t_r += pColors[i].r; - q00_g += w * pColors[i].g; t_g += pColors[i].g; - q00_b += w * pColors[i].b; t_b += pColors[i].b; - q00_a += w * pColors[i].a; t_a += pColors[i].a; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; } q10_r = t_r - q00_r; @@ -258,32 +323,32 @@ static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSel iz10 = -z10 * det; iz11 = z00 * det; - pXl->r = (float)(iz00 * q00_r + iz01 * q10_r); pXh->r = (float)(iz10 * q00_r + iz11 * q10_r); - pXl->g = (float)(iz00 * q00_g + iz01 * q10_g); pXh->g = (float)(iz10 * q00_g + iz11 * q10_g); - pXl->b = (float)(iz00 * q00_b + iz01 * q10_b); pXh->b = (float)(iz10 * q00_b + iz11 * q10_b); - pXl->a = (float)(iz00 * q00_a + iz01 * q10_a); pXh->a = (float)(iz10 * q00_a + iz11 * q10_a); + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = (float)(iz00 * q00_a + iz01 * q10_a); pXh->m_c[3] = (float)(iz10 * q00_a + iz11 * q10_a); for (uint32_t c = 0; c < 4; c++) { - if (((*pXl)[c] < 0.0f) || ((*pXh)[c] > 255.0f)) + if ((pXl->m_c[c] 
< 0.0f) || (pXh->m_c[c] > 255.0f)) { uint32_t lo_v = UINT32_MAX, hi_v = 0; for (uint32_t i = 0; i < N; i++) { - lo_v = minimumu(lo_v, pColors[i][c]); - hi_v = maximumu(hi_v, pColors[i][c]); + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); } if (lo_v == hi_v) { - (*pXl)[c] = (float)lo_v; - (*pXh)[c] = (float)hi_v; + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; } } } } -static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_quad_u8 *pColors) +static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_rgba*pColors) { float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; @@ -292,13 +357,13 @@ static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSele for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; - z00 += pSelector_weights[sel].r; - z10 += pSelector_weights[sel].g; - z11 += pSelector_weights[sel].b; - float w = pSelector_weights[sel].a; - q00_r += w * pColors[i].r; t_r += pColors[i].r; - q00_g += w * pColors[i].g; t_g += pColors[i].g; - q00_b += w * pColors[i].b; t_b += pColors[i].b; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; } q10_r = t_r - q00_r; @@ -317,32 +382,32 @@ static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSele iz10 = -z10 * det; iz11 = z00 * det; - pXl->r = (float)(iz00 * q00_r + iz01 * q10_r); pXh->r = (float)(iz10 * q00_r + iz11 * q10_r); - pXl->g = (float)(iz00 * q00_g + iz01 * q10_g); pXh->g = 
(float)(iz10 * q00_g + iz11 * q10_g); - pXl->b = (float)(iz00 * q00_b + iz01 * q10_b); pXh->b = (float)(iz10 * q00_b + iz11 * q10_b); - pXl->a = 255.0f; pXh->a = 255.0f; + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = 255.0f; pXh->m_c[3] = 255.0f; for (uint32_t c = 0; c < 3; c++) { - if (((*pXl)[c] < 0.0f) || ((*pXh)[c] > 255.0f)) + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) { uint32_t lo_v = UINT32_MAX, hi_v = 0; for (uint32_t i = 0; i < N; i++) { - lo_v = minimumu(lo_v, pColors[i][c]); - hi_v = maximumu(hi_v, pColors[i][c]); + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); } if (lo_v == hi_v) { - (*pXl)[c] = (float)lo_v; - (*pXh)[c] = (float)hi_v; + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; } } } } -static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, float* pXl, float* pXh, const color_quad_u8* pColors) +static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, float* pXl, float* pXh, const color_rgba *pColors) { // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
@@ -352,13 +417,13 @@ static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelect { const uint32_t sel = pSelectors[i]; - z00 += pSelector_weights[sel].r; - z10 += pSelector_weights[sel].g; - z11 += pSelector_weights[sel].b; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; - float w = pSelector_weights[sel].a; + float w = pSelector_weights[sel].m_c[3]; - q00_a += w * pColors[i].a; t_a += pColors[i].a; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; } q10_a = t_a - q00_a; @@ -382,8 +447,8 @@ static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelect uint32_t lo_v = UINT32_MAX, hi_v = 0; for (uint32_t i = 0; i < N; i++) { - lo_v = minimumu(lo_v, pColors[i].a); - hi_v = maximumu(hi_v, pColors[i].a); + lo_v = minimumu(lo_v, pColors[i].m_c[3]); + hi_v = maximumu(hi_v, pColors[i].m_c[3]); } if (lo_v == hi_v) @@ -394,78 +459,78 @@ static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelect } } -typedef struct +struct color_cell_compressor_params { uint32_t m_num_pixels; - const color_quad_u8 *m_pPixels; + const color_rgba *m_pPixels; uint32_t m_num_selector_weights; const uint32_t *m_pSelector_weights; const vec4F *m_pSelector_weightsx; uint32_t m_comp_bits; uint32_t m_weights[4]; - bc7enc_bool m_has_alpha; - bc7enc_bool m_has_pbits; - bc7enc_bool m_endpoints_share_pbit; - bc7enc_bool m_perceptual; -} color_cell_compressor_params; + bool m_has_alpha; + bool m_has_pbits; + bool m_endpoints_share_pbit; + bool m_perceptual; +}; -typedef struct +struct color_cell_compressor_results { uint64_t m_best_overall_err; - color_quad_u8 m_low_endpoint; - color_quad_u8 m_high_endpoint; + color_rgba m_low_endpoint; + color_rgba m_high_endpoint; uint32_t m_pbits[2]; uint8_t *m_pSelectors; uint8_t *m_pSelectors_temp; -} color_cell_compressor_results; +}; -static inline color_quad_u8 scale_color(const color_quad_u8 *pC, const 
color_cell_compressor_params *pParams) +static inline color_rgba scale_color(const color_rgba *pC, const color_cell_compressor_params *pParams) { - color_quad_u8 results; + color_rgba results; const uint32_t n = pParams->m_comp_bits + (pParams->m_has_pbits ? 1 : 0); assert((n >= 4) && (n <= 8)); for (uint32_t i = 0; i < 4; i++) { - uint32_t v = (*pC)[i] << (8 - n); + uint32_t v = pC->m_c[i] << (8 - n); v |= (v >> n); assert(v <= 255); - results[i] = (uint8_t)(v); + results.m_c[i] = (uint8_t)(v); } return results; } -static inline uint64_t compute_color_distance_rgb(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +static inline uint64_t compute_color_distance_rgb(const color_rgba *pE1, const color_rgba *pE2, bool perceptual, const uint32_t weights[4]) { int dr, dg, db; if (perceptual) { - const int l1 = pE1->r * 109 + pE1->g * 366 + pE1->b * 37; - const int cr1 = ((int)pE1->r << 9) - l1; - const int cb1 = ((int)pE1->b << 9) - l1; - const int l2 = pE2->r * 109 + pE2->g * 366 + pE2->b * 37; - const int cr2 = ((int)pE2->r << 9) - l2; - const int cb2 = ((int)pE2->b << 9) - l2; + const int l1 = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + const int cr1 = ((int)pE1->m_c[0] << 9) - l1; + const int cb1 = ((int)pE1->m_c[2] << 9) - l1; + const int l2 = pE2->m_c[0] * 109 + pE2->m_c[1] * 366 + pE2->m_c[2] * 37; + const int cr2 = ((int)pE2->m_c[0] << 9) - l2; + const int cb2 = ((int)pE2->m_c[2] << 9) - l2; dr = (l1 - l2) >> 8; dg = (cr1 - cr2) >> 8; db = (cb1 - cb2) >> 8; } else { - dr = (int)pE1->r - (int)pE2->r; - dg = (int)pE1->g - (int)pE2->g; - db = (int)pE1->b - (int)pE2->b; + dr = (int)pE1->m_c[0] - (int)pE2->m_c[0]; + dg = (int)pE1->m_c[1] - (int)pE2->m_c[1]; + db = (int)pE1->m_c[2] - (int)pE2->m_c[2]; } return weights[0] * (uint32_t)(dr * dr) + weights[1] * (uint32_t)(dg * dg) + weights[2] * (uint32_t)(db * db); } -static inline uint64_t compute_color_distance_rgba(const color_quad_u8 *pE1, const 
color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +static inline uint64_t compute_color_distance_rgba(const color_rgba *pE1, const color_rgba *pE2, bool perceptual, const uint32_t weights[4]) { - int da = (int)pE1->a - (int)pE2->a; + int da = (int)pE1->m_c[3] - (int)pE2->m_c[3]; return compute_color_distance_rgb(pE1, pE2, perceptual, weights) + (weights[3] * (uint32_t)(da * da)); } @@ -497,18 +562,18 @@ static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pPar memset(pSelectors, BC7ENC_MODE_1_OPTIMAL_INDEX, pParams->m_num_pixels); - color_quad_u8 p; + color_rgba p; for (uint32_t i = 0; i < 3; i++) { - uint32_t low = ((pResults->m_low_endpoint[i] << 1) | pResults->m_pbits[0]) << 1; + uint32_t low = ((pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; low |= (low >> 7); - uint32_t high = ((pResults->m_high_endpoint[i] << 1) | pResults->m_pbits[0]) << 1; + uint32_t high = ((pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; high |= (high >> 7); - p[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); } - p.a = 255; + p.m_c[3] = 255; uint64_t total_err = 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) @@ -520,7 +585,7 @@ static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pPar } static uint64_t pack_mode7_to_one_color(const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, uint32_t r, uint32_t g, uint32_t b, uint32_t a, - uint8_t* pSelectors, uint32_t num_pixels, const color_quad_u8* pPixels) + uint8_t* pSelectors, uint32_t num_pixels, const color_rgba *pPixels) { uint32_t best_err = UINT_MAX; uint32_t best_p = 0; @@ -553,19 +618,19 @@ static uint64_t pack_mode7_to_one_color(const 
color_cell_compressor_params* pPar pResults->m_pbits[1] = best_hi_p; for (uint32_t i = 0; i < num_pixels; i++) - pSelectors[i] = BC7E_MODE_7_OPTIMAL_INDEX; + pSelectors[i] = (uint8_t)BC7E_MODE_7_OPTIMAL_INDEX; - color_quad_u8 p; + color_rgba p; for (uint32_t i = 0; i < 4; i++) { - uint32_t low = (pResults->m_low_endpoint[i] << 1) | pResults->m_pbits[0]; - uint32_t high = (pResults->m_high_endpoint[i] << 1) | pResults->m_pbits[1]; + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[1]; low = (low << 2) | (low >> 6); high = (high << 2) | (high >> 6); - p[i] = (low * (64 - g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX] + 32) >> 6; + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX] + 32) >> 6); } uint64_t total_err = 0; @@ -577,10 +642,11 @@ static uint64_t pack_mode7_to_one_color(const color_cell_compressor_params* pPar return total_err; } -static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +static uint64_t evaluate_solution(const color_rgba *pLow, const color_rgba *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, + const bc7enc_compress_block_params* pComp_params) { - color_quad_u8 quantMinColor = *pLow; - color_quad_u8 quantMaxColor = *pHigh; + color_rgba quantMinColor = *pLow; + color_rgba quantMaxColor = *pHigh; if (pParams->m_has_pbits) { @@ -594,62 +660,79 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 maxPBit = pbits[1]; } - quantMinColor.r = (uint8_t)((pLow->r << 1) | minPBit); - quantMinColor.g = (uint8_t)((pLow->g << 1) | minPBit); - quantMinColor.b = (uint8_t)((pLow->b << 1) | 
minPBit); - quantMinColor.a = (uint8_t)((pLow->a << 1) | minPBit); + quantMinColor.m_c[0] = (uint8_t)((pLow->m_c[0] << 1) | minPBit); + quantMinColor.m_c[1] = (uint8_t)((pLow->m_c[1] << 1) | minPBit); + quantMinColor.m_c[2] = (uint8_t)((pLow->m_c[2] << 1) | minPBit); + quantMinColor.m_c[3] = (uint8_t)((pLow->m_c[3] << 1) | minPBit); - quantMaxColor.r = (uint8_t)((pHigh->r << 1) | maxPBit); - quantMaxColor.g = (uint8_t)((pHigh->g << 1) | maxPBit); - quantMaxColor.b = (uint8_t)((pHigh->b << 1) | maxPBit); - quantMaxColor.a = (uint8_t)((pHigh->a << 1) | maxPBit); + quantMaxColor.m_c[0] = (uint8_t)((pHigh->m_c[0] << 1) | maxPBit); + quantMaxColor.m_c[1] = (uint8_t)((pHigh->m_c[1] << 1) | maxPBit); + quantMaxColor.m_c[2] = (uint8_t)((pHigh->m_c[2] << 1) | maxPBit); + quantMaxColor.m_c[3] = (uint8_t)((pHigh->m_c[3] << 1) | maxPBit); } - color_quad_u8 actualMinColor = scale_color(&quantMinColor, pParams); - color_quad_u8 actualMaxColor = scale_color(&quantMaxColor, pParams); + color_rgba actualMinColor = scale_color(&quantMinColor, pParams); + color_rgba actualMaxColor = scale_color(&quantMaxColor, pParams); const uint32_t N = pParams->m_num_selector_weights; - color_quad_u8 weightedColors[16]; + color_rgba weightedColors[16]; weightedColors[0] = actualMinColor; weightedColors[N - 1] = actualMaxColor; const uint32_t nc = pParams->m_has_alpha ? 
4 : 3; for (uint32_t i = 1; i < (N - 1); i++) for (uint32_t j = 0; j < nc; j++) - weightedColors[i][j] = (uint8_t)((actualMinColor[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor[j] * pParams->m_pSelector_weights[i] + 32) >> 6); - - const int lr = actualMinColor.r; - const int lg = actualMinColor.g; - const int lb = actualMinColor.b; - const int dr = actualMaxColor.r - lr; - const int dg = actualMaxColor.g - lg; - const int db = actualMaxColor.b - lb; + weightedColors[i].m_c[j] = (uint8_t)((actualMinColor.m_c[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor.m_c[j] * pParams->m_pSelector_weights[i] + 32) >> 6); + + const int lr = actualMinColor.m_c[0]; + const int lg = actualMinColor.m_c[1]; + const int lb = actualMinColor.m_c[2]; + const int dr = actualMaxColor.m_c[0] - lr; + const int dg = actualMaxColor.m_c[1] - lg; + const int db = actualMaxColor.m_c[2] - lb; uint64_t total_err = 0; - - if (!pParams->m_perceptual) + + if (pComp_params->m_force_selectors) + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const uint32_t best_sel = pComp_params->m_selectors[i]; + + uint64_t best_err; + if (pParams->m_has_alpha) + best_err = compute_color_distance_rgba(&weightedColors[best_sel], &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + else + best_err = compute_color_distance_rgb(&weightedColors[best_sel], &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + else if (!pParams->m_perceptual) { if (pParams->m_has_alpha) { - const int la = actualMinColor.a; - const int da = actualMaxColor.a - la; + const int la = actualMinColor.m_c[3]; + const int da = actualMaxColor.m_c[3] - la; const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + squarei(da) + .00000125f); for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { - const color_quad_u8 *pC = &pParams->m_pPixels[i]; - int r = pC->r; - int g = 
pC->g; - int b = pC->b; - int a = pC->a; + const color_rgba *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + int a = pC->m_c[3]; int best_sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db + (a - la) * da) * f + .5f); best_sel = clampi(best_sel, 1, N - 1); - uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); - uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, false, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, false, pParams->m_weights); if (err1 > err0) { @@ -667,16 +750,16 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { - const color_quad_u8 *pC = &pParams->m_pPixels[i]; - int r = pC->r; - int g = pC->g; - int b = pC->b; + const color_rgba *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; int sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db) * f + .5f); sel = clampi(sel, 1, N - 1); - uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); - uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, false, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, false, pParams->m_weights); int best_sel = sel; uint64_t best_err = err1; @@ -704,7 +787,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 { for (uint32_t j = 0; j < N; j++) { - uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + 
uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], true, pParams->m_weights); if (err < best_err) { best_err = err; @@ -716,7 +799,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 { for (uint32_t j = 0; j < N; j++) { - uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], true, pParams->m_weights); if (err < best_err) { best_err = err; @@ -747,32 +830,34 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 return total_err; } -static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, color_quad_u8 *pTrialMaxColor, const vec4F *pXl, const vec4F *pXh, uint32_t iscale) +static void fixDegenerateEndpoints(uint32_t mode, color_rgba *pTrialMinColor, color_rgba *pTrialMaxColor, const vec4F *pXl, const vec4F *pXh, uint32_t iscale, + const bc7enc_compress_block_params* pComp_params) { //if ((mode == 1) || (mode == 7)) - if (mode == 1) + //if (mode == 1) + if ( (mode == 1) || ((mode == 6) && (pComp_params->m_quant_mode6_endpoints)) ) { // fix degenerate case where the input collapses to a single colorspace voxel, and we loose all freedom (test with grayscale ramps) for (uint32_t i = 0; i < 3; i++) { - if ((*pTrialMinColor)[i] == (*pTrialMaxColor)[i]) + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) { - if (fabs((*pXl)[i] - (*pXh)[i]) > 0.0f) + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.0f) { - if ((*pTrialMinColor)[i] > (iscale >> 1)) + if (pTrialMinColor->m_c[i] > (iscale >> 1)) { - if ((*pTrialMinColor)[i] > 0) - (*pTrialMinColor)[i]--; + if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; else - if ((*pTrialMaxColor)[i] < iscale) - (*pTrialMaxColor)[i]++; + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; } else { - if ((*pTrialMaxColor)[i] < iscale) - 
(*pTrialMaxColor)[i]++; - else if ((*pTrialMinColor)[i] > 0) - (*pTrialMinColor)[i]--; + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + else if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; } } } @@ -780,7 +865,8 @@ static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, } } -static uint64_t find_optimal_solution(uint32_t mode, vec4F xl, vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +static uint64_t find_optimal_solution(uint32_t mode, vec4F xl, vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, + const bc7enc_compress_block_params* pComp_params) { vec4F_saturate_in_place(&xl); vec4F_saturate_in_place(&xh); @@ -792,114 +878,221 @@ static uint64_t find_optimal_solution(uint32_t mode, vec4F xl, vec4F xh, const c const int32_t totalComps = pParams->m_has_alpha ? 4 : 3; uint32_t best_pbits[2]; - color_quad_u8 bestMinColor, bestMaxColor; + color_rgba bestMinColor, bestMaxColor; if (!pParams->m_endpoints_share_pbit) { - float best_err0 = 1e+9; - float best_err1 = 1e+9; - - for (int p = 0; p < 2; p++) + if ((pParams->m_comp_bits == 7) && (pComp_params->m_quant_mode6_endpoints)) { - color_quad_u8 xMinColor, xMaxColor; + best_pbits[0] = 0; + bestMinColor.m_c[0] = g_mode6_reduced_quant[(int)((xl.m_c[0] * 2047.0f) + .5f)][0]; + bestMinColor.m_c[1] = g_mode6_reduced_quant[(int)((xl.m_c[1] * 2047.0f) + .5f)][0]; + bestMinColor.m_c[2] = g_mode6_reduced_quant[(int)((xl.m_c[2] * 2047.0f) + .5f)][0]; + bestMinColor.m_c[3] = g_mode6_reduced_quant[(int)((xl.m_c[3] * 2047.0f) + .5f)][0]; + + best_pbits[1] = 1; + bestMaxColor.m_c[0] = g_mode6_reduced_quant[(int)((xh.m_c[0] * 2047.0f) + .5f)][1]; + bestMaxColor.m_c[1] = g_mode6_reduced_quant[(int)((xh.m_c[1] * 2047.0f) + .5f)][1]; + bestMaxColor.m_c[2] = g_mode6_reduced_quant[(int)((xh.m_c[2] * 2047.0f) + .5f)][1]; + bestMaxColor.m_c[3] = g_mode6_reduced_quant[(int)((xh.m_c[3] * 2047.0f) 
+ .5f)][1]; + } + else + { + float best_err0 = 1e+9; + float best_err1 = 1e+9; - // Notes: The pbit controls which quantization intervals are selected. - // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. - // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value - // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) - // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) - for (uint32_t c = 0; c < 4; c++) + for (int p = 0; p < 2; p++) { - xMinColor[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - xMaxColor[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - } + color_rgba xMinColor, xMaxColor; + + // Notes: The pbit controls which quantization intervals are selected. + // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. + // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value + // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) + // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) + if (pParams->m_comp_bits == 5) + { + for (uint32_t c = 0; c < 4; c++) + { + int vl = (int)(xl.m_c[c] * 31.0f); + vl += (xl.m_c[c] > g_mode7_rgba_midpoints[vl][p]); + xMinColor.m_c[c] = (uint8_t)clampi(vl * 2 + p, p, 63 - 1 + p); - color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); - color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + int vh = (int)(xh.m_c[c] * 31.0f); + vh += (xh.m_c[c] > g_mode7_rgba_midpoints[vh][p]); + xMaxColor.m_c[c] = (uint8_t)clampi(vh * 2 + p, p, 63 - 1 + p); + } + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + 
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + } - float err0 = 0, err1 = 0; - for (int i = 0; i < totalComps; i++) - { - err0 += squaref(scaledLow[i] - xl[i] * 255.0f); - err1 += squaref(scaledHigh[i] - xh[i] * 255.0f); - } + color_rgba scaledLow = scale_color(&xMinColor, pParams); + color_rgba scaledHigh = scale_color(&xMaxColor, pParams); - if (err0 < best_err0) - { - best_err0 = err0; - best_pbits[0] = p; + float err0 = 0, err1 = 0; + for (int i = 0; i < totalComps; i++) + { + err0 += squaref(scaledLow.m_c[i] - xl.m_c[i] * 255.0f); + err1 += squaref(scaledHigh.m_c[i] - xh.m_c[i] * 255.0f); + } - bestMinColor.r = xMinColor.r >> 1; - bestMinColor.g = xMinColor.g >> 1; - bestMinColor.b = xMinColor.b >> 1; - bestMinColor.a = xMinColor.a >> 1; - } + if (p == 1) + { + err0 *= pComp_params->m_pbit1_weight; + err1 *= pComp_params->m_pbit1_weight; + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; - if (err1 < best_err1) - { - best_err1 = err1; - best_pbits[1] = p; + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; - bestMaxColor.r = xMaxColor.r >> 1; - bestMaxColor.g = xMaxColor.g >> 1; - bestMaxColor.b = xMaxColor.b >> 1; - bestMaxColor.a = xMaxColor.a >> 1; + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } } } } else { - // Endpoints share pbits - float best_err = 1e+9; - - for (int p = 0; p < 2; p++) + if ((mode == 1) && (pComp_params->m_bias_mode1_pbits)) { - color_quad_u8 xMinColor, xMaxColor; + float x = 0.0f; + for (uint32_t c = 0; c < 3; c++) + x = std::max(std::max(x, xl.m_c[c]), xh.m_c[c]); + + int p = 0; + if (x > 
(253.0f / 255.0f)) + p = 1; + + color_rgba xMinColor, xMaxColor; for (uint32_t c = 0; c < 4; c++) { - xMinColor[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - xMaxColor[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - } + int vl = (int)(xl.m_c[c] * 63.0f); + vl += (xl.m_c[c] > g_mode1_rgba_midpoints[vl][p]); + xMinColor.m_c[c] = (uint8_t)clampi(vl * 2 + p, p, 127 - 1 + p); - color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); - color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + int vh = (int)(xh.m_c[c] * 63.0f); + vh += (xh.m_c[c] > g_mode1_rgba_midpoints[vh][p]); + xMaxColor.m_c[c] = (uint8_t)clampi(vh * 2 + p, p, 127 - 1 + p); + } - float err = 0; - for (int i = 0; i < totalComps; i++) - err += squaref((scaledLow[i] / 255.0f) - xl[i]) + squaref((scaledHigh[i] / 255.0f) - xh[i]); + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + else + { + // Endpoints share pbits + float best_err = 1e+9; - if (err < best_err) + for (int p = 0; p < 2; p++) { - best_err = err; - best_pbits[0] = p; - best_pbits[1] = p; - for (uint32_t j = 0; j < 4; j++) + color_rgba xMinColor, xMaxColor; + if (pParams->m_comp_bits == 6) + { + for (uint32_t c = 0; c < 4; c++) + { + int vl = (int)(xl.m_c[c] * 63.0f); + vl += (xl.m_c[c] > g_mode1_rgba_midpoints[vl][p]); + xMinColor.m_c[c] = (uint8_t)clampi(vl * 2 + p, p, 127 - 1 + p); + + int vh = (int)(xh.m_c[c] * 63.0f); + vh += (xh.m_c[c] > g_mode1_rgba_midpoints[vh][p]); + xMaxColor.m_c[c] = (uint8_t)clampi(vh * 2 + p, p, 127 - 1 + p); + } + } + else { - bestMinColor[j] = xMinColor[j] >> 1; - bestMaxColor[j] = xMaxColor[j] >> 1; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + 
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + } + + color_rgba scaledLow = scale_color(&xMinColor, pParams); + color_rgba scaledHigh = scale_color(&xMaxColor, pParams); + + float err = 0; + for (int i = 0; i < totalComps; i++) + err += squaref((scaledLow.m_c[i] / 255.0f) - xl.m_c[i]) + squaref((scaledHigh.m_c[i] / 255.0f) - xh.m_c[i]); + + if (p == 1) + err *= pComp_params->m_pbit1_weight; + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } } } } } - fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1); + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, pComp_params); if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) - evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults); + evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults, pComp_params); } else { const int iscale = (1 << pParams->m_comp_bits) - 1; const float scale = (float)iscale; - color_quad_u8 trialMinColor, trialMaxColor; - color_quad_u8_set_clamped(&trialMinColor, (int)(xl.r * scale + .5f), (int)(xl.g * scale + .5f), (int)(xl.b * scale + .5f), (int)(xl.a * scale + .5f)); - color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.r * scale + .5f), (int)(xh.g * scale + .5f), (int)(xh.b * scale + .5f), (int)(xh.a * scale + .5f)); + color_rgba trialMinColor, trialMaxColor; + if (pParams->m_comp_bits == 7) + { + for (uint32_t c = 0; c < 4; c++) + { + int vl = (int)(xl.m_c[c] * 127.0f); + vl += (xl.m_c[c] > g_mode5_rgba_midpoints[vl]); + 
trialMinColor.m_c[c] = (uint8_t)clampi(vl, 0, 127); - fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale); + int vh = (int)(xh.m_c[c] * 127.0f); + vh += (xh.m_c[c] > g_mode5_rgba_midpoints[vh]); + trialMaxColor.m_c[c] = (uint8_t)clampi(vh, 0, 127); + } + } + else + { + color_quad_u8_set_clamped(&trialMinColor, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + } + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale, pComp_params); if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) - evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults, pComp_params); } return pResults->m_best_overall_err; @@ -914,14 +1107,14 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso // If the partition's colors are all the same in mode 1, then just pack them as a single color. 
if (mode == 1) { - const uint32_t cr = pParams->m_pPixels[0].r, cg = pParams->m_pPixels[0].g, cb = pParams->m_pPixels[0].b; + const uint32_t cr = pParams->m_pPixels[0].m_c[0], cg = pParams->m_pPixels[0].m_c[1], cb = pParams->m_pPixels[0].m_c[2]; - bc7enc_bool allSame = BC7ENC_TRUE; + bool allSame = true; for (uint32_t i = 1; i < pParams->m_num_pixels; i++) { - if ((cr != pParams->m_pPixels[i].r) || (cg != pParams->m_pPixels[i].g) || (cb != pParams->m_pPixels[i].b)) + if ((cr != pParams->m_pPixels[i].m_c[0]) || (cg != pParams->m_pPixels[i].m_c[1]) || (cb != pParams->m_pPixels[i].m_c[2])) { - allSame = BC7ENC_FALSE; + allSame = false; break; } } @@ -931,14 +1124,14 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso } else if (mode == 7) { - const uint32_t cr = pParams->m_pPixels[0].r, cg = pParams->m_pPixels[0].g, cb = pParams->m_pPixels[0].b, ca = pParams->m_pPixels[0].a; + const uint32_t cr = pParams->m_pPixels[0].m_c[0], cg = pParams->m_pPixels[0].m_c[1], cb = pParams->m_pPixels[0].m_c[2], ca = pParams->m_pPixels[0].m_c[3]; - bc7enc_bool allSame = BC7ENC_TRUE; + bool allSame = true; for (uint32_t i = 1; i < pParams->m_num_pixels; i++) { - if ((cr != pParams->m_pPixels[i].r) || (cg != pParams->m_pPixels[i].g) || (cb != pParams->m_pPixels[i].b) || (ca != pParams->m_pPixels[i].a)) + if ((cr != pParams->m_pPixels[i].m_c[0]) || (cg != pParams->m_pPixels[i].m_c[1]) || (cb != pParams->m_pPixels[i].m_c[2]) || (ca != pParams->m_pPixels[i].m_c[3])) { - allSame = BC7ENC_FALSE; + allSame = false; break; } } @@ -970,16 +1163,16 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso { vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); color = vec4F_sub(&color, &meanColorScaled); - vec4F a = vec4F_mul(&color, color.r); - vec4F b = vec4F_mul(&color, color.g); - vec4F c = vec4F_mul(&color, color.b); - vec4F d = vec4F_mul(&color, color.a); + vec4F a = vec4F_mul(&color, color.m_c[0]); + vec4F b = vec4F_mul(&color, 
color.m_c[1]); + vec4F c = vec4F_mul(&color, color.m_c[2]); + vec4F d = vec4F_mul(&color, color.m_c[3]); vec4F n = i ? axis : color; vec4F_normalize_in_place(&n); - axis.r += vec4F_dot(&a, &n); - axis.g += vec4F_dot(&b, &n); - axis.b += vec4F_dot(&c, &n); - axis.a += vec4F_dot(&d, &n); + axis.m_c[0] += vec4F_dot(&a, &n); + axis.m_c[1] += vec4F_dot(&b, &n); + axis.m_c[2] += vec4F_dot(&c, &n); + axis.m_c[3] += vec4F_dot(&d, &n); } vec4F_normalize_in_place(&axis); } @@ -990,10 +1183,10 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { - const color_quad_u8 *pV = &pParams->m_pPixels[i]; - float r = pV->r - meanColorScaled.r; - float g = pV->g - meanColorScaled.g; - float b = pV->b - meanColorScaled.b; + const color_rgba *pV = &pParams->m_pPixels[i]; + float r = pV->m_c[0] - meanColorScaled.m_c[0]; + float g = pV->m_c[1] - meanColorScaled.m_c[1]; + float b = pV->m_c[2] - meanColorScaled.m_c[2]; cov[0] += r*r; cov[1] += r*g; cov[2] += r*b; cov[3] += g*g; cov[4] += g*b; cov[5] += b*b; } @@ -1070,20 +1263,20 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso minColor = maxColor; maxColor = temp; #else - float a = minColor.r, b = minColor.g, c = minColor.b, d = minColor.a; - minColor.r = maxColor.r; - minColor.g = maxColor.g; - minColor.b = maxColor.b; - minColor.a = maxColor.a; - maxColor.r = a; - maxColor.g = b; - maxColor.b = c; - maxColor.a = d; + float a = minColor.m_c[0], b = minColor.m_c[1], c = minColor.m_c[2], d = minColor.m_c[3]; + minColor.m_c[0] = maxColor.m_c[0]; + minColor.m_c[1] = maxColor.m_c[1]; + minColor.m_c[2] = maxColor.m_c[2]; + minColor.m_c[3] = maxColor.m_c[3]; + maxColor.m_c[0] = a; + maxColor.m_c[1] = b; + maxColor.m_c[2] = c; + maxColor.m_c[3] = d; #endif } // First find a solution using the block's PCA. 
- if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) + if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults, pComp_params)) return 0; if (pComp_params->m_try_least_squares) @@ -1100,7 +1293,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; } @@ -1141,7 +1334,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) @@ -1160,7 +1353,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) @@ -1181,7 +1374,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; // In uber levels 2+, try taking more advantage of endpoint extrapolation by scaling the selectors in one direction or another. 
@@ -1210,7 +1403,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; } } @@ -1221,7 +1414,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso { // Try encoding the partition as a single color by using the optimal singe colors tables to encode the block to its mean. color_cell_compressor_results avg_results = *pResults; - const uint32_t r = (int)(.5f + meanColor.r * 255.0f), g = (int)(.5f + meanColor.g * 255.0f), b = (int)(.5f + meanColor.b * 255.0f); + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); uint64_t avg_err = pack_mode1_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); if (avg_err < pResults->m_best_overall_err) { @@ -1234,7 +1427,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso { // Try encoding the partition as a single color by using the optimal singe colors tables to encode the block to its mean. 
color_cell_compressor_results avg_results = *pResults; - const uint32_t r = (int)(.5f + meanColor.r * 255.0f), g = (int)(.5f + meanColor.g * 255.0f), b = (int)(.5f + meanColor.b * 255.0f), a = (int)(.5f + meanColor.a * 255.0f); + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f), a = (int)(.5f + meanColor.m_c[3] * 255.0f); uint64_t avg_err = pack_mode7_to_one_color(pParams, &avg_results, r, g, b, a, pResults->m_pSelectors_temp, pParams->m_num_pixels, pParams->m_pPixels); if (avg_err < pResults->m_best_overall_err) { @@ -1247,46 +1440,46 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso return pResults->m_best_overall_err; } -static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const color_quad_u8 *pPixels, bc7enc_bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) +static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const color_rgba *pPixels, bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) { // Find RGB bounds as an approximation of the block's principle axis uint32_t lr = 255, lg = 255, lb = 255; uint32_t hr = 0, hg = 0, hb = 0; for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8 *pC = &pPixels[i]; - if (pC->r < lr) lr = pC->r; - if (pC->g < lg) lg = pC->g; - if (pC->b < lb) lb = pC->b; - if (pC->r > hr) hr = pC->r; - if (pC->g > hg) hg = pC->g; - if (pC->b > hb) hb = pC->b; + const color_rgba *pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; } - color_quad_u8 lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, 0); - color_quad_u8 highColor; color_quad_u8_set(&highColor, hr, hg, hb, 0); + color_rgba lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, 0); + 
color_rgba highColor; color_quad_u8_set(&highColor, hr, hg, hb, 0); // Place endpoints at bbox diagonals and compute interpolated colors const uint32_t N = 8; - color_quad_u8 weightedColors[8]; + color_rgba weightedColors[8]; weightedColors[0] = lowColor; weightedColors[N - 1] = highColor; for (uint32_t i = 1; i < (N - 1); i++) { - weightedColors[i].r = (uint8_t)((lowColor.r * (64 - g_bc7_weights3[i]) + highColor.r * g_bc7_weights3[i] + 32) >> 6); - weightedColors[i].g = (uint8_t)((lowColor.g * (64 - g_bc7_weights3[i]) + highColor.g * g_bc7_weights3[i] + 32) >> 6); - weightedColors[i].b = (uint8_t)((lowColor.b * (64 - g_bc7_weights3[i]) + highColor.b * g_bc7_weights3[i] + 32) >> 6); + weightedColors[i].m_c[0] = (uint8_t)((lowColor.m_c[0] * (64 - g_bc7_weights3[i]) + highColor.m_c[0] * g_bc7_weights3[i] + 32) >> 6); + weightedColors[i].m_c[1] = (uint8_t)((lowColor.m_c[1] * (64 - g_bc7_weights3[i]) + highColor.m_c[1] * g_bc7_weights3[i] + 32) >> 6); + weightedColors[i].m_c[2] = (uint8_t)((lowColor.m_c[2] * (64 - g_bc7_weights3[i]) + highColor.m_c[2] * g_bc7_weights3[i] + 32) >> 6); } // Compute dots and thresholds - const int ar = highColor.r - lowColor.r; - const int ag = highColor.g - lowColor.g; - const int ab = highColor.b - lowColor.b; + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; int dots[8]; for (uint32_t i = 0; i < N; i++) - dots[i] = weightedColors[i].r * ar + weightedColors[i].g * ag + weightedColors[i].b * ab; + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab; int thresh[8 - 1]; for (uint32_t i = 0; i < (N - 1); i++) @@ -1299,17 +1492,17 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo int l1[8], cr1[8], cb1[8]; for (int j = 0; j < 8; j++) { - const color_quad_u8 *pE1 = &weightedColors[j]; - l1[j] = pE1->r * 109 + pE1->g * 366 + pE1->b * 37; - cr1[j] = 
((int)pE1->r << 9) - l1[j]; - cb1[j] = ((int)pE1->b << 9) - l1[j]; + const color_rgba *pE1 = &weightedColors[j]; + l1[j] = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + cr1[j] = ((int)pE1->m_c[0] << 9) - l1[j]; + cb1[j] = ((int)pE1->m_c[2] << 9) - l1[j]; } for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8 *pC = &pPixels[i]; + const color_rgba *pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; // Find approximate selector uint32_t s = 0; @@ -1329,9 +1522,9 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo s = 1; // Compute error - const int l2 = pC->r * 109 + pC->g * 366 + pC->b * 37; - const int cr2 = ((int)pC->r << 9) - l2; - const int cb2 = ((int)pC->b << 9) - l2; + const int l2 = pC->m_c[0] * 109 + pC->m_c[1] * 366 + pC->m_c[2] * 37; + const int cr2 = ((int)pC->m_c[0] << 9) - l2; + const int cb2 = ((int)pC->m_c[2] << 9) - l2; const int dl = (l1[s] - l2) >> 8; const int dcr = (cr1[s] - cr2) >> 8; @@ -1348,9 +1541,9 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo { for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8 *pC = &pPixels[i]; + const color_rgba *pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; // Find approximate selector uint32_t s = 0; @@ -1370,11 +1563,11 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo s = 1; // Compute error - const color_quad_u8 *pE1 = &weightedColors[s]; + const color_rgba *pE1 = &weightedColors[s]; - int dr = (int)pE1->r - (int)pC->r; - int dg = (int)pE1->g - (int)pC->g; - int db = (int)pE1->b - (int)pC->b; + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; total_err += pweights[0] * (dr * dr) + pweights[1] * (dg * dg) + pweights[2] * (db * 
db); if (total_err > best_err_so_far) @@ -1385,51 +1578,51 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo return total_err; } -static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const color_quad_u8* pPixels, bc7enc_bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) +static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const color_rgba * pPixels, bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) { // Find RGB bounds as an approximation of the block's principle axis uint32_t lr = 255, lg = 255, lb = 255, la = 255; uint32_t hr = 0, hg = 0, hb = 0, ha = 0; for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8* pC = &pPixels[i]; - if (pC->r < lr) lr = pC->r; - if (pC->g < lg) lg = pC->g; - if (pC->b < lb) lb = pC->b; - if (pC->a < la) la = pC->a; - - if (pC->r > hr) hr = pC->r; - if (pC->g > hg) hg = pC->g; - if (pC->b > hb) hb = pC->b; - if (pC->a > ha) ha = pC->a; + const color_rgba* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[3] < la) la = pC->m_c[3]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + if (pC->m_c[3] > ha) ha = pC->m_c[3]; } - color_quad_u8 lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, la); - color_quad_u8 highColor; color_quad_u8_set(&highColor, hr, hg, hb, ha); + color_rgba lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, la); + color_rgba highColor; color_quad_u8_set(&highColor, hr, hg, hb, ha); // Place endpoints at bbox diagonals and compute interpolated colors const uint32_t N = 4; - color_quad_u8 weightedColors[4]; + color_rgba weightedColors[4]; weightedColors[0] = lowColor; weightedColors[N - 1] = highColor; for (uint32_t i = 1; i < (N - 1); i++) { - weightedColors[i].r = (uint8_t)((lowColor.r * (64 - g_bc7_weights2[i]) + highColor.r * 
g_bc7_weights2[i] + 32) >> 6); - weightedColors[i].g = (uint8_t)((lowColor.g * (64 - g_bc7_weights2[i]) + highColor.g * g_bc7_weights2[i] + 32) >> 6); - weightedColors[i].b = (uint8_t)((lowColor.b * (64 - g_bc7_weights2[i]) + highColor.b * g_bc7_weights2[i] + 32) >> 6); - weightedColors[i].a = (uint8_t)((lowColor.a * (64 - g_bc7_weights2[i]) + highColor.a * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[0] = (uint8_t)((lowColor.m_c[0] * (64 - g_bc7_weights2[i]) + highColor.m_c[0] * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[1] = (uint8_t)((lowColor.m_c[1] * (64 - g_bc7_weights2[i]) + highColor.m_c[1] * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[2] = (uint8_t)((lowColor.m_c[2] * (64 - g_bc7_weights2[i]) + highColor.m_c[2] * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[3] = (uint8_t)((lowColor.m_c[3] * (64 - g_bc7_weights2[i]) + highColor.m_c[3] * g_bc7_weights2[i] + 32) >> 6); } // Compute dots and thresholds - const int ar = highColor.r - lowColor.r; - const int ag = highColor.g - lowColor.g; - const int ab = highColor.b - lowColor.b; - const int aa = highColor.a - lowColor.a; + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; + const int aa = highColor.m_c[3] - lowColor.m_c[3]; int dots[4]; for (uint32_t i = 0; i < N; i++) - dots[i] = weightedColors[i].r * ar + weightedColors[i].g * ag + weightedColors[i].b * ab + weightedColors[i].a * aa; + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab + weightedColors[i].m_c[3] * aa; int thresh[4 - 1]; for (uint32_t i = 0; i < (N - 1); i++) @@ -1442,17 +1635,17 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo int l1[4], cr1[4], cb1[4]; for (int j = 0; j < 4; j++) { - const color_quad_u8* pE1 = &weightedColors[j]; - l1[j] = pE1->r * 109 + pE1->g * 366 + pE1->b * 37; - cr1[j] = ((int)pE1->r 
<< 9) - l1[j]; - cb1[j] = ((int)pE1->b << 9) - l1[j]; + const color_rgba* pE1 = &weightedColors[j]; + l1[j] = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + cr1[j] = ((int)pE1->m_c[0] << 9) - l1[j]; + cb1[j] = ((int)pE1->m_c[2] << 9) - l1[j]; } for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8* pC = &pPixels[i]; + const color_rgba* pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b + aa * pC->a; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; // Find approximate selector uint32_t s = 0; @@ -1464,15 +1657,15 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo s = 1; // Compute error - const int l2 = pC->r * 109 + pC->g * 366 + pC->b * 37; - const int cr2 = ((int)pC->r << 9) - l2; - const int cb2 = ((int)pC->b << 9) - l2; + const int l2 = pC->m_c[0] * 109 + pC->m_c[1] * 366 + pC->m_c[2] * 37; + const int cr2 = ((int)pC->m_c[0] << 9) - l2; + const int cb2 = ((int)pC->m_c[2] << 9) - l2; const int dl = (l1[s] - l2) >> 8; const int dcr = (cr1[s] - cr2) >> 8; const int dcb = (cb1[s] - cb2) >> 8; - const int dca = (int)pC->a - (int)weightedColors[s].a; + const int dca = (int)pC->m_c[3] - (int)weightedColors[s].m_c[3]; int ie = (pweights[0] * dl * dl) + (pweights[1] * dcr * dcr) + (pweights[2] * dcb * dcb) + (pweights[3] * dca * dca); @@ -1485,9 +1678,9 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo { for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8* pC = &pPixels[i]; + const color_rgba* pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b + aa * pC->a; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; // Find approximate selector uint32_t s = 0; @@ -1499,12 +1692,12 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo s = 1; // Compute error - const color_quad_u8* pE1 = &weightedColors[s]; + const color_rgba* pE1 = &weightedColors[s]; - int 
dr = (int)pE1->r - (int)pC->r; - int dg = (int)pE1->g - (int)pC->g; - int db = (int)pE1->b - (int)pC->b; - int da = (int)pE1->a - (int)pC->a; + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int da = (int)pE1->m_c[3] - (int)pC->m_c[3]; total_err += pweights[0] * (dr * dr) + pweights[1] * (dg * dg) + pweights[2] * (db * db) + pweights[3] * (da * da); if (total_err > best_err_so_far) @@ -1558,9 +1751,9 @@ static const uint32_t g_partition_predictors[35] = }; // Estimate the partition used by modes 1/7. This scans through each partition and computes an approximate error for each. -static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_compress_block_params *pComp_params, uint32_t pweights[4], uint32_t mode) +static uint32_t estimate_partition(const color_rgba *pPixels, const bc7enc_compress_block_params *pComp_params, uint32_t pweights[4], uint32_t mode) { - const uint32_t total_partitions = minimumu(pComp_params->m_max_partitions_mode, BC7ENC_MAX_PARTITIONS1); + const uint32_t total_partitions = minimumu(pComp_params->m_max_partitions, BC7ENC_MAX_PARTITIONS); if (total_partitions <= 1) return 0; @@ -1590,7 +1783,7 @@ static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_co const uint32_t partition = s_sorted_partition_order[partition_iter]; // Check to see if we should bother evaluating this partition at all, depending on the best partition found from the first 14. 
- if (pComp_params->m_mode_partition_estimation_filterbank) + if (pComp_params->m_mode17_partition_estimation_filterbank) { if ((partition_iter >= 14) && (partition_iter <= 34)) { @@ -1607,7 +1800,7 @@ static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_co const uint8_t *pPartition = &g_bc7_partition2[partition * 16]; - color_quad_u8 subset_colors[2][16]; + color_rgba subset_colors[2][16]; uint32_t subset_total_colors[2] = { 0, 0 }; for (uint32_t index = 0; index < 16; index++) subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = pPixels[index]; @@ -1621,6 +1814,11 @@ static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_co total_subset_err += color_cell_compression_est_mode1(subset_total_colors[subset], &subset_colors[subset][0], pComp_params->m_perceptual, pweights, best_err); } + if (partition < 16) + { + total_subset_err = (uint64_t)((double)total_subset_err * pComp_params->m_low_frequency_partition_weight + .5f); + } + if (total_subset_err < best_err) { best_err = total_subset_err; @@ -1653,20 +1851,20 @@ static void set_block_bits(uint8_t *pBytes, uint32_t val, uint32_t num_bits, uin assert(*pCur_ofs <= 128); } -typedef struct +struct bc7_optimization_results { uint32_t m_mode; uint32_t m_partition; uint8_t m_selectors[16]; uint8_t m_alpha_selectors[16]; - color_quad_u8 m_low[3]; - color_quad_u8 m_high[3]; + color_rgba m_low[3]; + color_rgba m_high[3]; uint32_t m_pbits[3][2]; uint32_t m_rotation; uint32_t m_index_selector; -} bc7_optimization_results; +}; -static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) +void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) { assert(pResults->m_index_selector <= 1); assert(pResults->m_rotation <= 3); @@ -1692,7 +1890,7 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu uint8_t alpha_selectors[16]; memcpy(alpha_selectors, pResults->m_alpha_selectors, 16); - 
color_quad_u8 low[3], high[3]; + color_rgba low[3], high[3]; memcpy(low, pResults->m_low, sizeof(low)); memcpy(high, pResults->m_high, sizeof(high)); @@ -1729,14 +1927,14 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu { for (uint32_t q = 0; q < 3; q++) { - uint8_t t = low[k][q]; - low[k][q] = high[k][q]; - high[k][q] = t; + uint8_t t = low[k].m_c[q]; + low[k].m_c[q] = high[k].m_c[q]; + high[k].m_c[q] = t; } } else { - color_quad_u8 tmp = low[k]; + color_rgba tmp = low[k]; low[k] = high[k]; high[k] = tmp; } @@ -1760,9 +1958,9 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu if (pPartition[i] == k) alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]); - uint8_t t = low[k].a; - low[k].a = high[k].a; - high[k].a = t; + uint8_t t = low[k].m_c[3]; + low[k].m_c[3] = high[k].m_c[3]; + high[k].m_c[3] = t; } } } @@ -1787,8 +1985,8 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu { for (uint32_t subset = 0; subset < total_subsets; subset++) { - set_block_bits(pBlock_bytes, low[subset][comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); - set_block_bits(pBlock_bytes, high[subset][comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? 
g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); } } @@ -1838,16 +2036,16 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu assert(cur_bit_ofs == 128); } -static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_compress_block_params* pComp_params, color_cell_compressor_params* pParams, uint32_t lo_a, uint32_t hi_a, bc7_optimization_results* pOpt_results5, uint64_t* pMode5_err, uint64_t* pMode5_alpha_err) +static void handle_alpha_block_mode5(const color_rgba* pPixels, const bc7enc_compress_block_params* pComp_params, color_cell_compressor_params* pParams, uint32_t lo_a, uint32_t hi_a, bc7_optimization_results* pOpt_results5, uint64_t* pMode5_err, uint64_t* pMode5_alpha_err) { pParams->m_pSelector_weights = g_bc7_weights2; pParams->m_pSelector_weightsx = (const vec4F*)g_bc7_weights2x; pParams->m_num_selector_weights = 4; pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_FALSE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; - pParams->m_has_alpha = BC7ENC_FALSE; + pParams->m_has_pbits = false; + pParams->m_endpoints_share_pbit = false; + pParams->m_has_alpha = false; pParams->m_perceptual = pComp_params->m_perceptual; @@ -1869,8 +2067,8 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ if (lo_a == hi_a) { *pMode5_alpha_err = 0; - pOpt_results5->m_low[0].a = (uint8_t)lo_a; - pOpt_results5->m_high[0].a = (uint8_t)hi_a; + pOpt_results5->m_low[0].m_c[3] = (uint8_t)lo_a; + pOpt_results5->m_high[0].m_c[3] = (uint8_t)hi_a; memset(pOpt_results5->m_alpha_selectors, 0, sizeof(pOpt_results5->m_alpha_selectors)); } else @@ -1893,7 +2091,7 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ uint64_t trial_alpha_err = 0; for (uint32_t i = 0; i < 16; i++) { - const int32_t a = pParams->m_pPixels[i].a; + const int32_t a = pParams->m_pPixels[i].m_c[3]; int s = 0; int32_t be = iabs32(a - vals[0]); @@ 
-1911,8 +2109,8 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ if (trial_alpha_err < *pMode5_alpha_err) { *pMode5_alpha_err = trial_alpha_err; - pOpt_results5->m_low[0].a = (uint8_t)lo_a; - pOpt_results5->m_high[0].a = (uint8_t)hi_a; + pOpt_results5->m_low[0].m_c[3] = (uint8_t)lo_a; + pOpt_results5->m_high[0].m_c[3] = (uint8_t)hi_a; memcpy(pOpt_results5->m_alpha_selectors, trial_alpha_selectors, sizeof(pOpt_results5->m_alpha_selectors)); } @@ -1938,35 +2136,44 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ } } -static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) +static void handle_alpha_block(void *pBlock, const color_rgba *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) { + assert((pComp_params->m_mode_mask & (1 << 6)) || (pComp_params->m_mode_mask & (1 << 5)) || (pComp_params->m_mode_mask & (1 << 7))); + pParams->m_pSelector_weights = g_bc7_weights4; pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights4x; pParams->m_num_selector_weights = 16; pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; - pParams->m_has_alpha = BC7ENC_TRUE; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = false; + pParams->m_has_alpha = true; pParams->m_perceptual = pComp_params->m_perceptual; pParams->m_num_pixels = 16; pParams->m_pPixels = pPixels; bc7_optimization_results opt_results6, opt_results5, opt_results7; - color_cell_compressor_results results6; - results6.m_pSelectors = opt_results6.m_selectors; + memset(&results6, 0, sizeof(results6)); + + uint64_t best_err = UINT64_MAX; + uint32_t best_mode = 0; uint8_t selectors_temp[16]; - results6.m_pSelectors_temp = selectors_temp; - uint64_t best_err = color_cell_compression(6, pParams, &results6, pComp_params); - 
uint32_t best_mode = 6; + if (pComp_params->m_mode_mask & (1 << 6)) + { + results6.m_pSelectors = opt_results6.m_selectors; + results6.m_pSelectors_temp = selectors_temp; + + best_err = (uint64_t)(color_cell_compression(6, pParams, &results6, pComp_params) * pComp_params->m_mode6_error_weight + .5f); + best_mode = 6; + } - if ((best_err > 0) && (pComp_params->m_use_mode5_for_alpha)) + if ((best_err > 0) && (pComp_params->m_mode_mask & (1 << 5))) { uint32_t lo_a = 255, hi_a = 0; for (uint32_t i = 0; i < 16; i++) { - uint32_t a = pPixels[i].a; + uint32_t a = pPixels[i].m_c[3]; lo_a = minimumu(lo_a, a); hi_a = maximumu(hi_a, a); } @@ -1974,6 +2181,8 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const uint64_t mode5_err, mode5_alpha_err; handle_alpha_block_mode5(pPixels, pComp_params, pParams, lo_a, hi_a, &opt_results5, &mode5_err, &mode5_alpha_err); + mode5_err = (uint64_t)(mode5_err * pComp_params->m_mode5_error_weight + .5f); + if (mode5_err < best_err) { best_err = mode5_err; @@ -1981,7 +2190,7 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const } } - if ((best_err > 0) && (pComp_params->m_use_mode7_for_alpha)) + if ((best_err > 0) && (pComp_params->m_mode_mask & (1 << 7))) { const uint32_t trial_partition = estimate_partition(pPixels, pComp_params, pParams->m_weights, 7); @@ -1989,13 +2198,13 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const pParams->m_pSelector_weightsx = (const vec4F*)g_bc7_weights2x; pParams->m_num_selector_weights = 4; pParams->m_comp_bits = 5; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; - pParams->m_has_alpha = BC7ENC_TRUE; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = false; + pParams->m_has_alpha = true; const uint8_t* pPartition = &g_bc7_partition2[trial_partition * 16]; - color_quad_u8 subset_colors[2][16]; + color_rgba subset_colors[2][16]; uint32_t subset_total_colors7[2] = { 
0, 0 }; @@ -2022,14 +2231,16 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const pResults->m_pSelectors_temp = selectors_temp; uint64_t err = color_cell_compression(7, pParams, pResults, pComp_params); trial_err += err; - if (trial_err > best_err) + if ((uint64_t)(trial_err * pComp_params->m_mode7_error_weight + .5f) > best_err) break; } // subset - if (trial_err < best_err) + const uint64_t mode7_trial_err = (uint64_t)(trial_err * pComp_params->m_mode7_error_weight + .5f); + + if (mode7_trial_err < best_err) { - best_err = trial_err; + best_err = mode7_trial_err; best_mode = 7; opt_results7.m_mode = 7; opt_results7.m_partition = trial_partition; @@ -2073,43 +2284,56 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const encode_bc7_block(pBlock, &opt_results6); } + else + { + assert(0); + } } -static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) +static void handle_opaque_block(void *pBlock, const color_rgba *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) { + assert((pComp_params->m_mode_mask & (1 << 6)) || (pComp_params->m_mode_mask & (1 << 1))); + uint8_t selectors_temp[16]; - - // Mode 6 + bc7_optimization_results opt_results; - - pParams->m_pSelector_weights = g_bc7_weights4; - pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights4x; - pParams->m_num_selector_weights = 16; - pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; + + uint64_t best_err = UINT64_MAX; + pParams->m_perceptual = pComp_params->m_perceptual; pParams->m_num_pixels = 16; pParams->m_pPixels = pPixels; - pParams->m_has_alpha = BC7ENC_FALSE; + pParams->m_has_alpha = false; - color_cell_compressor_results results6; - results6.m_pSelectors = opt_results.m_selectors; - results6.m_pSelectors_temp = 
selectors_temp; - - uint64_t best_err = color_cell_compression(6, pParams, &results6, pComp_params); - - opt_results.m_mode = 6; opt_results.m_partition = 0; - opt_results.m_low[0] = results6.m_low_endpoint; - opt_results.m_high[0] = results6.m_high_endpoint; - opt_results.m_pbits[0][0] = results6.m_pbits[0]; - opt_results.m_pbits[0][1] = results6.m_pbits[1]; opt_results.m_index_selector = 0; opt_results.m_rotation = 0; + // Mode 6 + if (pComp_params->m_mode_mask & (1 << 6)) + { + pParams->m_pSelector_weights = g_bc7_weights4; + pParams->m_pSelector_weightsx = (const vec4F*)g_bc7_weights4x; + pParams->m_num_selector_weights = 16; + pParams->m_comp_bits = 7; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = false; + + color_cell_compressor_results results6; + results6.m_pSelectors = opt_results.m_selectors; + results6.m_pSelectors_temp = selectors_temp; + + best_err = (uint64_t)(color_cell_compression(6, pParams, &results6, pComp_params) * pComp_params->m_mode6_error_weight + .5f); + + opt_results.m_mode = 6; + opt_results.m_low[0] = results6.m_low_endpoint; + opt_results.m_high[0] = results6.m_high_endpoint; + opt_results.m_pbits[0][0] = results6.m_pbits[0]; + opt_results.m_pbits[0][1] = results6.m_pbits[1]; + } + // Mode 1 - if ((best_err > 0) && (pComp_params->m_max_partitions_mode > 0)) + if ((best_err > 0) && (pComp_params->m_max_partitions > 0) && (pComp_params->m_mode_mask & (1 << 1))) { const uint32_t trial_partition = estimate_partition(pPixels, pComp_params, pParams->m_weights, 1); @@ -2117,12 +2341,12 @@ static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, cons pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights3x; pParams->m_num_selector_weights = 8; pParams->m_comp_bits = 6; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_TRUE; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = true; const uint8_t *pPartition = &g_bc7_partition2[trial_partition * 16]; - 
color_quad_u8 subset_colors[2][16]; + color_rgba subset_colors[2][16]; uint32_t subset_total_colors1[2] = { 0, 0 }; @@ -2150,14 +2374,15 @@ static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, cons uint64_t err = color_cell_compression(1, pParams, pResults, pComp_params); trial_err += err; - if (trial_err > best_err) + if ((uint64_t)(trial_err * pComp_params->m_mode1_error_weight + .5f) > best_err) break; } // subset - if (trial_err < best_err) + const uint64_t mode1_trial_err = (uint64_t)(trial_err * pComp_params->m_mode1_error_weight + .5f); + if (mode1_trial_err < best_err) { - best_err = trial_err; + best_err = mode1_trial_err; opt_results.m_mode = 1; opt_results.m_partition = trial_partition; for (uint32_t subset = 0; subset < 2; subset++) @@ -2174,11 +2399,11 @@ static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, cons encode_bc7_block(pBlock, &opt_results); } -bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params) +bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params) { assert(g_bc7_mode_1_optimal_endpoints[255][0].m_hi != 0); - const color_quad_u8 *pPixels = (const color_quad_u8 *)(pPixelsRGBA); + const color_rgba *pPixels = (const color_rgba *)(pPixelsRGBA); color_cell_compressor_params params; if (pComp_params->m_perceptual) @@ -2193,25 +2418,133 @@ bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const b } else memcpy(params.m_weights, pComp_params->m_weights, sizeof(params.m_weights)); + + if (pComp_params->m_force_alpha) + { + handle_alpha_block(pBlock, pPixels, pComp_params, ¶ms); + return true; + } for (uint32_t i = 0; i < 16; i++) { - if (pPixels[i].a < 255) + if (pPixels[i].m_c[3] < 255) { handle_alpha_block(pBlock, pPixels, pComp_params, ¶ms); - return BC7ENC_TRUE; + return true; } } handle_opaque_block(pBlock, pPixels, pComp_params, ¶ms); - return 
BC7ENC_FALSE; + return false; } +/* +static const uint8_t g_tdefl_small_dist_extra[512] = +{ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 +}; + +static const uint8_t g_tdefl_large_dist_extra[128] = +{ + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 +}; + +static inline uint32_t compute_match_cost_estimate(uint32_t dist, uint32_t match_len_in_bytes) +{ + assert(match_len_in_bytes <= 258); + + uint32_t len_cost = 6; + if (match_len_in_bytes >= 12) + len_cost = 9; + else if (match_len_in_bytes >= 8) + len_cost = 8; + else if (match_len_in_bytes >= 6) + len_cost = 7; + + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[std::min(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; +} +*/ +class tracked_stat +{ +public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + float get_std_dev() const { return m_num ? 
sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + float get_variance() const { float s = get_std_dev(); return s * s; } + +private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; +}; + +/* +static inline float compute_block_max_std_dev(const color_rgba* pPixels) +{ + tracked_stat r_stats, g_stats, b_stats, a_stats; + + for (uint32_t i = 0; i < 16; i++) + { + r_stats.update(pPixels[i].m_c[0]); + g_stats.update(pPixels[i].m_c[1]); + b_stats.update(pPixels[i].m_c[2]); + a_stats.update(pPixels[i].m_c[3]); + } + + return std::max(std::max(std::max(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev()); +} +*/ +struct bc7_block +{ + uint8_t m_bytes[16]; + + uint32_t get_mode() const + { + uint32_t bc7_mode = 0; + while (((m_bytes[0] & (1 << bc7_mode)) == 0) && (bc7_mode < 8)) + bc7_mode++; + return bc7_mode; + } +}; + /* ------------------------------------------------------------------------------ This software is available under 2 licenses -- choose whichever you prefer. +If you use this software in a product, attribution / credits is requested but not required. ------------------------------------------------------------------------------ ALTERNATIVE A - MIT License -Copyright(c) 2020 Richard Geldreich, Jr. +Copyright(c) 2020-2021 Richard Geldreich, Jr. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal in the Software without restriction, including without limitation the rights to diff --git a/libkram/bc7enc/bc7enc.h b/libkram/bc7enc/bc7enc.h index 2dbd910..8794c15 100644 --- a/libkram/bc7enc/bc7enc.h +++ b/libkram/bc7enc/bc7enc.h @@ -1,23 +1,22 @@ // File: bc7enc.h - Richard Geldreich, Jr. - MIT license or public domain (see end of bc7enc.c) +// If you use this software in a product, attribution / credits is requested but not required. 
#include #include - -#ifdef __cplusplus -extern "C" { -#endif +#include +#include #define BC7ENC_BLOCK_SIZE (16) -#define BC7ENC_MAX_PARTITIONS1 (64) +#define BC7ENC_MAX_PARTITIONS (64) #define BC7ENC_MAX_UBER_LEVEL (4) -typedef uint8_t bc7enc_bool; -#define BC7ENC_TRUE (1) -#define BC7ENC_FALSE (0) +struct color_rgba { uint8_t m_c[4]; }; -typedef struct +struct bc7enc_compress_block_params { - // m_max_partitions_mode may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. The higher this value, the slower the compressor, but the higher the quality. - uint32_t m_max_partitions_mode; + uint32_t m_mode_mask; + + // m_max_partitions may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_max_partitions; // Relative RGBA or YCbCrA weights. uint32_t m_weights[4]; @@ -26,23 +25,58 @@ typedef struct uint32_t m_uber_level; // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB. - bc7enc_bool m_perceptual; + bool m_perceptual; // Set m_try_least_squares to false for slightly faster/lower quality compression. - bc7enc_bool m_try_least_squares; + bool m_try_least_squares; - // When m_mode_partition_estimation_filterbank, the mode1 partition estimator skips lesser used partition patterns unless they are strongly predicted to be potentially useful. + // When m_mode17_partition_estimation_filterbank, the mode1 partition estimator skips lesser used partition patterns unless they are strongly predicted to be potentially useful. // There's a slight loss in quality with this enabled (around .08 dB RGB PSNR or .05 dB Y PSNR), but up to a 11% gain in speed depending on the other settings. 
- bc7enc_bool m_mode_partition_estimation_filterbank; + bool m_mode17_partition_estimation_filterbank; + + bool m_force_alpha; + + bool m_force_selectors; + uint8_t m_selectors[16]; + + bool m_quant_mode6_endpoints; + bool m_bias_mode1_pbits; - bc7enc_bool m_use_mode5_for_alpha; - bc7enc_bool m_use_mode7_for_alpha; + float m_pbit1_weight; -} bc7enc_compress_block_params; + float m_mode1_error_weight; + float m_mode5_error_weight; + float m_mode6_error_weight; + float m_mode7_error_weight; + + float m_low_frequency_partition_weight; + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void print() + { + printf("Mode mask: 0x%X\n", m_mode_mask); + printf("Max partitions: %u\n", m_max_partitions); + printf("Weights: %u %u %u %u\n", m_weights[0], m_weights[1], m_weights[2], m_weights[3]); + printf("Uber level: %u\n", m_uber_level); + printf("Perceptual: %u\n", m_perceptual); + printf("Try least squares: %u\n", m_try_least_squares); + printf("Mode 1/7 partition estimation filterbank: %u\n", m_mode17_partition_estimation_filterbank); + printf("Force alpha: %u\n", m_force_alpha); + printf("Quant mode 6 endpoints: %u\n", m_quant_mode6_endpoints); + printf("Bias mode 1 p-bits: %u\n", m_bias_mode1_pbits); + printf("p-bit 1 weight: %f\n", m_pbit1_weight); + printf("Mode error weights: %f %f %f %f\n", m_mode1_error_weight, m_mode5_error_weight, m_mode6_error_weight, m_mode7_error_weight); + printf("Low frequency partition weight: %f\n", m_low_frequency_partition_weight); + } +}; inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params *p) { - p->m_perceptual = BC7ENC_FALSE; + p->m_perceptual = false; p->m_weights[0] = 1; p->m_weights[1] = 1; p->m_weights[2] = 1; @@ -51,7 +85,7 @@ inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_blo inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params *p) { - p->m_perceptual = BC7ENC_TRUE; + p->m_perceptual = true; p->m_weights[0] = 
128; p->m_weights[1] = 64; p->m_weights[2] = 16; @@ -60,23 +94,30 @@ inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params *p) { - p->m_max_partitions_mode = BC7ENC_MAX_PARTITIONS1; - p->m_try_least_squares = BC7ENC_TRUE; - p->m_mode_partition_estimation_filterbank = BC7ENC_TRUE; + p->m_mode_mask = UINT32_MAX; + p->m_max_partitions = BC7ENC_MAX_PARTITIONS; + p->m_try_least_squares = true; + p->m_mode17_partition_estimation_filterbank = true; p->m_uber_level = 0; - p->m_use_mode5_for_alpha = BC7ENC_TRUE; - p->m_use_mode7_for_alpha = BC7ENC_TRUE; + p->m_force_selectors = false; + p->m_force_alpha = false; + p->m_quant_mode6_endpoints = false; + p->m_bias_mode1_pbits = false; + p->m_pbit1_weight = 1.0f; + p->m_mode1_error_weight = 1.0f; + p->m_mode5_error_weight = 1.0f; + p->m_mode6_error_weight = 1.0f; + p->m_mode7_error_weight = 1.0f; + p->m_low_frequency_partition_weight = 1.0f; bc7enc_compress_block_params_init_perceptual_weights(p); } // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). void bc7enc_compress_block_init(); -// Packs a single block of 4x4=16 RGBA pixels (R first in memory) to 128-bit BC7 block pBlock, using either mode 1 and/or 6. +// Packs a single block of 4x4=16 RGBA pixels (R first in memory) to 128-bit BC7 block pBlock, using either mode 1 and/or 6. // Alpha blocks will always use mode 6, and by default opaque blocks will use either modes 1 or 6. -// Returns BC7ENC_TRUE if the block had any pixels with alpha < 255, otherwise it return BC7ENC_FALSE. (This is not an error code - a block is always encoded.) -bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params); +// Returns true if the block had any pixels with alpha < 255, otherwise it returns false. (This is not an error code - a block is always encoded.) 
+bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params); + -#ifdef __cplusplus -} -#endif diff --git a/libkram/bc7enc/ert.cpp b/libkram/bc7enc/ert.cpp new file mode 100644 index 0000000..c09b966 --- /dev/null +++ b/libkram/bc7enc/ert.cpp @@ -0,0 +1,705 @@ +#include "ert.h" +#include +#include +#include +#include "utils.h" + +#define ERT_FAVOR_CONT_AND_REP0_MATCHES (1) +#define ERT_FAVOR_REP0_MATCHES (0) + +namespace ert +{ + const uint32_t MAX_BLOCK_PIXELS = 12 * 12; + const uint32_t MAX_BLOCK_SIZE_IN_BYTES = 256; + const uint32_t MIN_MATCH_LEN = 3; + const float LITERAL_BITS = 13.0f; + const float MATCH_CONTINUE_BITS = 1.0f; + const float MATCH_REP0_BITS = 4.0f; + + static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } + + static const uint8_t g_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static const uint8_t g_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + + static inline uint32_t compute_match_cost_estimate(uint32_t dist, uint32_t match_len_in_bytes) + { + assert(match_len_in_bytes <= 258); + + uint32_t len_cost = 6; + if (match_len_in_bytes >= 12) + len_cost = 9; + else if (match_len_in_bytes >= 8) + len_cost = 8; + else if (match_len_in_bytes >= 6) + len_cost = 7; + + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[std::min(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; + } + + class tracked_stat + { + public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { 
update(val); return *this; } + + uint32_t get_number_of_values() { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; + }; + + static inline float compute_block_max_std_dev(const color_rgba* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps) + { + tracked_stat comp_stats[4]; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba* pPixel = pPixels + x + y * block_width; + + for (uint32_t c = 0; c < num_comps; c++) + comp_stats[c].update(pPixel->m_c[c]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + return max_std_dev; + } + + static inline float compute_block_mse(const color_rgba* pPixelsA, const color_rgba* pPixelsB, uint32_t block_width, uint32_t block_height, uint32_t total_block_pixels, uint32_t num_comps, const uint32_t weights[4], float one_over_total_color_weight) + { + uint64_t total_err = 0; + + if ((block_width == 4) && (block_height == 4) && (num_comps == 4)) + { + if ((weights[0] == 1) && (weights[1] == 1) && (weights[2] == 1) && (weights[3] == 1)) + { + for (uint32_t i = 0; i < 16; i++) + { + const color_rgba* pA = pPixelsA + i; + const color_rgba* pB = pPixelsB + i; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + const int db = pA->m_c[2] - pB->m_c[2]; + const int da = pA->m_c[3] - pB->m_c[3]; + + total_err += dr * dr + dg * dg + db * db + da * da; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const color_rgba* 
pA = pPixelsA + i; + const color_rgba* pB = pPixelsB + i; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + const int db = pA->m_c[2] - pB->m_c[2]; + const int da = pA->m_c[3] - pB->m_c[3]; + + total_err += weights[0] * dr * dr + weights[1] * dg * dg + weights[2] * db * db + weights[3] * da * da; + } + } + } + else if ((block_width == 4) && (block_height == 4) && (num_comps == 3)) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t y_ofs = y * 4; + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + const int db = pA->m_c[2] - pB->m_c[2]; + + total_err += weights[0] * dr * dr + weights[1] * dg * dg + weights[2] * db * db; + } + } + } + else if ((block_width == 4) && (block_height == 4) && (num_comps == 2)) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t y_ofs = y * 4; + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + + total_err += weights[0] * dr * dr + weights[1] * dg * dg; + } + } + } + else if ((block_width == 4) && (block_height == 4) && (num_comps == 1)) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t y_ofs = y * 4; + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + const int dr = pA->m_c[0] - pB->m_c[0]; + + total_err += weights[0] * dr * dr; + } + } + } + else + { + for (uint32_t y = 0; y < block_height; y++) + { + const uint32_t y_ofs = y * block_width; + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + for (uint32_t c = 0; c < num_comps; c++) + { + const int d = pA->m_c[c] - 
pB->m_c[c]; + total_err += weights[c] * d * d; + } + } + } + } + + return total_err * (one_over_total_color_weight / total_block_pixels); + } + + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len, uint32_t salt) + { + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len + (salt << 16)); + + const uint32_t bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t* pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; + } + + // BC7 entropy reduction transform with Deflate/LZMA/LZHAM optimizations + bool reduce_entropy(void* pBlocks, uint32_t num_blocks, + uint32_t total_block_stride_in_bytes, uint32_t block_size_to_optimize_in_bytes, uint32_t block_width, uint32_t block_height, uint32_t num_comps, + const color_rgba* pBlock_pixels, const reduce_entropy_params& params, uint32_t& total_modified, + pUnpack_block_func pUnpack_block_func, void* pUnpack_block_func_user_data, + std::vector* pBlock_mse_scales) + { + assert(total_block_stride_in_bytes && block_size_to_optimize_in_bytes); + assert(total_block_stride_in_bytes >= block_size_to_optimize_in_bytes); + + assert(num_comps >= 1 && num_comps <= 4); + for (uint32_t i = num_comps; i < 4; i++) + { + assert(!params.m_color_weights[i]); + if (params.m_color_weights[i]) + return false; + } + + const uint32_t total_color_weight = params.m_color_weights[0] + params.m_color_weights[1] + params.m_color_weights[2] + 
params.m_color_weights[3]; + assert(total_color_weight); + const float one_over_total_color_weight = 1.0f / total_color_weight; + + assert((block_size_to_optimize_in_bytes >= MIN_MATCH_LEN) && (block_size_to_optimize_in_bytes <= MAX_BLOCK_SIZE_IN_BYTES)); + if ((block_size_to_optimize_in_bytes < MIN_MATCH_LEN) || (block_size_to_optimize_in_bytes > MAX_BLOCK_SIZE_IN_BYTES)) + return false; + + uint8_t* pBlock_bytes = (uint8_t*)pBlocks; + + const uint32_t total_block_pixels = block_width * block_height; + if (total_block_pixels > MAX_BLOCK_PIXELS) + return false; + + const int total_blocks_to_check = std::max(1U, params.m_lookback_window_size / total_block_stride_in_bytes); + + std::vector len_hist(MAX_BLOCK_SIZE_IN_BYTES + 1); + std::vector second_len_hist(MAX_BLOCK_SIZE_IN_BYTES + 1); + uint32_t total_second_matches = 0; + + int prev_match_window_ofs_to_favor_cont = -1, prev_match_dist_to_favor = -1; + + uint32_t total_smooth_blocks = 0; + + const uint32_t HASH_SIZE = 8192; + uint32_t hash[HASH_SIZE]; + + for (uint32_t block_index = 0; block_index < num_blocks; block_index++) + { + if ((block_index & 0xFF) == 0) + memset(hash, 0, sizeof(hash)); + + uint8_t* pOrig_block = &pBlock_bytes[block_index * total_block_stride_in_bytes]; + const color_rgba* pPixels = &pBlock_pixels[block_index * total_block_pixels]; + + color_rgba decoded_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(pOrig_block, decoded_block, block_index, pUnpack_block_func_user_data)) + return false; + + float cur_mse = compute_block_mse(pPixels, decoded_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, one_over_total_color_weight); + + if ((params.m_skip_zero_mse_blocks) && (cur_mse == 0.0f)) + continue; + + const float max_std_dev = compute_block_max_std_dev(pPixels, block_width, block_height, num_comps); + + float yl = clampf(max_std_dev / params.m_max_smooth_block_std_dev, 0.0f, 1.0f); + yl = yl * yl; + float smooth_block_mse_scale = 
lerp(params.m_smooth_block_max_mse_scale, 1.0f, yl); + + if (pBlock_mse_scales) + { + if ((*pBlock_mse_scales)[block_index] > 0.0f) + { + smooth_block_mse_scale = (*pBlock_mse_scales)[block_index]; + } + } + + if (smooth_block_mse_scale > 1.0f) + total_smooth_blocks++; + + float cur_bits = (LITERAL_BITS * block_size_to_optimize_in_bytes); + float cur_t = cur_mse * smooth_block_mse_scale + cur_bits * params.m_lambda; + + int first_block_to_check = std::max(0, block_index - total_blocks_to_check); + int last_block_to_check = block_index - 1; + + uint8_t best_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(best_block, pOrig_block, block_size_to_optimize_in_bytes); + + float best_t = cur_t; + uint32_t best_match_len = 0, best_match_src_window_ofs = 0, best_match_dst_window_ofs = 0, best_match_src_block_ofs = 0, best_match_dst_block_ofs = 0; + float best_match_bits = 0; + + // Don't let thresh_ms_err be 0 to let zero error blocks have slightly increased distortion + const float thresh_ms_err = params.m_max_allowed_rms_increase_ratio * params.m_max_allowed_rms_increase_ratio * std::max(cur_mse, 1.0f); + + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const uint8_t* pPrev_blk = &pBlock_bytes[prev_block_index * total_block_stride_in_bytes]; + + for (uint32_t len = block_size_to_optimize_in_bytes; len >= MIN_MATCH_LEN; len--) + { + if (params.m_allow_relative_movement) + { + for (uint32_t src_ofs = 0; src_ofs <= (block_size_to_optimize_in_bytes - len); src_ofs++) + { + assert(len + src_ofs <= block_size_to_optimize_in_bytes); + + const uint32_t src_match_window_ofs = prev_block_index * total_block_stride_in_bytes + src_ofs; + + for (uint32_t dst_ofs = 0; dst_ofs <= (block_size_to_optimize_in_bytes - len); dst_ofs++) + { + assert(len + dst_ofs <= block_size_to_optimize_in_bytes); + + const uint32_t dst_match_window_ofs = block_index * total_block_stride_in_bytes + dst_ofs; + + const uint32_t match_dist = 
dst_match_window_ofs - src_match_window_ofs; + + float trial_match_bits, trial_total_bits; + + uint32_t hs = hash_hsieh(pPrev_blk + src_ofs, len, dst_ofs); + +#if ERT_FAVOR_CONT_AND_REP0_MATCHES + // Continue a previous match (which would cross block boundaries) + if (((int)src_match_window_ofs == prev_match_window_ofs_to_favor_cont) && (dst_ofs == 0)) + { + trial_match_bits = MATCH_CONTINUE_BITS; + trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_CONTINUE_BITS; + } + // Exploit REP0 matches + else if ((prev_match_dist_to_favor != -1) && (src_match_window_ofs == (dst_match_window_ofs - prev_match_dist_to_favor))) + { + trial_match_bits = MATCH_REP0_BITS; + trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_REP0_BITS; + } + else + { + trial_match_bits = (float)compute_match_cost_estimate(match_dist, len); + trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + trial_match_bits; + + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } + } +#else + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } +#endif + + hash[hs & (HASH_SIZE - 1)] = (hs & 0xFFFFFF00) | (block_index & 0xFF); + + const float trial_total_bits_times_lambda = trial_total_bits * params.m_lambda; + + uint8_t trial_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(trial_block, pOrig_block, block_size_to_optimize_in_bytes); + memcpy(trial_block + dst_ofs, pPrev_blk + src_ofs, len); + + color_rgba decoded_trial_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(trial_block, decoded_trial_block, block_index, pUnpack_block_func_user_data)) + continue; + + float trial_mse = compute_block_mse(pPixels, decoded_trial_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, 
one_over_total_color_weight); + + if (trial_mse < thresh_ms_err) + { + float t = trial_mse * smooth_block_mse_scale + trial_total_bits_times_lambda; + + if (t < best_t) + { + best_t = t; + memcpy(best_block, trial_block, block_size_to_optimize_in_bytes); + best_match_len = len; + best_match_src_window_ofs = src_match_window_ofs; + best_match_dst_window_ofs = dst_match_window_ofs; + best_match_src_block_ofs = src_ofs; + best_match_dst_block_ofs = dst_ofs; + best_match_bits = trial_match_bits; + } + } + + } // dst_ofs + } // src_ofs + } + else + { + const uint32_t match_dist = (block_index - prev_block_index) * total_block_stride_in_bytes; + + // Assume the block has 1 match and block_size_to_optimize_in_bytes-match_len literals. + const float trial_match_bits = (float)compute_match_cost_estimate(match_dist, len); + const float trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + trial_match_bits; + const float trial_total_bits_times_lambda = trial_total_bits * params.m_lambda; + + for (uint32_t ofs = 0; ofs <= (block_size_to_optimize_in_bytes - len); ofs++) + { + assert(len + ofs <= block_size_to_optimize_in_bytes); + + const uint32_t dst_match_window_ofs = block_index * total_block_stride_in_bytes + ofs; + const uint32_t src_match_window_ofs = prev_block_index * total_block_stride_in_bytes + ofs; + + float trial_match_bits_to_use = trial_match_bits; + float trial_total_bits_times_lambda_to_use = trial_total_bits_times_lambda; + + uint32_t hs = hash_hsieh(pPrev_blk + ofs, len, ofs); + +#if ERT_FAVOR_CONT_AND_REP0_MATCHES + // Continue a previous match (which would cross block boundaries) + if (((int)src_match_window_ofs == prev_match_window_ofs_to_favor_cont) && (ofs == 0)) + { + float continue_match_trial_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_CONTINUE_BITS; + trial_match_bits_to_use = MATCH_CONTINUE_BITS; + trial_total_bits_times_lambda_to_use = continue_match_trial_bits * params.m_lambda; + } + // Exploit REP0 
matches + else if ((prev_match_dist_to_favor != -1) && (src_match_window_ofs == (dst_match_window_ofs - prev_match_dist_to_favor))) + { + float continue_match_trial_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_REP0_BITS; + trial_match_bits_to_use = MATCH_REP0_BITS; + trial_total_bits_times_lambda_to_use = continue_match_trial_bits * params.m_lambda; + } + else + { + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } + } +#else + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } +#endif + + hash[hs & (HASH_SIZE - 1)] = (hs & 0xFFFFFF00) | (block_index & 0xFF); + + uint8_t trial_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(trial_block, pOrig_block, block_size_to_optimize_in_bytes); + memcpy(trial_block + ofs, pPrev_blk + ofs, len); + + color_rgba decoded_trial_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(trial_block, decoded_trial_block, block_index, pUnpack_block_func_user_data)) + continue; + + float trial_mse = compute_block_mse(pPixels, decoded_trial_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, one_over_total_color_weight); + + if (trial_mse < thresh_ms_err) + { + float t = trial_mse * smooth_block_mse_scale + trial_total_bits_times_lambda_to_use; + + if (t < best_t) + { + best_t = t; + memcpy(best_block, trial_block, block_size_to_optimize_in_bytes); + best_match_len = len; + best_match_src_window_ofs = src_match_window_ofs; + best_match_dst_window_ofs = dst_match_window_ofs; + best_match_src_block_ofs = ofs; + best_match_dst_block_ofs = ofs; + best_match_bits = trial_match_bits_to_use; + } + } + } // ofs + } + + } // len + + } // prev_block_index + + if (best_t < cur_t) + { + uint32_t best_second_match_len = 0, best_second_match_src_window_ofs = 0, 
best_second_match_dst_window_ofs = 0, best_second_match_src_block_ofs = 0, best_second_match_dst_block_ofs = 0; + + // Try injecting a second match, being sure it does't overlap with the first. + if ((params.m_try_two_matches) && (best_match_len <= (block_size_to_optimize_in_bytes - 3))) + { + uint8_t matched_flags[MAX_BLOCK_SIZE_IN_BYTES]; + memset(matched_flags, 0, sizeof(matched_flags)); + memset(matched_flags + best_match_dst_block_ofs, 1, best_match_len); + + uint8_t orig_best_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(orig_best_block, best_block, block_size_to_optimize_in_bytes); + + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const uint8_t* pPrev_blk = &pBlock_bytes[prev_block_index * total_block_stride_in_bytes]; + + const uint32_t match_dist = (block_index - prev_block_index) * total_block_stride_in_bytes; + + for (uint32_t len = 3; len <= (block_size_to_optimize_in_bytes - best_match_len); len++) + { + const float trial_total_bits = (block_size_to_optimize_in_bytes - len - best_match_len) * LITERAL_BITS + compute_match_cost_estimate(match_dist, len) + best_match_bits; + + const float trial_total_bits_times_lambda = trial_total_bits * params.m_lambda; + + for (uint32_t ofs = 0; ofs <= (block_size_to_optimize_in_bytes - len); ofs++) + { + int i; + for (i = 0; i < (int)len; i++) + if (matched_flags[ofs + i]) + break; + if (i != (int)len) + continue; + + assert(len + ofs <= block_size_to_optimize_in_bytes); + + const uint32_t dst_match_window_ofs = block_index * total_block_stride_in_bytes + ofs; + const uint32_t src_match_window_ofs = prev_block_index * total_block_stride_in_bytes + ofs; + + uint8_t trial_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(trial_block, orig_best_block, block_size_to_optimize_in_bytes); + memcpy(trial_block + ofs, pPrev_blk + ofs, len); + + color_rgba decoded_trial_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(trial_block, decoded_trial_block, block_index, 
pUnpack_block_func_user_data)) + continue; + + float trial_mse = compute_block_mse(pPixels, decoded_trial_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, one_over_total_color_weight); + + if (trial_mse < thresh_ms_err) + { + float t = trial_mse * smooth_block_mse_scale + trial_total_bits_times_lambda; + + if (t < best_t) + { + best_t = t; + memcpy(best_block, trial_block, block_size_to_optimize_in_bytes); + best_second_match_len = len; + best_second_match_src_window_ofs = src_match_window_ofs; + best_second_match_dst_window_ofs = dst_match_window_ofs; + best_second_match_src_block_ofs = ofs; + best_second_match_dst_block_ofs = ofs; + } + } + } + } + } + } + + memcpy(pOrig_block, best_block, block_size_to_optimize_in_bytes); + total_modified++; + + if ((best_second_match_len == 0) || (best_match_dst_window_ofs > best_second_match_dst_window_ofs)) + { + int best_match_dist = best_match_dst_window_ofs - best_match_src_window_ofs; + assert(best_match_dist >= 1); + (void)best_match_dist; + + if (block_size_to_optimize_in_bytes == total_block_stride_in_bytes) + { + // If the match goes all the way to the end of a block, we can try to continue it on the next encoded block. + if ((best_match_dst_block_ofs + best_match_len) == total_block_stride_in_bytes) + prev_match_window_ofs_to_favor_cont = best_match_src_window_ofs + best_match_len; + else + prev_match_window_ofs_to_favor_cont = -1; + } + +#if ERT_FAVOR_REP0_MATCHES + // Compute the window offset where a cheaper REP0 match would be available + prev_match_dist_to_favor = best_match_dist; +#endif + } + else + { + int best_match_dist = best_second_match_dst_window_ofs - best_second_match_src_window_ofs; + assert(best_match_dist >= 1); + (void)best_match_dist; + + if (block_size_to_optimize_in_bytes == total_block_stride_in_bytes) + { + // If the match goes all the way to the end of a block, we can try to continue it on the next encoded block. 
+ if ((best_second_match_dst_block_ofs + best_second_match_len) == total_block_stride_in_bytes) + prev_match_window_ofs_to_favor_cont = best_second_match_src_window_ofs + best_second_match_len; + else + prev_match_window_ofs_to_favor_cont = -1; + } + +#if ERT_FAVOR_REP0_MATCHES + // Compute the window offset where a cheaper REP0 match would be available + prev_match_dist_to_favor = best_match_dist; +#endif + } + + len_hist[best_match_len]++; + + if (best_second_match_len) + { + second_len_hist[best_second_match_len]++; + total_second_matches++; + } + } + else + { + prev_match_window_ofs_to_favor_cont = -1; + } + + } // block_index + + if (params.m_debug_output) + { + printf("Total smooth blocks: %3.2f%%\n", total_smooth_blocks * 100.0f / num_blocks); + + printf("Match length histogram:\n"); + for (uint32_t i = MIN_MATCH_LEN; i <= block_size_to_optimize_in_bytes; i++) + printf("%u%c", len_hist[i], (i < block_size_to_optimize_in_bytes) ? ',' : '\n'); + + printf("Total second matches: %u %3.2f%%\n", total_second_matches, total_second_matches * 100.0f / num_blocks); + printf("Secod match length histogram:\n"); + for (uint32_t i = MIN_MATCH_LEN; i <= block_size_to_optimize_in_bytes; i++) + printf("%u%c", second_len_hist[i], (i < block_size_to_optimize_in_bytes) ? ',' : '\n'); + } + + return true; + } + +} // namespace ert + diff --git a/libkram/bc7enc/ert.h b/libkram/bc7enc/ert.h new file mode 100644 index 0000000..d387f52 --- /dev/null +++ b/libkram/bc7enc/ert.h @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ert +{ + struct color_rgba { uint8_t m_c[4]; }; + + struct reduce_entropy_params + { + // m_lambda: The post-processor tries to reduce distortion*smooth_block_scale + rate*lambda (rate is approximate LZ bits and distortion is scaled MS error multiplied against the smooth block MSE weighting factor). 
+ // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. + float m_lambda; + + // m_lookback_window_size: The number of bytes the encoder can look back from each block to find matches. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. + uint32_t m_lookback_window_size; + + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. + float m_max_allowed_rms_increase_ratio; + + float m_max_smooth_block_std_dev; + float m_smooth_block_max_mse_scale; + + uint32_t m_color_weights[4]; + + bool m_try_two_matches; + bool m_allow_relative_movement; + bool m_skip_zero_mse_blocks; + bool m_debug_output; + + reduce_entropy_params() { clear(); } + + void clear() + { + m_lookback_window_size = 256; + m_lambda = 1.0f; + m_max_allowed_rms_increase_ratio = 10.0f; + m_max_smooth_block_std_dev = 18.0f; + m_smooth_block_max_mse_scale = 10.0f; + m_color_weights[0] = 1; + m_color_weights[1] = 1; + m_color_weights[2] = 1; + m_color_weights[3] = 1; + m_try_two_matches = false; + m_allow_relative_movement = false; + m_skip_zero_mse_blocks = false; + m_debug_output = false; + } + + void print() + { + printf("lambda: %f\n", m_lambda); + printf("Lookback window size: %u\n", m_lookback_window_size); + printf("Max allowed RMS increase ratio: %f\n", m_max_allowed_rms_increase_ratio); + printf("Max smooth block std dev: %f\n", m_max_smooth_block_std_dev); + printf("Smooth block max MSE scale: %f\n", m_smooth_block_max_mse_scale); + printf("Color weights: %u %u %u %u\n", m_color_weights[0], m_color_weights[1], m_color_weights[2], m_color_weights[3]); + printf("Try two matches: %u\n", m_try_two_matches); + printf("Allow relative movement: %u\n", m_allow_relative_movement); + printf("Skip zero MSE blocks: %u\n", m_skip_zero_mse_blocks); + } + }; + + 
typedef bool (*pUnpack_block_func)(const void* pBlock, color_rgba* pPixels, uint32_t block_index, void* pUser_data); + + // BC7 entropy reduction transform with Deflate/LZMA/LZHAM optimizations + bool reduce_entropy(void* pBlocks, uint32_t num_blocks, + uint32_t total_block_stride_in_bytes, uint32_t block_size_to_optimize_in_bytes, uint32_t block_width, uint32_t block_height, uint32_t num_comps, + const color_rgba* pBlock_pixels, const reduce_entropy_params& params, uint32_t& total_modified, + pUnpack_block_func pUnpack_block_func, void* pUnpack_block_func_user_data, + std::vector* pBlock_mse_scales = nullptr); + +} // namespace ert diff --git a/libkram/bc7enc/rdo_bc_encoder.cpp b/libkram/bc7enc/rdo_bc_encoder.cpp new file mode 100644 index 0000000..44d3933 --- /dev/null +++ b/libkram/bc7enc/rdo_bc_encoder.cpp @@ -0,0 +1,1270 @@ +// rdo_bc_encoder.cpp +#include "rdo_bc_encoder.h" + +#define RGBCX_IMPLEMENTATION +#include "rgbcx.h" + +#define DECODE_BC4_TO_GRAYSCALE (0) + +#ifdef _MSC_VER +#pragma warning (disable: 4127) // conditional expression is constant +#endif + +using namespace utils; + +namespace rdo_bc +{ + static const char* get_dxgi_format_string(DXGI_FORMAT fmt) + { + switch (fmt) + { + case DXGI_FORMAT_BC1_UNORM: return "BC1_UNORM"; + case DXGI_FORMAT_BC4_UNORM: return "BC4_UNORM"; + case DXGI_FORMAT_BC3_UNORM: return "BC3_UNORM"; + case DXGI_FORMAT_BC5_UNORM: return "BC5_UNORM"; + case DXGI_FORMAT_BC7_UNORM: return "BC7_UNORM"; + default: break; + } + return "?"; + } + + static std::vector compute_block_mse_scales(const image_u8& source_image, uint32_t blocks_x, uint32_t blocks_y, uint32_t total_blocks, bool rdo_debug_output) + { + const float ULTRASMOOTH_BLOCK_STD_DEV_THRESHOLD = 2.9f; + const float DARK_THRESHOLD = 13.0f; + const float BRIGHT_THRESHOLD = 222.0f; + const float ULTRAMOOTH_BLOCK_MSE_SCALE = 120.0f; + const uint32_t ULTRASMOOTH_REGION_TOO_SMALL_THRESHOLD = 64; + + image_u8 ultrasmooth_blocks_vis(blocks_x, blocks_y); + + for (uint32_t by 
= 0; by < blocks_y; by++) + { + for (uint32_t bx = 0; bx < blocks_x; bx++) + { + color_quad_u8 block_pixels[16]; + source_image.get_block(bx, by, 4, 4, block_pixels); + + tracked_stat y_stats; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + { + int l = block_pixels[x + y * 4].get_luma(); + y_stats.update(l); + } + + float max_std_dev = compute_block_max_std_dev((color_quad_u8*)block_pixels, 4, 4, 3); + + float yl = max_std_dev / ULTRASMOOTH_BLOCK_STD_DEV_THRESHOLD; + + yl = clamp(yl, 0.0f, 1.0f); + yl *= yl; + + float y_avg = y_stats.get_mean(); + + if ((y_avg < DARK_THRESHOLD) || (y_avg >= BRIGHT_THRESHOLD)) + yl = 1.0f; + + int k = std::min((int)(yl * 255.0f + .5f), 255); + + ultrasmooth_blocks_vis.fill_box(bx, by, 1, 1, color_quad_u8((uint8_t)k, 255)); + } + } + + for (int pass = 0; pass < 1; pass++) + { + image_u8 next_vis(ultrasmooth_blocks_vis); + + for (int y = 0; y < (int)blocks_y; y++) + { + for (int x = 0; x < (int)blocks_x; x++) + { + int m = 0; + + for (int dy = -1; dy <= 1; dy++) + for (int dx = -1; dx <= 1; dx++) + { + if (ultrasmooth_blocks_vis.get_clamped(x + dx, y + dy).r == 255) + m = std::max(m, ultrasmooth_blocks_vis.get_clamped(x + dx, y + dy).r); + } + + next_vis(x, y).set((uint8_t)m, 255); + } + } + + ultrasmooth_blocks_vis.swap(next_vis); + } + + for (uint32_t pass = 0; pass < 32; pass++) + { + image_u8 next_vis(ultrasmooth_blocks_vis); + for (int y = 0; y < (int)blocks_y; y++) + { + for (int x = 0; x < (int)blocks_x; x++) + { + if (ultrasmooth_blocks_vis.get_clamped(x, y).r < 255) + { + int m = 0; + + for (int dy = -1; dy <= 1; dy++) + for (int dx = -1; dx <= 1; dx++) + if (ultrasmooth_blocks_vis.get_clamped(x + dx, y + dy).r == 255) + m++; + + if (m >= 5) + next_vis.set_pixel_clipped(x, y, color_quad_u8(255, 255, 255, 255)); + } + } + } + ultrasmooth_blocks_vis.swap(next_vis); + } + + image_u8 orig_ultrasmooth_blocks_vis(ultrasmooth_blocks_vis); + + if (rdo_debug_output) + { + 
save_png("ultrasmooth_block_mask_pre_filter.png", ultrasmooth_blocks_vis, false); + } + + for (uint32_t by = 0; by < blocks_y; by++) + { + for (uint32_t bx = 0; bx < blocks_x; bx++) + { + const bool is_ultrasmooth = ultrasmooth_blocks_vis(bx, by).r == 0; + if (!is_ultrasmooth) + continue; + + std::vector filled_pixels; + filled_pixels.reserve(256); + + uint32_t total_set_pixels = ultrasmooth_blocks_vis.flood_fill(bx, by, color_quad_u8(255, 255, 255, 255), color_quad_u8(0, 0, 0, 255), &filled_pixels); + + if (total_set_pixels < ULTRASMOOTH_REGION_TOO_SMALL_THRESHOLD) + { + for (uint32_t i = 0; i < filled_pixels.size(); i++) + orig_ultrasmooth_blocks_vis(filled_pixels[i].m_x, filled_pixels[i].m_y) = color_quad_u8(255, 255, 255, 255); + } + + } // bx + } // by + + ultrasmooth_blocks_vis = orig_ultrasmooth_blocks_vis; + + if (rdo_debug_output) + { + save_png("ultrasmooth_block_mask.png", ultrasmooth_blocks_vis, false); + } + + std::vector block_mse_scales(total_blocks); + + uint32_t total_ultrasmooth_blocks = 0; + for (uint32_t by = 0; by < blocks_y; by++) + { + for (uint32_t bx = 0; bx < blocks_x; bx++) + { + const bool is_ultrasmooth = ultrasmooth_blocks_vis(bx, by).r == 0; + + block_mse_scales[bx + by * blocks_x] = is_ultrasmooth ? 
ULTRAMOOTH_BLOCK_MSE_SCALE : -1.0f; + + total_ultrasmooth_blocks += is_ultrasmooth; + } + } + + if (rdo_debug_output) + printf("Total ultrasmooth blocks: %3.2f%%\n", total_ultrasmooth_blocks * 100.0f / total_blocks); + + return block_mse_scales; + } + + rdo_bc_encoder::rdo_bc_encoder() : + m_pOrig_source_image(nullptr), + m_orig_width(0), + m_orig_height(0), + m_blocks_x(0), + m_blocks_y(0), + m_total_blocks(0), + m_bytes_per_block(0), + m_pixel_format_bpp(0), + m_total_texels(0), + m_has_alpha(false) + { + } + + void rdo_bc_encoder::clear() + { + m_pOrig_source_image = nullptr; + + m_source_image.clear(); + + m_params.clear(); + + m_orig_width = 0; + m_orig_height = 0; + m_blocks_x = 0; + m_blocks_y = 0; + m_total_blocks = 0; + m_bytes_per_block = 0; + m_pixel_format_bpp = 0; + m_total_texels = 0; + m_has_alpha = false; + + m_packed_image8.clear(); + m_packed_image16.clear(); + + m_prerdo_packed_image8.clear(); + m_prerdo_packed_image16.clear(); + + m_bc7enc_pack_params.clear(); +#if SUPPORT_BC7E + memset(&m_bc7e_pack_params, 0, sizeof(m_bc7e_pack_params)); +#endif + } + + bool rdo_bc_encoder::init(const utils::image_u8& src_image, rdo_bc_params& params) + { + clear(); + + m_pOrig_source_image = &src_image; + m_params = params; + + init_encoders(); + + if (!init_source_image()) + return false; + + return true; + } + + bool rdo_bc_encoder::encode() + { + if (!m_packed_image8.size() && !m_packed_image16.size()) + return false; + + if (!init_encoder_params()) + return false; + + if (!encode_texture()) + return false; + + if (!postprocess_rdo()) + return false; + + return true; + } + + void rdo_bc_encoder::init_encoders() + { + rgbcx::init(m_params.m_bc1_mode); + bc7enc_compress_block_init(); +#if SUPPORT_BC7E + ispc::bc7e_compress_block_init(); +#endif + } + + bool rdo_bc_encoder::init_encoder_params() + { + bc7enc_compress_block_params_init(&m_bc7enc_pack_params); + if (!m_params.m_perceptual) + 
bc7enc_compress_block_params_init_linear_weights(&m_bc7enc_pack_params); + m_bc7enc_pack_params.m_max_partitions = m_params.m_bc7enc_max_partitions_to_scan; + m_bc7enc_pack_params.m_uber_level = std::min(BC7ENC_MAX_UBER_LEVEL, m_params.m_bc7_uber_level); + + if (m_params.m_bc7enc_mode6_only) + m_bc7enc_pack_params.m_mode_mask = 1 << 6; + + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) && (m_params.m_rdo_lambda > 0.0f)) + { + // Slam off perceptual in RDO mode - we don't support it (too slow). + m_params.m_perceptual = false; + m_bc7enc_pack_params.m_perceptual = false; + bc7enc_compress_block_params_init_linear_weights(&m_bc7enc_pack_params); + } + + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) && (m_params.m_bc7enc_reduce_entropy)) + { + // Configure the BC7 encoder with some decent parameters for later RDO post-processing. + // Textures with alpha are harder for BC7 to handle, so we use more conservative defaults. + + m_bc7enc_pack_params.m_mode17_partition_estimation_filterbank = false; + + if (m_params.m_bc7enc_rdo_bc7_weight_modes) + { + // Weight modes 5 and especially 6 more highly than the other modes. + if (m_has_alpha) + { + m_bc7enc_pack_params.m_mode5_error_weight = .7f; + m_bc7enc_pack_params.m_mode6_error_weight = .6f; + } + else + { + m_bc7enc_pack_params.m_mode6_error_weight = .4f; + } + } + + if (m_params.m_bc7enc_rdo_bc7_weight_low_frequency_partitions) + { + // Slightly prefer the lower frequency partition patterns. + m_bc7enc_pack_params.m_low_frequency_partition_weight = .9999f; + } + + if (m_params.m_bc7enc_rdo_bc7_quant_mode6_endpoints) + { + // As a good default, don't quantize mode 6 endpoints if the texture has alpha. This isn't required, but helps mask textures. + //if (!has_alpha) + m_bc7enc_pack_params.m_quant_mode6_endpoints = true; + } + + if (m_params.m_bc7enc_rdo_bc7_pbit1_weighting) + { + // Favor p-bit 0 vs. 
1, to slightly lower the entropy of output blocks with p-bits + m_bc7enc_pack_params.m_pbit1_weight = 1.3f; + } + } + +#if SUPPORT_BC7E + // Now initialize the BC7 compressor's parameters. + + memset(&m_bc7e_pack_params, 0, sizeof(m_bc7e_pack_params)); + switch (m_params.m_bc7_uber_level) + { + case 0: + ispc::bc7e_compress_block_params_init_ultrafast(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 1: + ispc::bc7e_compress_block_params_init_veryfast(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 2: + ispc::bc7e_compress_block_params_init_fast(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 3: + ispc::bc7e_compress_block_params_init_basic(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 4: + ispc::bc7e_compress_block_params_init_slow(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 5: + ispc::bc7e_compress_block_params_init_veryslow(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 6: + default: + ispc::bc7e_compress_block_params_init_slowest(&m_bc7e_pack_params, m_params.m_perceptual); + break; + } +#endif + + if (m_params.m_status_output) + { + if (m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) + { + if ((SUPPORT_BC7E) && (m_params.m_use_bc7e)) + printf("bc7e.ispc uber level: %u, perceptual: %u\n", m_params.m_bc7_uber_level, m_params.m_perceptual); + else + { + printf("\nbc7enc parameters:\n"); + m_bc7enc_pack_params.print(); + } + } + else + { + printf("BC1 level: %u, use 3-color mode: %u, use 3-color mode for black: %u, bc1_mode: %u\n", + m_params.m_bc1_quality_level, m_params.m_use_bc1_3color_mode, m_params.m_use_bc1_3color_mode_for_black, (int)m_params.m_bc1_mode); + } + + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM) || (m_params.m_dxgi_format == DXGI_FORMAT_BC4_UNORM) || (m_params.m_dxgi_format == DXGI_FORMAT_BC5_UNORM)) + { + printf("Use high quality BC4 block encoder: %u, BC4 block radius: %u, use 6 value mode: %u, use 8 value mode: %u\n", + m_params.m_use_hq_bc345, 
m_params.m_bc345_search_rad, (m_params.m_bc345_mode_mask & 2) != 0, (m_params.m_bc345_mode_mask & 1) != 0); + } + + printf("\nrdo_bc_params:\n"); + printf(" Perceptual: %u\n", m_params.m_perceptual); + printf(" Y Flip: %u\n", m_params.m_y_flip); + printf(" DXGI format: 0x%X %s\n", m_params.m_dxgi_format, get_dxgi_format_string(m_params.m_dxgi_format)); + + printf("BC1-5 parameters:\n"); + printf(" BC45 channels: %u %u\n", m_params.m_bc45_channel0, m_params.m_bc45_channel1); + printf(" BC1 approximation mode: %u\n", (int)m_params.m_bc1_mode); + printf(" Use BC1 3-color mode: %u\n", m_params.m_use_bc1_3color_mode); + printf(" Use BC1 3-color mode for black: %u\n", m_params.m_use_bc1_3color_mode_for_black); + printf(" BC1 quality level: %u\n", m_params.m_bc1_quality_level); + printf(" Use HQ BC345: %u\n", m_params.m_use_hq_bc345); + printf(" BC345 search radius: %u\n", m_params.m_bc345_search_rad); + printf(" BC345 mode mask: 0x%X\n", m_params.m_bc345_mode_mask); + + printf("BC7 parameters:\n"); + printf(" Use bc7e: %u\n", m_params.m_use_bc7e); + printf(" BC7 uber level: %u\n", m_params.m_bc7_uber_level); + + printf("RDO parameters:\n"); + printf(" Lambda: %f\n", m_params.m_rdo_lambda); + printf(" Lookback window size: %u\n", m_params.m_lookback_window_size); + printf(" Custom lookback window size: %u\n", m_params.m_custom_lookback_window_size); + printf(" Try 2 matches: %u\n", m_params.m_rdo_try_2_matches); + printf(" Smooth block error scale: %f\n", m_params.m_rdo_smooth_block_error_scale); + printf(" Custom RDO smooth block error scale: %u\n", m_params.m_custom_rdo_smooth_block_error_scale); + printf(" Max smooth block std dev: %f\n", m_params.m_rdo_max_smooth_block_std_dev); + printf(" Allow relative movement: %u\n", m_params.m_rdo_allow_relative_movement); + printf(" Ultrasmooth block handling: %u\n", m_params.m_rdo_ultrasmooth_block_handling); + printf(" Multithreading: %u, max threads: %u\n", m_params.m_rdo_multithreading, m_params.m_rdo_max_threads); + + 
printf("bc7enc parameters:\n"); + printf(" Mode 6 only: %u\n", m_params.m_bc7enc_mode6_only); + printf(" Max partitions to scan: %u\n", m_params.m_bc7enc_max_partitions_to_scan); + printf(" Quant mode 6 endpoints: %u\n", m_params.m_bc7enc_rdo_bc7_quant_mode6_endpoints); + printf(" Weight modes: %u\n", m_params.m_bc7enc_rdo_bc7_weight_modes); + printf(" Weight low freq partitions: %u\n", m_params.m_bc7enc_rdo_bc7_weight_low_frequency_partitions); + printf(" P-bit1 weighting: %u\n", m_params.m_bc7enc_rdo_bc7_pbit1_weighting); + printf(" Reduce entropy mode: %u\n", m_params.m_bc7enc_reduce_entropy); + printf("\n"); + } + + return true; + } + + bool rdo_bc_encoder::init_source_image() + { + switch (m_params.m_dxgi_format) + { + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC4_UNORM: + m_pixel_format_bpp = 4; + break; + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC7_UNORM: + m_pixel_format_bpp = 8; + break; + default: + return false; + } + + m_bytes_per_block = (16 * m_pixel_format_bpp) / 8; + assert((m_bytes_per_block == 8) || (m_bytes_per_block == 16)); + + m_source_image = *m_pOrig_source_image; + + m_orig_width = m_source_image.width(); + m_orig_height = m_source_image.height(); + + if (m_params.m_y_flip) + { + utils::image_u8 temp; + temp.init(m_orig_width, m_orig_height); + + for (uint32_t y = 0; y < m_orig_height; y++) + for (uint32_t x = 0; x < m_orig_width; x++) + temp(x, (m_orig_height - 1) - y) = m_source_image(x, y); + + temp.swap(m_source_image); + } + + m_source_image.crop_dup_borders((m_source_image.width() + 3) & ~3, (m_source_image.height() + 3) & ~3); + + m_blocks_x = m_source_image.width() / 4; + m_blocks_y = m_source_image.height() / 4; + m_total_blocks = m_blocks_x * m_blocks_y; + m_total_texels = m_total_blocks * 16; + + bool has_alpha = false; + for (int by = 0; by < ((int)m_blocks_y) && !has_alpha; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 pixels[16]; + 
m_source_image.get_block(bx, by, 4, 4, pixels); + + for (uint32_t i = 0; i < 16; i++) + { + if (pixels[i].m_c[3] < 255) + { + has_alpha = true; + break; + } + } + } + } + + if (m_pixel_format_bpp == 8) + m_packed_image16.resize(m_total_blocks); + else + m_packed_image8.resize(m_total_blocks); + + return true; + } + + bool rdo_bc_encoder::encode_texture() + { + clock_t start_t = clock(); + + uint32_t bc7_mode_hist[8]; + memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist)); + +#if SUPPORT_BC7E + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) && (m_params.m_use_bc7e)) + { + if (m_params.m_status_output) + printf("Using bc7e: "); + +#pragma omp parallel for + for (int32_t by = 0; by < static_cast(m_blocks_y); by++) + { + // Process 64 blocks at a time, for efficient SIMD processing. + // Ideally, N >= 8 (or more) and (N % 8) == 0. + const int N = 64; + + for (uint32_t bx = 0; bx < m_blocks_x; bx += N) + { + const uint32_t num_blocks_to_process = std::min(m_blocks_x - bx, N); + + color_quad_u8 pixels[16 * N]; + + // Extract num_blocks_to_process 4x4 pixel blocks from the source image and put them into the pixels[] array. + for (uint32_t b = 0; b < num_blocks_to_process; b++) + m_source_image.get_block(bx + b, by, 4, 4, pixels + b * 16); + + // Compress the blocks to BC7. + // Note: If you've used Intel's ispc_texcomp, the input pixels are different. BC7E requires a pointer to an array of 16 pixels for each block. 
+ block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + ispc::bc7e_compress_blocks(num_blocks_to_process, reinterpret_cast(pBlock), reinterpret_cast(pixels), &m_bc7e_pack_params); + } + + if (m_params.m_status_output) + { + if ((by & 63) == 0) + printf("."); + } + } + + for (int by = 0; by < (int)m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + uint32_t mode = ((uint8_t*)pBlock)[0]; + for (uint32_t m = 0; m <= 7; m++) + { + if (mode & (1 << m)) + { + bc7_mode_hist[m]++; + break; + } + } + } + } + } + else +#endif + { +#pragma omp parallel for + for (int by = 0; by < (int)m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 pixels[16]; + + m_source_image.get_block(bx, by, 4, 4, pixels); + + switch (m_params.m_dxgi_format) + { + case DXGI_FORMAT_BC1_UNORM: + { + block8* pBlock = &m_packed_image8[bx + by * m_blocks_x]; + + rgbcx::encode_bc1(m_params.m_bc1_quality_level, pBlock, &pixels[0].m_c[0], m_params.m_use_bc1_3color_mode, m_params.m_use_bc1_3color_mode_for_black); + break; + } + case DXGI_FORMAT_BC3_UNORM: + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + if (m_params.m_use_hq_bc345) + rgbcx::encode_bc3_hq(m_params.m_bc1_quality_level, pBlock, &pixels[0].m_c[0], m_params.m_bc345_search_rad, m_params.m_bc345_mode_mask); + else + rgbcx::encode_bc3(m_params.m_bc1_quality_level, pBlock, &pixels[0].m_c[0]); + break; + } + case DXGI_FORMAT_BC4_UNORM: + { + block8* pBlock = &m_packed_image8[bx + by * m_blocks_x]; + + if (m_params.m_use_hq_bc345) + rgbcx::encode_bc4_hq(pBlock, &pixels[0].m_c[m_params.m_bc45_channel0], 4, m_params.m_bc345_search_rad, m_params.m_bc345_mode_mask); + else + rgbcx::encode_bc4(pBlock, &pixels[0].m_c[m_params.m_bc45_channel0], 4); + break; + } + case DXGI_FORMAT_BC5_UNORM: + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + if (m_params.m_use_hq_bc345) + 
rgbcx::encode_bc5_hq(pBlock, &pixels[0].m_c[0], m_params.m_bc45_channel0, m_params.m_bc45_channel1, 4, m_params.m_bc345_search_rad, m_params.m_bc345_mode_mask); + else + rgbcx::encode_bc5(pBlock, &pixels[0].m_c[0], m_params.m_bc45_channel0, m_params.m_bc45_channel1, 4); + break; + } + case DXGI_FORMAT_BC7_UNORM: + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + bc7enc_compress_block(pBlock, pixels, &m_bc7enc_pack_params); + +#pragma omp critical + { + uint32_t mode = ((uint8_t*)pBlock)[0]; + for (uint32_t m = 0; m <= 7; m++) + { + if (mode & (1 << m)) + { + bc7_mode_hist[m]++; + break; + } + } + } + + break; + } + default: + { + assert(0); + break; + } + } + } + + if (m_params.m_status_output) + { + if ((by & 127) == 0) + printf("."); + } + } + } + + clock_t end_t = clock(); + + if (m_params.m_status_output) + { + printf("\nTotal encoding time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC); + + if (m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) + { + printf("BC7 mode histogram:\n"); + for (uint32_t i = 0; i < 8; i++) + printf("%u: %u\n", i, bc7_mode_hist[i]); + } + } + + return true; + } + + bool rdo_bc_encoder::postprocess_rdo() + { + m_prerdo_packed_image8 = m_packed_image8; + m_prerdo_packed_image16 = m_packed_image16; + + // Post-process the data with Rate Distortion Optimization + if (m_params.m_rdo_lambda <= 0.0f) + return true; + + const uint32_t MIN_RDO_MULTITHREADING_BLOCKS = 4096; + const int rdo_total_threads = (m_params.m_rdo_multithreading && (m_params.m_rdo_max_threads > 1) && (m_total_blocks >= MIN_RDO_MULTITHREADING_BLOCKS)) ? m_params.m_rdo_max_threads : 1; + + if (m_params.m_status_output) + printf("rdo_total_threads: %u\n", rdo_total_threads); + + int blocks_remaining = m_total_blocks, cur_block_index = 0; + std::vector blocks_to_do(rdo_total_threads), first_block_index(rdo_total_threads); + for (int p = 0; p < rdo_total_threads; p++) + { + const int num_blocks = (p == (rdo_total_threads - 1)) ? 
blocks_remaining : (m_total_blocks / rdo_total_threads); + + blocks_to_do[p] = num_blocks; + first_block_index[p] = cur_block_index; + + cur_block_index += num_blocks; + blocks_remaining -= num_blocks; + } + + assert(!blocks_remaining && cur_block_index == (int)m_total_blocks); + + ert::reduce_entropy_params ert_p; + + ert_p.m_lambda = m_params.m_rdo_lambda; + ert_p.m_lookback_window_size = m_params.m_lookback_window_size; + ert_p.m_smooth_block_max_mse_scale = m_params.m_rdo_smooth_block_error_scale; + ert_p.m_max_smooth_block_std_dev = m_params.m_rdo_max_smooth_block_std_dev; + ert_p.m_debug_output = m_params.m_rdo_debug_output; + ert_p.m_try_two_matches = m_params.m_rdo_try_2_matches; + ert_p.m_allow_relative_movement = m_params.m_rdo_allow_relative_movement; + ert_p.m_skip_zero_mse_blocks = false; + + std::vector block_rgb_mse_scales(compute_block_mse_scales(m_source_image, m_blocks_x, m_blocks_y, m_total_blocks, m_params.m_rdo_debug_output)); + + std::vector block_pixels(m_total_blocks * 16); + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + m_source_image.get_block(bx, by, 4, 4, (color_quad_u8*)&block_pixels[(bx + by * m_blocks_x) * 16]); + + unpacker_funcs block_unpackers; + block_unpackers.m_allow_3color_mode = m_params.m_use_bc1_3color_mode; + block_unpackers.m_use_bc1_3color_mode_for_black = m_params.m_use_bc1_3color_mode_for_black; + block_unpackers.m_mode = m_params.m_bc1_mode; + + if (m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) + { + ert_p.m_lookback_window_size = std::max(16U, m_params.m_lookback_window_size); + + // BC7 RDO + const uint32_t NUM_COMPONENTS = 4; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // Attempt to compute a decent conservative smooth block MSE max scaling factor. + // No single smooth block scale setting can work for all textures (unless it's ridiuclously large, killing efficiency). 
+ ert_p.m_smooth_block_max_mse_scale = lerp(15.0f, 50.0f, std::min(1.0f, ert_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + float& s = block_rgb_mse_scales[bx + by * m_blocks_x]; + if (s > 0.0f) + s = std::max(ert_p.m_smooth_block_max_mse_scale, s * std::min(ert_p.m_lambda, 3.0f)); + } + + if (m_params.m_status_output) + { + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + } + + uint32_t total_modified = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local = 0; + + std::vector local_block_rgb_mse_scales(num_blocks_to_encode); + for (int i = 0; i < num_blocks_to_encode; i++) + local_block_rgb_mse_scales[i] = block_rgb_mse_scales[first_block_to_encode + i]; + + ert::reduce_entropy(&m_packed_image16[first_block_to_encode], num_blocks_to_encode, + 16, 16, 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local, + unpacker_funcs::unpack_bc7_block, &block_unpackers, + m_params.m_rdo_ultrasmooth_block_handling ? 
&local_block_rgb_mse_scales : nullptr); + +#pragma omp critical + { + total_modified += total_modified_local; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified: %u %3.2f%%\n", total_modified, total_modified * 100.0f / m_total_blocks); + + uint32_t bc7_mode_hist[8]; + memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist)); + + for (int by = 0; by < (int)m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + const uint32_t mode_byte = ((uint8_t*)pBlock)[0]; + + uint32_t m; + for (m = 0; m <= 7; m++) + { + if (mode_byte & (1 << m)) + { + bc7_mode_hist[m]++; + break; + } + } + assert(m != 8); + } + } + + printf("BC7 mode histogram:\n"); + for (uint32_t i = 0; i < 8; i++) + printf("%u: %u\n", i, bc7_mode_hist[i]); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC5_UNORM) + { + // BC5 RDO - One BC4 block for R followed by one BC4 block for G + + ert_p.m_lookback_window_size = std::max(16U, m_params.m_lookback_window_size); + + std::vector block_pixels_r(m_total_blocks * 16), block_pixels_g(m_total_blocks * 16); + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 orig_block[16]; + m_source_image.get_block(bx, by, 4, 4, orig_block); + + color_quad_u8* pDst_block_r = (color_quad_u8*)&block_pixels_r[(bx + by * m_blocks_x) * 16]; + color_quad_u8* pDst_block_g = (color_quad_u8*)&block_pixels_g[(bx + by * m_blocks_x) * 16]; + + for (uint32_t i = 0; i < 16; i++) + { + pDst_block_r[i].set(orig_block[i].r, 0, 0, 0); + pDst_block_g[i].set(orig_block[i].g, 0, 0, 0); + } + } + } + + const uint32_t NUM_COMPONENTS = 1; + + ert_p.m_color_weights[1] = 0; + ert_p.m_color_weights[2] = 0; + ert_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + 
// Attempt to compute a decent conservative smooth block MSE max scaling factor. + // No single smooth block scale setting can work for all textures (unless it's ridiuclously large, killing efficiency). + ert_p.m_smooth_block_max_mse_scale = lerp(10.0f, 30.0f, std::min(1.0f, ert_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + if (m_params.m_status_output) + { + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + } + + uint32_t total_modified_r = 0, total_modified_g = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local_r = 0, total_modified_local_g = 0; + + ert::reduce_entropy(&m_packed_image16[first_block_to_encode], num_blocks_to_encode, + 2 * sizeof(rgbcx::bc4_block), sizeof(rgbcx::bc4_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels_r[16 * first_block_to_encode], ert_p, total_modified_local_r, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + + ert::reduce_entropy((uint8_t*)&m_packed_image16[first_block_to_encode] + sizeof(rgbcx::bc4_block), num_blocks_to_encode, + 2 * sizeof(rgbcx::bc4_block), sizeof(rgbcx::bc4_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels_g[16 * first_block_to_encode], ert_p, total_modified_local_g, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + +#pragma omp critical + { + total_modified_r += total_modified_local_r; + total_modified_g += total_modified_local_g; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified R: %u %3.2f%%\n", 
total_modified_r, total_modified_r * 100.0f / m_total_blocks); + printf("Total blocks modified G: %u %3.2f%%\n", total_modified_g, total_modified_g * 100.0f / m_total_blocks); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC4_UNORM) + { + // BC4 RDO - One BC4 block for R + + const uint32_t NUM_COMPONENTS = 1; + + ert_p.m_color_weights[1] = 0; + ert_p.m_color_weights[2] = 0; + ert_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // Attempt to compute a decent conservative smooth block MSE max scaling factor. + // No single smooth block scale setting can work for all textures (unless it's ridiuclously large, killing efficiency). + ert_p.m_smooth_block_max_mse_scale = lerp(10.0f, 30.0f, std::min(1.0f, ert_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + if (m_params.m_status_output) + { + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + } + + uint32_t total_modified = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local = 0; + + ert::reduce_entropy(&m_packed_image8[first_block_to_encode], num_blocks_to_encode, + sizeof(rgbcx::bc4_block), sizeof(rgbcx::bc4_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + +#pragma omp critical + { + total_modified += total_modified_local; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified: %u 
%3.2f%%\n", total_modified, total_modified * 100.0f / m_total_blocks); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC1_UNORM) + { + // BC1 RDO - One BC1 block + const uint32_t NUM_COMPONENTS = 3; + + ert_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // This is just a hack - no single setting can work for all textures. + ert_p.m_smooth_block_max_mse_scale = lerp(15.0f, 50.0f, std::min(1.0f, ert_p.m_lambda / 8.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + float& s = block_rgb_mse_scales[bx + by * m_blocks_x]; + if (s > 0.0f) + s = std::max(ert_p.m_smooth_block_max_mse_scale, s * std::min(ert_p.m_lambda, 3.0f)); + } + + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + + uint32_t total_modified = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local = 0; + + std::vector local_block_rgb_mse_scales(num_blocks_to_encode); + for (int i = 0; i < num_blocks_to_encode; i++) + local_block_rgb_mse_scales[i] = block_rgb_mse_scales[first_block_to_encode + i]; + + ert::reduce_entropy(&m_packed_image8[first_block_to_encode], num_blocks_to_encode, + sizeof(rgbcx::bc1_block), sizeof(rgbcx::bc1_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local, + unpacker_funcs::unpack_bc1_block, &block_unpackers, + m_params.m_rdo_ultrasmooth_block_handling ? 
&local_block_rgb_mse_scales : nullptr); + +#pragma omp critical + { + total_modified += total_modified_local; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified: %u %3.2f%%\n", + total_modified, total_modified * 100.0f / m_total_blocks); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM) + { + // BC3 RDO - One BC4 block followed by one BC1 block + + ert_p.m_lookback_window_size = std::max(16U, m_params.m_lookback_window_size); + + std::vector block_pixels_a(m_total_blocks * 16); + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 orig_block[16]; + m_source_image.get_block(bx, by, 4, 4, orig_block); + + color_quad_u8* pDst_block_a = (color_quad_u8*)&block_pixels_a[(bx + by * m_blocks_x) * 16]; + for (uint32_t i = 0; i < 16; i++) + pDst_block_a[i].set(orig_block[i].a, 0, 0, 0); + } + } + + ert_p.m_color_weights[3] = 0; + + ert::reduce_entropy_params ert_alpha_p(ert_p); + ert_alpha_p.m_color_weights[1] = 0; + ert_alpha_p.m_color_weights[2] = 0; + ert_alpha_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // This is just a hack - no single setting can work for all textures. 
+ ert_p.m_smooth_block_max_mse_scale = lerp(15.0f, 50.0f, std::min(1.0f, ert_p.m_lambda / 8.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override) for RGB\n", ert_p.m_smooth_block_max_mse_scale); + + ert_alpha_p.m_smooth_block_max_mse_scale = lerp(10.0f, 30.0f, std::min(1.0f, ert_alpha_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f for Alpha\n", ert_alpha_p.m_smooth_block_max_mse_scale); + } + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + float& s = block_rgb_mse_scales[bx + by * m_blocks_x]; + if (s > 0.0f) + s = std::max(ert_p.m_smooth_block_max_mse_scale, s * std::min(ert_p.m_lambda, 3.0f)); + } + + if (m_params.m_status_output) + { + printf("\nERT RGB parameters:\n"); + ert_p.print(); + + printf("\nERT Alpha parameters:\n"); + ert_alpha_p.print(); + printf("\n"); + } + + uint32_t total_modified_rgb = 0, total_modified_alpha = 0; + + block_unpackers.m_allow_3color_mode = false; + block_unpackers.m_use_bc1_3color_mode_for_black = false; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local_rgb = 0, total_modified_local_alpha = 0; + + ert::reduce_entropy((uint8_t*)&m_packed_image16[first_block_to_encode], num_blocks_to_encode, + sizeof(rgbcx::bc1_block) * 2, sizeof(rgbcx::bc4_block), 4, 4, 1, + (ert::color_rgba*)&block_pixels_a[16 * first_block_to_encode], ert_alpha_p, total_modified_local_alpha, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + + std::vector local_block_rgb_mse_scales(num_blocks_to_encode); + for (int i = 0; i < num_blocks_to_encode; i++) + local_block_rgb_mse_scales[i] = 
block_rgb_mse_scales[first_block_to_encode + i]; + + ert::reduce_entropy((uint8_t*)&m_packed_image16[first_block_to_encode] + sizeof(rgbcx::bc1_block), num_blocks_to_encode, + sizeof(rgbcx::bc1_block) * 2, sizeof(rgbcx::bc1_block), 4, 4, 3, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local_rgb, + unpacker_funcs::unpack_bc1_block, &block_unpackers, + m_params.m_rdo_ultrasmooth_block_handling ? &local_block_rgb_mse_scales : nullptr); + +#pragma omp critical + { + total_modified_rgb += total_modified_local_rgb; + total_modified_alpha += total_modified_local_alpha; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total RGB blocks modified: %u %3.2f%%\n", total_modified_rgb, total_modified_rgb * 100.0f / m_total_blocks); + printf("Total Alpha blocks modified: %u %3.2f%%\n", total_modified_alpha, total_modified_alpha * 100.0f / m_total_blocks); + } + } + + return true; + } + + bool rdo_bc_encoder::unpack_blocks(image_u8& unpacked_image) const + { + unpacked_image.init(get_blocks_x() * 4, get_blocks_y() * 4); + + bool bc1_punchthrough_flag = false; + bool used_bc1_transparent_texels_for_black = false; + + bool unpack_failed = false; + +#pragma omp parallel for + for (int by = 0; by < (int)get_blocks_y(); by++) + { + for (uint32_t bx = 0; bx < get_blocks_x(); bx++) + { + const void* pBlock = (const uint8_t*)get_blocks() + (bx + by * get_blocks_x()) * get_bytes_per_block(); + + color_quad_u8 unpacked_pixels[16]; + for (uint32_t i = 0; i < 16; i++) + unpacked_pixels[i].set(0, 0, 0, 255); + + switch (m_params.m_dxgi_format) + { + case DXGI_FORMAT_BC1_UNORM: + { + const bool used_punchthrough = rgbcx::unpack_bc1(pBlock, unpacked_pixels, true, m_params.m_bc1_mode); + + if (used_punchthrough) + { + bc1_punchthrough_flag = true; + + const rgbcx::bc1_block* pBC1_block = (const rgbcx::bc1_block*)pBlock; + 
+ for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + if (pBC1_block->get_selector(x, y) == 3) + used_bc1_transparent_texels_for_black = true; + } + + break; + } + case DXGI_FORMAT_BC3_UNORM: + { + if (!rgbcx::unpack_bc3(pBlock, unpacked_pixels, m_params.m_bc1_mode)) + bc1_punchthrough_flag = true; + break; + } + case DXGI_FORMAT_BC4_UNORM: + { + rgbcx::unpack_bc4(pBlock, &unpacked_pixels[0][0], 4); + +#if DECODE_BC4_TO_GRAYSCALE + for (uint32_t i = 0; i < 16; i++) + { + unpacked_pixels[i][1] = unpacked_pixels[i][0]; + unpacked_pixels[i][2] = unpacked_pixels[i][0]; + } +#endif + break; + } + case DXGI_FORMAT_BC5_UNORM: + { + rgbcx::unpack_bc5(pBlock, &unpacked_pixels[0][0], 0, 1, 4); + break; + } + case DXGI_FORMAT_BC7_UNORM: + { + if (!bc7decomp::unpack_bc7((const uint8_t*)pBlock, (bc7decomp::color_rgba*)unpacked_pixels)) + { + fprintf(stderr, "bc7decomp::unpack_bc7() failed!\n"); + unpack_failed = true; + } + + // Now unpack the block using the non-SSE reference decoder, to make sure we get the same exact unpacked bits. 
+ color_quad_u8 unpacked_pixels_ref[16]; + if (!bc7decomp_ref::unpack_bc7((const uint8_t*)pBlock, (bc7decomp::color_rgba*)unpacked_pixels_ref)) + { + fprintf(stderr, "bc7decomp::unpack_bc7_ref() failed!\n"); + unpack_failed = true; + } + + if (memcmp(unpacked_pixels, unpacked_pixels_ref, sizeof(unpacked_pixels)) != 0) + { + fprintf(stderr, "BC7 unpack verification failed!\n"); + unpack_failed = true; + } + + break; + } + default: + assert(0); + break; + } + + unpacked_image.set_block(bx, by, 4, 4, unpacked_pixels); + } // bx + } // by + + if (unpack_failed) + return false; + + // Sanity check the BC1/BC3 output + if (m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM) + { + if (bc1_punchthrough_flag) + fprintf(stderr, "WARNING: BC3 mode selected, but rgbcx::unpack_bc3() returned one or more blocks using 3-color mode!\n"); + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC1_UNORM) + { + if ((bc1_punchthrough_flag) && (!m_params.m_use_bc1_3color_mode)) + fprintf(stderr, "WARNING: BC1 output used 3-color mode, when this was disabled!\n"); + + if ((used_bc1_transparent_texels_for_black) && (!used_bc1_transparent_texels_for_black)) + fprintf(stderr, "WARNING: BC1 output used the transparent selector for black, when this was disabled!\n"); + } + + if (m_params.m_status_output) + { + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC1_UNORM) || (m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM)) + printf("Output used 3-color mode: %u, output used transparent texels for black: %u\n", bc1_punchthrough_flag, used_bc1_transparent_texels_for_black); + } + + return true; + } + +} // namespace rdo_bc diff --git a/libkram/bc7enc/rdo_bc_encoder.h b/libkram/bc7enc/rdo_bc_encoder.h new file mode 100644 index 0000000..469211e --- /dev/null +++ b/libkram/bc7enc/rdo_bc_encoder.h @@ -0,0 +1,269 @@ +// rdo_bc_encoder.h +#pragma once + +#ifndef SUPPORT_BC7E +#define SUPPORT_BC7E 0 +#endif + +#include "utils.h" +#include "ert.h" + +#include "bc7decomp.h" +#include "rgbcx.h" + +#include 
// TODO: code below doesn't handle srgb case
//
// Block-compressed texture formats, declared locally in this header.
// NOTE(review): the numeric values appear to mirror the BC entries of the DXGI_FORMAT
// enumeration in dxgiformat.h, and the commented-out "dds_defs.h" include suggests that
// header used to provide them - confirm before including both in one translation unit.
// Of these, the encoder code visible in this change only switches on the
// BC1/BC3/BC4/BC5/BC7 *_UNORM values.
enum DXGI_FORMAT
{
    DXGI_FORMAT_BC1_UNORM = 71,
    DXGI_FORMAT_BC1_UNORM_SRGB = 72,
    DXGI_FORMAT_BC2_UNORM = 74,
    DXGI_FORMAT_BC2_UNORM_SRGB = 75,
    DXGI_FORMAT_BC3_UNORM = 77,
    DXGI_FORMAT_BC3_UNORM_SRGB = 78,
    DXGI_FORMAT_BC4_UNORM = 80,
    DXGI_FORMAT_BC4_SNORM = 81,
    DXGI_FORMAT_BC5_UNORM = 83,
    DXGI_FORMAT_BC5_SNORM = 84,
    DXGI_FORMAT_BC6H_UF16 = 95,
    DXGI_FORMAT_BC6H_SF16 = 96,
    DXGI_FORMAT_BC7_UNORM = 98,
    DXGI_FORMAT_BC7_UNORM_SRGB = 99,
};
+ m_use_bc1_3color_mode_for_black = true; // false; + + m_bc1_quality_level = rgbcx::MAX_LEVEL; + + m_dxgi_format = DXGI_FORMAT_BC7_UNORM; + + m_rdo_lambda = 0.0f; + m_rdo_debug_output = false; + m_rdo_smooth_block_error_scale = 15.0f; + m_custom_rdo_smooth_block_error_scale = false; + m_lookback_window_size = 128; + m_custom_lookback_window_size = false; + m_bc7enc_rdo_bc7_quant_mode6_endpoints = true; + m_bc7enc_rdo_bc7_weight_modes = true; + m_bc7enc_rdo_bc7_weight_low_frequency_partitions = true; + m_bc7enc_rdo_bc7_pbit1_weighting = true; + m_rdo_max_smooth_block_std_dev = 18.0f; + m_rdo_allow_relative_movement = false; + m_rdo_try_2_matches = true; + m_rdo_ultrasmooth_block_handling = true; + + m_use_hq_bc345 = true; + m_bc345_search_rad = 5; + m_bc345_mode_mask = rgbcx::BC4_USE_ALL_MODES; + + m_bc7enc_mode6_only = false; + m_rdo_multithreading = true; + + m_bc7enc_reduce_entropy = false; + + m_use_bc7e = false; + +#if SUPPORT_BC7E + // By default, if they've compiled in BC7E.ispc, then use that. In a rate distortion sense it's better overall. 
+ // https://richg42.blogspot.com/2021/02/average-rate-distortion-curves-for.html + m_use_bc7e = true; +#endif + + m_status_output = false; + + m_rdo_max_threads = 128; + } + + int m_bc7_uber_level; + int m_bc7enc_max_partitions_to_scan; + bool m_perceptual; + bool m_y_flip; + uint32_t m_bc45_channel0; + uint32_t m_bc45_channel1; + + rgbcx::bc1_approx_mode m_bc1_mode; + bool m_use_bc1_3color_mode; + + bool m_use_bc1_3color_mode_for_black; + + int m_bc1_quality_level; + + DXGI_FORMAT m_dxgi_format; + + float m_rdo_lambda; + bool m_rdo_debug_output; + float m_rdo_smooth_block_error_scale; + bool m_custom_rdo_smooth_block_error_scale; + uint32_t m_lookback_window_size; + bool m_custom_lookback_window_size; + bool m_bc7enc_rdo_bc7_quant_mode6_endpoints; + bool m_bc7enc_rdo_bc7_weight_modes; + bool m_bc7enc_rdo_bc7_weight_low_frequency_partitions; + bool m_bc7enc_rdo_bc7_pbit1_weighting; + float m_rdo_max_smooth_block_std_dev; + bool m_rdo_allow_relative_movement; + bool m_rdo_try_2_matches; + bool m_rdo_ultrasmooth_block_handling; + + bool m_use_hq_bc345; + int m_bc345_search_rad; + uint32_t m_bc345_mode_mask; + + bool m_bc7enc_mode6_only; + bool m_rdo_multithreading; + + bool m_bc7enc_reduce_entropy; + + bool m_use_bc7e; + bool m_status_output; + + uint32_t m_rdo_max_threads; + }; + + class rdo_bc_encoder + { + public: + rdo_bc_encoder(); + + void clear(); + + bool init(const utils::image_u8& src_image, rdo_bc_params& params); + bool encode(); + + const rdo_bc_params &get_params() const { return m_params; } + + const utils::image_u8* get_orig_source_image() const { return m_pOrig_source_image; } + const utils::image_u8& get_source_image() const { return m_source_image; } + + const void* get_prerdo_blocks() const { return m_prerdo_packed_image8.size() ? (void*)m_prerdo_packed_image8.data() : (void*)m_prerdo_packed_image16.data(); } + const void* get_blocks() const { return m_packed_image8.size() ? 
(void*)m_packed_image8.data() : (void*)m_packed_image16.data(); } + + bool unpack_blocks(utils::image_u8& unpacked_image) const; + + DXGI_FORMAT get_pixel_format() const { return m_params.m_dxgi_format; } + + uint32_t get_orig_width() const { return m_orig_width; } + uint32_t get_orig_height() const { return m_orig_height; } + uint32_t get_blocks_x() const { return m_blocks_x; } + uint32_t get_blocks_y() const { return m_blocks_y; } + uint32_t get_total_blocks() const { return m_total_blocks; } + uint32_t get_total_blocks_size_in_bytes() const { return m_total_blocks * m_bytes_per_block; } + uint32_t get_bytes_per_block() const { return m_bytes_per_block; } + uint32_t get_pixel_format_bpp() const { return m_pixel_format_bpp; } + uint32_t get_total_texels() const { return m_total_texels; } + bool get_has_alpha() const { return m_has_alpha; } + + private: + const utils::image_u8* m_pOrig_source_image; + utils::image_u8 m_source_image; + rdo_bc_params m_params; + + uint32_t m_orig_width, m_orig_height; + uint32_t m_blocks_x, m_blocks_y, m_total_blocks, m_bytes_per_block, m_pixel_format_bpp; + uint32_t m_total_texels; + bool m_has_alpha; + + utils::block8_vec m_packed_image8; + utils::block16_vec m_packed_image16; + + utils::block8_vec m_prerdo_packed_image8; + utils::block16_vec m_prerdo_packed_image16; + + bc7enc_compress_block_params m_bc7enc_pack_params; +#if SUPPORT_BC7E + ispc::bc7e_compress_block_params m_bc7e_pack_params; +#endif + + void init_encoders(); + bool init_source_image(); + bool init_encoder_params(); + bool encode_texture(); + + struct unpacker_funcs + { + rgbcx::bc1_approx_mode m_mode; + bool m_allow_3color_mode; + bool m_use_bc1_3color_mode_for_black; + + static bool unpack_bc1_block(const void* pBlock, ert::color_rgba* pPixels, uint32_t block_index, void* pUser_data) + { + (void)block_index; + const unpacker_funcs* pState = (const unpacker_funcs*)pUser_data; + + bool used_3color_mode = rgbcx::unpack_bc1(pBlock, pPixels, true, pState->m_mode); + + 
if (used_3color_mode) + { + if (!pState->m_allow_3color_mode) + return false; + + if (!pState->m_use_bc1_3color_mode_for_black) + { + rgbcx::bc1_block* pBC1_block = (rgbcx::bc1_block*)pBlock; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + if (pBC1_block->get_selector(x, y) == 3) + return false; + } // x + } // y + } + } + + return true; + } + + // TODO: Enforce 6/8 color constraints + static bool unpack_bc4_block(const void* pBlock, ert::color_rgba* pPixels, uint32_t block_index, void* pUser_data) + { + (void)block_index; + (void)pUser_data; + memset(pPixels, 0, sizeof(ert::color_rgba) * 16); + rgbcx::unpack_bc4(pBlock, (uint8_t*)pPixels, 4); + return true; + } + + static bool unpack_bc7_block(const void* pBlock, ert::color_rgba* pPixels, uint32_t block_index, void* pUser_data) + { + (void)block_index; + (void)pUser_data; + return bc7decomp::unpack_bc7(pBlock, (bc7decomp::color_rgba*)pPixels); + } + }; + + bool postprocess_rdo(); + }; + +} // namespace rdo_bc diff --git a/libkram/bc7enc/rgbcx.cpp b/libkram/bc7enc/rgbcx.cpp new file mode 100644 index 0000000..b0c4088 --- /dev/null +++ b/libkram/bc7enc/rgbcx.cpp @@ -0,0 +1,3083 @@ +// rgbcx.cpp - see license at end of rgbcx.h +#include "rgbcx.h" +#include +#include +#include + +namespace rgbcx +{ + //const uint8_t g_bc1_to_linear[4] = { 0, 3, 1, 2 }; + + const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS4 = 969; + +#ifdef _MSC_VER +#pragma region +#endif + // All total orderings for 16 pixels 2-bit selectors. + // BC1 selector order 0, 2, 3, 1 (i.e. the selectors are reordered into linear order). 
+ static uint8_t g_unique_total_orders4[NUM_UNIQUE_TOTAL_ORDERINGS4][4] = + { + {0,8,2,6},{4,3,9,0},{4,8,1,3},{12,0,3,1},{11,3,2,0},{6,4,6,0},{7,5,0,4},{6,0,8,2},{1,0,0,15},{3,0,8,5},{1,1,13,1},{13,1,2,0},{0,14,1,1},{0,15,1,0},{0,13,0,3},{16,0,0,0},{4,3,4,5},{8,6,0,2},{0,10,0,6},{10,0,4,2},{7,2,1,6},{4,7,5,0},{1,4,7,4},{0,14,2,0},{2,7,2,5},{9,0,5,2},{9,2,2,3},{10,0,5,1},{2,3,7,4},{4,9,0,3},{1,5,0,10},{1,1,6,8}, + {6,6,4,0},{11,5,0,0},{11,2,0,3},{4,0,10,2},{2,3,10,1},{1,13,1,1},{0,14,0,2},{2,3,3,8},{12,3,1,0},{14,0,0,2},{9,1,3,3},{6,4,0,6},{1,1,5,9},{5,9,0,2},{2,10,1,3},{12,0,0,4},{4,6,6,0},{0,6,4,6},{3,7,4,2},{0,13,3,0},{3,10,0,3},{10,2,1,3},{1,12,1,2},{2,0,13,1},{11,0,5,0},{12,1,3,0},{6,4,5,1},{10,4,2,0},{3,6,1,6},{7,3,6,0},{10,4,0,2},{10,0,2,4}, + {0,5,9,2},{0,9,3,4},{6,4,2,4},{3,4,7,2},{3,3,5,5},{4,2,9,1},{6,2,8,0},{3,5,3,5},{4,10,1,1},{10,1,3,2},{5,7,0,4},{5,3,7,1},{6,8,1,1},{8,8,0,0},{11,1,0,4},{14,1,0,1},{9,3,2,2},{8,2,1,5},{0,0,2,14},{3,3,9,1},{10,1,5,0},{8,3,1,4},{1,5,8,2},{6,1,9,0},{3,2,1,10},{3,11,1,1},{7,6,3,0},{9,0,3,4},{5,2,5,4},{0,2,3,11},{15,0,0,1},{0,6,6,4}, + {3,4,9,0},{4,7,0,5},{0,4,4,8},{0,13,2,1},{2,4,1,9},{3,2,5,6},{10,6,0,0},{3,5,6,2},{8,0,4,4},{1,3,6,6},{7,7,0,2},{6,1,4,5},{0,11,1,4},{2,2,8,4},{0,1,2,13},{15,0,1,0},{7,2,6,1},{8,1,7,0},{1,8,4,3},{2,13,1,0},{1,0,7,8},{14,2,0,0},{1,8,1,6},{9,3,3,1},{0,0,7,9},{4,4,1,7},{9,0,6,1},{10,2,4,0},{1,7,3,5},{0,3,8,5},{5,2,4,5},{1,2,5,8}, + {0,8,7,1},{10,3,2,1},{12,0,4,0},{2,1,4,9},{5,2,2,7},{1,9,3,3},{15,1,0,0},{6,3,4,3},{9,5,0,2},{1,6,9,0},{6,6,0,4},{13,2,1,0},{5,1,8,2},{0,5,11,0},{7,1,0,8},{1,2,12,1},{0,3,3,10},{7,4,2,3},{5,1,4,6},{7,0,3,6},{3,12,0,1},{3,4,5,4},{1,10,0,5},{7,4,3,2},{10,5,0,1},{13,3,0,0},{2,5,4,5},{3,10,1,2},{5,1,2,8},{14,0,1,1},{1,5,4,6},{1,4,5,6}, + 
{2,3,11,0},{11,0,4,1},{11,2,2,1},{5,3,8,0},{1,3,10,2},{0,1,13,2},{3,1,4,8},{4,2,4,6},{1,5,6,4},{2,1,11,2},{1,2,9,4},{4,7,3,2},{6,2,5,3},{7,2,2,5},{8,1,4,3},{3,2,8,3},{12,1,0,3},{7,8,1,0},{7,0,2,7},{5,10,0,1},{0,2,14,0},{2,9,3,2},{7,0,0,9},{11,1,4,0},{10,4,1,1},{2,2,9,3},{5,7,2,2},{1,3,1,11},{13,2,0,1},{4,2,8,2},{2,3,1,10},{4,2,5,5}, + {7,0,7,2},{10,0,0,6},{0,8,5,3},{4,4,0,8},{12,4,0,0},{0,1,14,1},{8,0,1,7},{5,1,5,5},{11,0,3,2},{0,4,1,11},{0,8,8,0},{0,2,5,9},{7,3,2,4},{7,8,0,1},{1,0,3,12},{7,4,5,0},{1,6,7,2},{7,6,1,2},{9,6,1,0},{12,2,0,2},{4,1,6,5},{4,0,1,11},{8,4,4,0},{13,0,1,2},{8,6,2,0},{4,12,0,0},{2,7,5,2},{2,0,5,9},{5,4,5,2},{3,8,5,0},{7,3,3,3},{4,4,8,0}, + {2,1,3,10},{5,0,1,10},{6,4,3,3},{4,9,1,2},{1,4,0,11},{11,3,1,1},{4,0,12,0},{13,0,0,3},{6,1,6,3},{9,0,4,3},{8,0,0,8},{8,4,0,4},{0,12,1,3},{0,4,10,2},{3,4,8,1},{1,3,8,4},{9,2,5,0},{5,7,4,0},{1,0,11,4},{4,10,0,2},{1,3,12,0},{6,9,0,1},{5,0,9,2},{5,9,2,0},{13,1,0,2},{9,3,4,0},{9,4,0,3},{3,1,12,0},{2,4,3,7},{1,2,13,0},{2,2,4,8},{6,8,0,2}, + {9,2,1,4},{9,5,1,1},{2,0,4,10},{5,4,0,7},{0,0,6,10},{1,2,0,13},{4,7,2,3},{6,5,5,0},{3,3,1,9},{1,6,1,8},{12,2,1,1},{4,4,5,3},{1,0,6,9},{0,6,10,0},{4,8,3,1},{4,3,2,7},{2,1,7,6},{1,9,1,5},{3,1,3,9},{8,7,1,0},{1,2,3,10},{14,1,1,0},{5,4,4,3},{3,7,0,6},{7,4,1,4},{3,7,5,1},{1,1,0,14},{0,10,3,3},{0,4,3,9},{1,7,7,1},{2,0,10,4},{5,8,0,3}, + {6,7,3,0},{0,8,4,4},{5,7,3,1},{7,9,0,0},{7,6,2,1},{0,4,5,7},{6,3,5,2},{1,2,1,12},{5,2,0,9},{8,5,0,3},{4,6,1,5},{1,1,7,7},{10,5,1,0},{1,2,8,5},{1,8,2,5},{5,1,0,10},{6,9,1,0},{13,0,2,1},{8,3,5,0},{6,3,6,1},{2,11,3,0},{3,7,3,3},{1,5,2,8},{7,5,2,2},{0,6,7,3},{13,1,1,1},{5,3,4,4},{7,2,7,0},{5,8,3,0},{3,13,0,0},{0,7,9,0},{8,0,3,5}, + 
{1,3,7,5},{4,0,2,10},{12,0,1,3},{1,7,6,2},{3,9,0,4},{7,2,0,7},{0,1,7,8},{2,1,8,5},{0,13,1,2},{0,8,1,7},{5,0,11,0},{5,6,2,3},{0,3,0,13},{2,3,4,7},{5,6,3,2},{4,2,10,0},{3,3,7,3},{7,2,5,2},{1,1,11,3},{12,3,0,1},{5,1,1,9},{1,15,0,0},{9,7,0,0},{9,1,2,4},{0,7,3,6},{3,0,13,0},{3,0,11,2},{0,6,5,5},{8,2,2,4},{6,10,0,0},{4,8,4,0},{0,0,3,13}, + {0,4,12,0},{7,1,6,2},{3,5,0,8},{8,0,6,2},{6,2,3,5},{2,10,0,4},{4,11,0,1},{6,1,5,4},{5,1,3,7},{0,11,3,2},{4,6,0,6},{2,6,0,8},{3,1,7,5},{2,14,0,0},{2,9,2,3},{0,3,4,9},{11,0,1,4},{13,0,3,0},{8,3,0,5},{0,5,3,8},{5,11,0,0},{0,1,4,11},{2,1,9,4},{3,4,4,5},{7,1,2,6},{12,2,2,0},{9,4,1,2},{6,0,2,8},{4,6,2,4},{11,2,3,0},{3,2,2,9},{10,3,1,2}, + {1,1,2,12},{0,5,2,9},{0,1,11,4},{6,2,4,4},{2,8,2,4},{0,9,4,3},{11,0,2,3},{0,2,11,3},{6,0,7,3},{0,3,6,7},{4,5,5,2},{1,2,6,7},{7,5,1,3},{9,0,2,5},{2,6,4,4},{4,1,9,2},{4,8,2,2},{1,12,3,0},{0,9,6,1},{0,10,6,0},{3,1,5,7},{2,13,0,1},{2,2,1,11},{3,6,0,7},{5,6,5,0},{5,5,4,2},{4,0,3,9},{3,4,1,8},{0,11,2,3},{2,12,1,1},{7,1,3,5},{7,0,9,0}, + {8,0,8,0},{1,0,2,13},{3,3,10,0},{2,4,4,6},{2,3,8,3},{1,10,5,0},{7,3,0,6},{2,9,0,5},{1,4,6,5},{6,6,3,1},{5,6,0,5},{6,3,0,7},{3,10,2,1},{2,5,5,4},{3,8,4,1},{1,14,0,1},{10,3,3,0},{3,5,7,1},{1,1,3,11},{2,4,0,10},{9,3,1,3},{5,10,1,0},{3,0,6,7},{3,1,9,3},{11,2,1,2},{5,3,3,5},{0,5,1,10},{4,1,11,0},{10,2,0,4},{7,6,0,3},{2,7,0,7},{4,2,2,8}, + {6,1,7,2},{4,9,2,1},{0,0,8,8},{3,7,2,4},{9,6,0,1},{0,12,4,0},{6,7,1,2},{0,7,2,7},{1,0,10,5},{0,0,14,2},{2,7,3,4},{5,0,0,11},{7,7,1,1},{6,2,7,1},{4,5,3,4},{3,5,1,7},{5,9,1,1},{6,2,1,7},{3,2,0,11},{0,11,0,5},{3,11,2,0},{10,1,4,1},{7,0,4,5},{11,4,0,1},{10,3,0,3},{0,2,4,10},{0,15,0,1},{0,11,5,0},{6,7,2,1},{1,12,2,1},{4,1,3,8},{1,0,13,2}, + 
{1,8,5,2},{7,0,1,8},{3,12,1,0},{9,2,4,1},{1,7,4,4},{11,4,1,0},{4,3,8,1},{2,8,4,2},{1,11,3,1},{1,1,4,10},{4,10,2,0},{8,2,5,1},{1,0,9,6},{5,3,2,6},{0,9,7,0},{10,2,2,2},{5,8,1,2},{8,7,0,1},{0,3,12,1},{1,0,1,14},{4,8,0,4},{3,8,0,5},{4,6,5,1},{0,9,5,2},{10,2,3,1},{2,3,9,2},{1,0,12,3},{11,3,0,2},{4,5,2,5},{0,2,12,2},{9,1,0,6},{9,2,0,5}, + {1,2,7,6},{4,7,4,1},{0,12,2,2},{0,0,0,16},{2,8,3,3},{3,6,2,5},{0,6,3,7},{7,5,4,0},{3,3,3,7},{3,3,0,10},{5,0,6,5},{0,0,10,6},{8,5,3,0},{8,1,5,2},{6,0,9,1},{11,1,2,2},{2,11,2,1},{9,5,2,0},{3,0,4,9},{2,2,12,0},{2,6,6,2},{2,1,13,0},{6,0,5,5},{2,0,14,0},{2,11,1,2},{4,4,7,1},{2,0,11,3},{3,1,1,11},{2,9,4,1},{3,7,6,0},{14,0,2,0},{1,10,4,1}, + {8,0,7,1},{3,6,5,2},{0,3,11,2},{2,5,6,3},{11,1,3,1},{6,5,3,2},{3,8,1,4},{0,2,7,7},{2,10,2,2},{1,6,2,7},{11,0,0,5},{12,1,1,2},{12,1,2,1},{0,7,1,8},{0,3,9,4},{0,2,1,13},{7,1,4,4},{10,1,0,5},{4,0,8,4},{5,2,7,2},{0,2,0,14},{4,3,7,2},{2,7,1,6},{1,2,2,11},{6,3,3,4},{1,14,1,0},{2,4,6,4},{5,3,6,2},{5,3,5,3},{8,4,1,3},{1,3,0,12},{3,5,2,6}, + {1,8,7,0},{0,7,4,5},{2,1,6,7},{4,11,1,0},{7,2,4,3},{6,1,3,6},{4,5,4,3},{2,11,0,3},{1,5,7,3},{12,0,2,2},{5,0,4,7},{1,13,0,2},{7,7,2,0},{4,1,7,4},{4,5,0,7},{5,0,5,6},{6,5,4,1},{2,4,2,8},{1,10,1,4},{6,3,1,6},{3,3,8,2},{0,7,7,2},{4,4,2,6},{1,1,8,6},{1,12,0,3},{2,1,12,1},{1,9,2,4},{1,11,0,4},{2,5,2,7},{10,0,3,3},{4,6,3,3},{3,7,1,5}, + {1,9,0,6},{7,1,7,1},{1,6,5,4},{9,2,3,2},{6,2,2,6},{2,2,2,10},{8,3,3,2},{0,1,8,7},{2,0,8,6},{0,3,1,12},{9,4,2,1},{9,4,3,0},{6,2,6,2},{1,8,0,7},{5,1,10,0},{0,5,5,6},{8,2,4,2},{2,3,2,9},{6,0,3,7},{2,2,6,6},{2,6,2,6},{1,13,2,0},{9,3,0,4},{7,3,5,1},{6,5,2,3},{5,2,6,3},{2,0,12,2},{5,7,1,3},{8,1,3,4},{3,1,10,2},{1,0,15,0},{0,8,0,8}, + {5,0,7,4},{4,4,6,2},{0,1,0,15},{10,0,1,5},{7,3,4,2},{4,9,3,0},{2,5,7,2},{3,4,2,7},{8,3,2,3},{5,1,6,4},{0,10,2,4},{6,6,1,3},{6,0,0,10},{4,4,3,5},{1,3,9,3},{7,5,3,1},{3,0,7,6},{1,8,6,1},{4,3,0,9},{3,11,0,2},{6,0,6,4},{0,1,3,12},{0,4,2,10},{5,5,6,0},{4,1,4,7},{8,1,6,1},{5,6,4,1},{8,4,2,2},{4,3,1,8},{3,0,2,11},{1,11,4,0},{0,8,3,5}, 
+ {5,1,7,3},{7,0,8,1},{4,3,5,4},{4,6,4,2},{3,2,4,7},{1,6,3,6},{0,7,8,1},{3,0,1,12},{9,1,4,2},{7,4,0,5},{1,7,0,8},{5,4,1,6},{9,1,5,1},{1,1,9,5},{4,1,1,10},{5,3,0,8},{2,2,5,7},{4,0,0,12},{9,0,7,0},{3,4,0,9},{0,2,6,8},{8,2,0,6},{3,2,6,5},{4,2,6,4},{3,6,4,3},{2,8,6,0},{5,0,3,8},{0,4,0,12},{0,16,0,0},{0,9,2,5},{4,0,11,1},{1,6,4,5}, + {0,1,6,9},{3,4,6,3},{3,0,10,3},{7,0,6,3},{1,4,9,2},{1,5,3,7},{8,5,2,1},{0,12,0,4},{7,2,3,4},{0,5,6,5},{11,1,1,3},{6,5,0,5},{2,1,5,8},{1,4,11,0},{9,1,1,5},{0,0,13,3},{5,8,2,1},{2,12,0,2},{3,3,6,4},{4,1,10,1},{4,0,5,7},{8,1,0,7},{5,1,9,1},{4,3,3,6},{0,2,2,12},{6,3,2,5},{0,0,12,4},{1,5,1,9},{2,6,5,3},{3,6,3,4},{2,12,2,0},{1,6,8,1}, + {10,1,1,4},{1,3,4,8},{7,4,4,1},{1,11,1,3},{1,2,10,3},{3,9,3,1},{8,5,1,2},{2,10,4,0},{4,2,0,10},{2,7,6,1},{8,2,3,3},{1,5,5,5},{3,1,0,12},{3,10,3,0},{8,0,5,3},{0,6,8,2},{0,3,13,0},{0,0,16,0},{1,9,4,2},{4,1,8,3},{1,6,6,3},{0,10,5,1},{0,1,12,3},{4,0,6,6},{3,8,3,2},{0,5,4,7},{1,0,14,1},{0,4,6,6},{3,9,1,3},{3,5,8,0},{3,6,6,1},{5,4,7,0}, + {3,0,12,1},{8,6,1,1},{2,9,5,0},{6,1,1,8},{4,1,2,9},{3,9,4,0},{5,2,9,0},{0,12,3,1},{1,4,10,1},{4,0,7,5},{3,1,2,10},{5,4,2,5},{5,5,5,1},{4,2,3,7},{1,7,5,3},{2,8,0,6},{8,1,2,5},{3,8,2,3},{6,1,2,7},{3,9,2,2},{9,0,0,7},{0,8,6,2},{8,4,3,1},{0,2,8,6},{6,5,1,4},{2,3,5,6},{2,10,3,1},{0,7,0,9},{4,2,7,3},{2,4,8,2},{7,1,1,7},{2,4,7,3}, + {2,4,10,0},{0,1,10,5},{4,7,1,4},{0,10,4,2},{9,0,1,6},{1,9,6,0},{3,3,4,6},{4,5,7,0},{5,5,2,4},{2,8,1,5},{2,3,6,5},{0,1,1,14},{3,2,3,8},{10,1,2,3},{9,1,6,0},{3,4,3,6},{2,2,0,12},{0,0,9,7},{4,0,9,3},{7,0,5,4},{4,5,6,1},{2,5,1,8},{2,5,9,0},{3,5,4,4},{1,3,11,1},{7,1,5,3},{3,2,7,4},{1,4,2,9},{1,11,2,2},{2,2,3,9},{5,0,10,1},{3,2,11,0}, + {1,10,3,2},{8,3,4,1},{3,6,7,0},{0,7,5,4},{1,3,3,9},{2,2,10,2},{1,9,5,1},{0,5,0,11},{3,0,3,10},{0,4,8,4},{2,7,7,0},{2,0,2,12},{1,2,11,2},{6,3,7,0},{0,6,2,8},{0,10,1,5},{0,9,0,7},{6,4,4,2},{6,0,1,9},{1,5,10,0},{5,4,6,1},{5,5,3,3},{0,0,4,12},{0,3,2,11},{1,4,1,10},{3,0,9,4},{5,5,0,6},{1,7,8,0},{2,0,3,11},{6,4,1,5},{10,0,6,0},{0,6,0,10}, + 
{0,4,11,1},{3,1,6,6},{2,5,8,1},{0,2,10,4},{3,1,11,1},{6,6,2,2},{1,1,10,4},{2,1,2,11},{6,1,8,1},{0,2,13,1},{0,7,6,3},{6,8,2,0},{3,0,0,13},{4,4,4,4},{6,2,0,8},{7,3,1,5},{0,11,4,1},{6,7,0,3},{2,6,3,5},{5,2,1,8},{7,1,8,0},{5,5,1,5},{1,8,3,4},{8,2,6,0},{6,0,10,0},{5,6,1,4},{1,4,4,7},{2,7,4,3},{1,4,8,3},{5,4,3,4},{1,10,2,3},{2,9,1,4}, + {2,2,11,1},{2,5,0,9},{0,0,1,15},{0,0,11,5},{0,4,7,5},{0,1,15,0},{2,1,0,13},{0,3,10,3},{8,0,2,6},{3,3,2,8},{3,5,5,3},{1,7,1,7},{1,3,2,10},{4,0,4,8},{2,0,9,5},{1,1,1,13},{2,2,7,5},{2,1,10,3},{4,2,1,9},{4,3,6,3},{1,3,5,7},{2,5,3,6},{1,0,8,7},{5,0,2,9},{2,8,5,1},{1,6,0,9},{0,0,5,11},{0,4,9,3},{2,0,7,7},{1,7,2,6},{2,1,1,12},{2,4,9,1}, + {0,5,7,4},{6,0,4,6},{3,2,10,1},{0,6,1,9},{2,6,1,7},{0,5,8,3},{4,1,0,11},{1,2,4,9},{4,1,5,6},{6,1,0,9},{1,4,3,8},{4,5,1,6},{1,0,5,10},{5,3,1,7},{0,9,1,6},{2,0,1,13},{2,0,6,8},{8,1,1,6},{1,5,9,1},{0,6,9,1},{0,3,5,8},{0,2,9,5},{5,2,8,1},{1,1,14,0},{3,2,9,2},{5,0,8,3},{0,5,10,1},{5,2,3,6},{2,6,7,1},{2,3,0,11},{0,1,9,6},{1,0,4,11}, + {3,0,5,8},{0,0,15,1},{2,4,5,5},{0,3,7,6},{2,0,0,14},{1,1,12,2},{2,6,8,0},{3,1,8,4},{0,1,5,10} + }; + + // All total orderings for 16 pixels [0,2] 2-bit selectors. + // BC1 selector order: 0, 1, 2 + // Note this is different from g_unique_total_orders4[], which reorders the selectors into linear order. 
+ const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS3 = 153; + static uint8_t g_unique_total_orders3[NUM_UNIQUE_TOTAL_ORDERINGS3][3] = + { + {6,0,10},{3,6,7},{3,0,13},{13,3,0},{12,4,0},{9,1,6},{2,13,1},{4,7,5},{7,5,4},{9,6,1},{7,4,5},{8,6,2},{16,0,0},{10,6,0},{2,7,7}, + {0,0,16},{0,3,13},{1,15,0},{0,2,14},{1,4,11},{15,1,0},{1,12,3},{9,2,5},{14,1,1},{8,2,6},{3,3,10},{4,2,10},{14,0,2},{0,14,2},{1,7,8},{6,6,4}, + {11,5,0},{6,4,6},{11,3,2},{4,3,9},{7,1,8},{10,4,2},{12,1,3},{11,0,5},{9,3,4},{1,0,15},{9,0,7},{2,6,8},{12,2,2},{6,2,8},{6,8,2},{15,0,1}, + {4,8,4},{0,4,12},{8,5,3},{5,9,2},{11,2,3},{12,3,1},{6,3,7},{1,1,14},{2,9,5},{1,8,7},{4,10,2},{7,7,2},{13,1,2},{0,15,1},{3,2,11},{7,0,9}, + {4,4,8},{3,8,5},{0,5,11},{13,2,1},{1,10,5},{4,11,1},{3,10,3},{5,10,1},{10,2,4},{0,6,10},{14,2,0},{11,4,1},{3,12,1},{1,13,2},{1,5,10},{5,11,0}, + {12,0,4},{8,1,7},{6,10,0},{3,13,0},{7,2,7},{0,7,9},{5,8,3},{0,12,4},{11,1,4},{13,0,3},{0,16,0},{5,7,4},{10,3,3},{10,0,6},{0,13,3},{4,6,6}, + {2,8,6},{2,5,9},{7,8,1},{2,1,13},{2,0,14},{7,3,6},{5,1,10},{3,11,2},{5,4,7},{8,3,5},{10,5,1},{6,9,1},{1,3,12},{4,5,7},{2,2,12},{4,1,11}, + {0,8,8},{4,12,0},{6,5,5},{8,7,1},{5,5,6},{3,7,6},{7,9,0},{4,9,3},{0,10,6},{8,0,8},{5,3,8},{10,1,5},{6,1,9},{7,6,3},{9,5,2},{0,1,15}, + {9,7,0},{2,14,0},{3,4,9},{8,4,4},{9,4,3},{0,9,7},{1,9,6},{3,9,4},{5,2,9},{2,3,11},{5,6,5},{1,14,1},{6,7,3},{2,4,10},{2,12,2},{8,8,0}, + {2,10,4},{4,0,12},{0,11,5},{2,11,3},{1,11,4},{3,5,8},{5,0,11},{3,1,12},{1,2,13},{1,6,9} + }; + + // For each total ordering, this table indicates which other total orderings are likely to improve quality using a least squares pass. Each array is sorted by usefulness. 
+ static uint16_t g_best_total_orderings4[NUM_UNIQUE_TOTAL_ORDERINGS4][MAX_TOTAL_ORDERINGS4] = + { +#if RGBCX_USE_SMALLER_TABLES + #include "rgbcx_table4_small.h" +#else + #include "rgbcx_table4.h" +#endif + }; + + static uint8_t g_best_total_orderings3[NUM_UNIQUE_TOTAL_ORDERINGS3][32] = + { + { 12,1,3,5,27,2,4,38,8,7,16,18,6,10,41,79,40,23,46,9,20,88,22,37,14,19,24,126,99,119,35,11 }, + { 7,64,116,14,94,30,8,42,1,108,47,55,137,10,134,95,96,115,69,32,63,29,90,113,11,148,16,103,19,9,34,25 }, + { 12,1,0,5,3,7,4,27,8,6,38,40,41,16,18,46,9,10,20,23,79,62,14,22,88,99,37,126,92,19,120,11 }, + { 16,88,27,18,46,48,126,107,79,19,59,38,37,65,23,66,0,2,3,43,12,151,28,25,5,87,72,40,1,20,52,92 }, + { 79,48,88,16,27,65,18,38,46,19,37,4,72,33,126,41,52,0,12,92,5,1,2,107,3,77,23,91,43,51,22,74 }, + { 1,8,41,122,10,22,2,0,87,24,37,120,38,7,39,4,5,3,9,92,62,59,23,16,104,11,27,79,19,26,25,32 }, + { 2,76,99,28,40,86,93,21,138,60,6,0,17,128,145,119,98,144,141,82,147,54,67,75,5,12,27,132,146,1,38,14 }, + { 47,7,64,90,1,118,116,85,57,14,30,94,50,45,137,134,8,42,69,139,55,68,58,108,95,29,10,115,0,32,2,11 }, + { 49,8,10,30,124,11,32,113,130,58,125,9,100,53,104,115,131,103,24,7,1,39,45,36,139,0,137,22,90,44,114,105 }, + { 9,38,72,125,49,41,84,11,13,5,27,0,16,92,8,2,65,105,10,18,48,29,127,131,36,14,1,46,111,79,130,12 }, + { 130,8,10,100,104,131,49,32,53,39,30,36,113,24,11,22,124,44,83,58,7,103,1,4,9,125,5,0,91,33,115,74 }, + { 114,11,58,8,120,49,9,124,142,111,41,30,10,0,97,130,62,84,38,5,72,125,92,127,100,27,139,113,13,132,32,1 }, + { 60,46,28,27,40,20,0,17,18,2,126,16,6,38,86,23,79,54,1,93,5,88,41,14,21,111,7,48,3,84,72,62 }, + { 72,92,38,65,84,48,41,79,27,16,29,111,88,5,18,46,1,0,152,14,37,19,77,42,132,7,22,13,119,56,12,2 }, + { 7,55,1,95,29,56,64,116,143,8,14,30,47,94,152,90,65,67,10,133,42,72,146,84,16,48,6,0,25,108,77,21 }, + { 27,23,20,5,0,79,38,2,3,1,59,46,4,41,33,86,37,87,88,92,7,126,43,8,22,152,151,150,149,148,147,146 }, + { 
12,0,1,2,7,6,3,5,28,4,8,14,60,40,17,19,21,86,126,93,10,18,9,29,48,99,65,25,84,119,72,41 }, + { 60,40,99,2,54,12,0,1,19,28,98,93,6,138,21,5,27,17,151,14,76,46,16,18,38,29,86,144,107,7,25,41 }, + { 12,0,1,2,3,5,6,7,4,28,8,60,14,40,16,17,21,10,19,9,86,38,126,41,93,27,29,48,62,84,79,99 }, + { 0,1,2,10,5,8,3,25,4,29,32,34,63,7,77,26,16,48,65,56,14,22,129,103,72,24,18,152,140,53,96,42 }, + { 46,126,18,54,12,16,1,0,5,2,27,98,20,23,6,3,88,48,28,7,19,8,4,60,151,38,37,21,79,14,65,40 }, + { 76,6,141,86,119,2,138,67,28,145,0,93,17,1,40,60,146,99,147,14,21,144,132,7,5,29,55,27,16,75,19,12 }, + { 71,5,51,39,22,80,0,43,10,122,8,62,41,24,104,87,35,37,2,91,33,120,36,38,1,131,9,100,130,66,3,4 }, + { 126,18,46,27,20,16,88,23,12,79,54,59,48,0,73,1,37,151,5,19,28,38,2,66,60,3,65,98,14,26,6,43 }, + { 22,10,8,5,0,71,35,80,104,39,24,51,100,1,62,32,2,130,11,41,7,9,53,43,49,83,122,120,30,44,37,38 }, + { 1,34,14,129,53,63,42,26,121,148,7,44,96,10,0,24,100,32,64,116,140,22,5,19,29,103,135,108,8,61,39,83 }, + { 1,7,34,63,44,25,135,14,24,108,22,0,83,94,5,129,35,101,47,121,2,19,42,53,6,110,103,8,148,10,16,123 }, + { 12,28,16,60,18,1,6,21,14,0,86,19,2,48,93,17,38,29,7,5,65,126,46,72,41,79,84,119,40,56,54,88 }, + { 0,2,12,27,5,46,38,40,41,79,88,99,3,23,1,62,20,4,22,37,92,35,18,8,16,24,10,60,7,120,98,54 }, + { 1,7,14,56,8,0,84,67,10,2,133,72,42,111,5,30,21,4,9,3,25,94,16,116,47,11,65,18,132,90,55,64 }, + { 30,8,124,139,45,11,58,90,113,137,7,115,10,32,1,49,94,85,9,47,108,103,0,97,63,14,50,114,53,106,100,25 }, + { 65,38,48,27,16,79,72,18,88,19,46,77,84,92,37,41,0,29,1,14,12,111,2,5,31,36,87,74,105,40,28,51 }, + { 10,8,30,113,130,100,53,32,115,103,104,7,1,121,39,49,131,44,24,36,63,137,34,45,22,90,108,83,26,11,94,139 }, + { 51,52,43,33,5,74,16,37,71,91,38,3,36,87,48,22,4,0,122,41,39,18,66,27,79,24,65,88,59,23,62,92 }, + { 1,7,63,53,108,121,94,44,103,100,14,10,129,47,32,26,24,25,148,42,135,22,0,61,83,8,39,104,5,64,115,34 }, + { 
1,8,10,7,5,0,80,32,62,2,24,44,53,83,9,41,30,22,100,11,14,25,120,4,26,6,3,16,122,34,19,35 }, + { 74,4,36,48,33,91,39,79,22,16,65,5,131,38,24,71,27,52,0,105,51,18,88,104,3,31,10,37,72,19,41,130 }, + { 59,43,38,79,23,27,92,51,0,16,46,5,18,88,41,37,66,3,87,20,48,2,122,4,22,12,1,126,19,65,33,24 }, + { 12,28,1,27,0,16,2,46,65,60,21,3,5,18,6,19,48,14,4,7,79,88,86,29,22,72,93,40,23,8,17,41 }, + { 22,91,39,33,24,71,5,131,36,10,51,0,130,8,104,2,35,125,9,43,52,49,83,80,100,41,122,3,37,38,4,16 }, + { 12,0,1,2,5,3,4,8,7,27,18,38,10,6,16,46,9,20,41,23,126,79,22,14,19,99,88,54,37,48,62,35 }, + { 12,27,1,2,3,0,46,4,38,16,8,28,7,79,18,5,84,6,88,10,14,21,23,20,40,22,60,19,9,29,72,65 }, + { 1,14,7,55,95,29,8,94,30,56,10,108,77,116,152,64,32,48,63,42,143,148,16,25,137,65,11,0,115,9,19,72 }, + { 37,79,66,38,16,52,48,59,43,27,87,33,41,4,23,51,3,5,88,18,92,46,73,122,22,71,20,0,65,19,2,120 }, + { 24,32,83,22,53,1,8,10,7,30,35,5,103,0,100,101,121,113,34,123,63,2,44,25,71,115,80,14,26,108,51,39 }, + { 97,45,111,58,85,139,0,90,47,7,120,106,142,30,50,132,41,62,84,1,119,114,14,56,117,8,38,29,2,64,116,5 }, + { 12,28,16,18,1,60,6,14,2,21,0,86,126,19,48,93,7,27,17,29,5,65,54,38,72,79,84,88,119,145,8,111 }, + { 118,47,64,116,57,85,7,14,50,1,42,0,45,68,86,69,2,111,134,28,90,55,16,29,56,48,84,144,60,30,112,41 }, + { 12,1,2,0,7,6,28,5,3,4,8,14,60,21,18,40,17,86,10,9,16,29,19,93,126,79,38,84,72,27,111,119 }, + { 11,8,49,130,10,125,9,124,100,114,131,30,58,104,32,39,24,113,36,105,0,41,22,120,5,53,111,38,142,44,83,35 }, + { 50,70,47,118,85,57,106,0,45,7,64,90,81,14,2,134,28,62,86,55,69,1,78,119,68,56,18,67,16,60,29,21 }, + { 43,37,33,87,51,41,66,5,122,38,22,59,92,0,23,91,27,16,71,79,18,52,120,4,3,24,46,20,73,39,62,36 }, + { 79,48,4,16,27,88,43,33,18,38,65,37,46,3,19,51,52,22,66,87,74,5,41,91,23,59,0,71,122,72,20,92 }, + { 32,100,10,8,30,104,24,44,39,113,83,103,1,7,22,53,115,63,135,121,26,35,34,5,0,108,137,90,91,45,2,130 }, + { 
0,1,2,5,16,12,6,7,14,3,19,18,29,20,4,21,40,8,17,35,23,48,126,22,25,56,26,10,98,27,38,65 }, + { 143,67,56,146,1,7,133,55,64,141,134,69,6,47,14,29,84,21,111,147,57,16,95,72,118,132,50,0,2,18,119,42 }, + { 1,7,67,14,133,111,8,84,0,21,2,47,64,132,55,10,95,147,119,42,16,5,72,56,4,3,6,29,9,25,18,30 }, + { 68,57,69,112,144,86,102,2,134,55,0,70,118,64,75,47,14,28,93,143,67,7,50,149,1,21,29,56,119,95,60,78 }, + { 58,97,114,30,124,45,11,139,8,90,0,142,7,10,41,113,84,62,49,111,85,1,9,5,137,120,32,14,2,117,47,38 }, + { 23,66,18,79,38,20,43,27,16,88,46,59,126,37,87,12,73,92,3,5,48,0,19,54,2,51,28,1,41,65,122,22 }, + { 0,12,2,27,5,40,46,38,1,41,3,79,88,23,99,4,20,62,22,54,92,18,8,37,16,35,10,7,19,120,144,24 }, + { 1,14,25,26,0,7,44,34,129,42,24,5,135,22,19,148,6,96,83,2,29,16,63,35,101,64,140,136,116,110,3,10 }, + { 12,1,2,27,3,4,38,5,7,8,18,16,46,6,0,40,41,10,79,23,88,9,20,22,14,19,37,92,48,126,28,21 }, + { 7,1,10,32,108,103,94,47,8,53,25,14,34,115,100,129,121,130,148,42,64,116,63,26,44,0,24,30,113,4,104,22 }, + { 47,134,7,14,55,69,64,95,1,29,85,118,56,116,45,57,102,143,50,90,42,30,16,94,0,8,67,75,133,2,18,48 }, + { 12,1,2,0,7,6,28,8,14,5,3,4,40,21,17,18,60,86,16,93,126,10,9,29,99,38,119,25,19,54,27,84 }, + { 59,16,27,18,23,88,79,37,46,66,38,20,73,126,3,43,48,87,92,51,41,12,19,5,52,107,65,0,151,122,54,2 }, + { 1,21,147,7,119,14,76,132,55,0,86,145,2,6,69,67,16,143,111,138,17,28,29,60,18,93,8,19,40,56,84,5 }, + { 144,86,112,2,68,102,69,0,149,93,75,28,57,55,145,60,21,67,99,134,143,40,146,119,82,110,62,6,29,26,78,14 }, + { 102,57,55,69,143,75,146,67,56,68,134,2,29,141,0,21,6,14,133,118,64,1,7,95,47,84,111,28,147,82,72,119 }, + { 0,70,57,119,50,145,2,86,28,118,69,78,149,47,60,68,67,55,93,81,134,21,14,62,64,7,5,1,132,85,41,16 }, + { 51,5,43,71,122,87,41,37,91,39,0,22,33,36,38,24,66,120,62,2,80,16,92,10,59,4,27,23,35,79,8,3 }, + { 12,1,2,0,7,6,28,5,8,14,3,21,40,4,60,17,86,18,16,93,10,9,126,119,99,29,19,41,38,27,25,92 }, + { 
27,18,46,126,23,16,88,79,20,151,59,73,48,38,0,54,12,2,37,1,19,5,28,60,66,41,3,109,86,65,40,6 }, + { 48,79,4,33,16,74,65,38,88,27,91,52,18,36,22,19,46,0,37,3,51,5,71,39,72,43,24,41,92,87,2,10 }, + { 86,2,144,93,28,112,141,6,102,21,99,60,75,0,68,82,69,146,67,149,55,40,145,76,111,147,56,119,110,143,26,132 }, + { 6,138,2,99,86,17,40,93,28,21,145,141,0,60,119,147,128,76,67,54,1,12,5,27,144,14,38,98,146,41,29,19 }, + { 1,8,0,10,2,29,7,5,3,56,4,25,14,152,63,32,65,72,96,42,34,108,48,9,26,16,84,103,67,148,22,129 }, + { 149,145,0,86,2,28,93,144,62,60,119,101,21,41,5,35,78,99,26,40,12,68,57,67,110,120,69,18,55,76,132,70 }, + { 12,28,16,1,48,19,6,60,2,14,18,21,0,27,46,65,86,29,5,7,72,93,40,3,17,84,56,88,126,4,38,8 }, + { 1,8,5,10,7,24,2,62,0,41,22,122,120,9,4,3,32,87,11,37,38,83,100,44,25,104,16,26,39,80,14,6 }, + { 0,119,62,86,145,149,28,132,93,2,120,67,60,41,35,5,144,21,123,38,111,81,84,56,12,44,24,50,92,55,40,22 }, + { 2,93,99,28,40,144,60,0,86,150,76,21,149,98,6,25,1,61,82,26,12,5,54,141,7,18,145,16,27,138,110,38 }, + { 24,8,10,22,32,35,100,5,1,53,0,7,71,80,30,123,83,104,51,11,2,39,44,113,9,62,25,103,34,101,43,41 }, + { 12,1,2,0,7,6,28,5,40,60,8,16,3,18,14,4,86,21,17,93,41,10,9,99,27,119,38,19,126,22,48,145 }, + { 45,47,50,7,85,90,97,1,64,139,116,118,30,58,14,106,70,111,0,57,94,42,137,142,29,120,8,56,18,134,84,41 }, + { 12,0,2,5,27,38,1,46,41,40,79,144,3,22,88,23,28,60,99,62,6,24,26,7,4,16,10,35,37,18,14,20 }, + { 37,38,59,92,0,5,23,51,79,41,27,22,2,3,87,16,46,4,1,43,20,33,18,88,24,71,8,10,48,19,126,122 }, + { 12,28,16,60,1,18,6,21,19,14,48,0,2,86,93,5,46,29,17,27,65,7,3,72,38,126,119,40,84,37,56,4 }, + { 0,2,5,1,16,6,27,28,18,38,60,7,14,21,46,40,86,41,19,48,93,8,3,79,22,4,10,37,62,23,24,111 }, + { 85,7,90,30,47,139,45,50,94,58,137,1,8,64,14,116,118,115,113,11,124,108,0,10,97,57,32,70,42,106,29,114 }, + { 33,36,22,71,51,5,91,39,0,52,43,24,131,74,16,37,38,122,41,3,87,48,4,104,35,80,10,2,105,62,27,18 }, + { 
12,1,27,2,0,16,3,28,46,18,4,6,5,72,21,79,38,7,14,60,88,8,65,19,48,29,23,40,22,20,86,126 }, + { 0,12,2,27,5,38,46,41,1,40,79,3,88,23,22,99,20,37,62,4,18,6,16,35,60,28,24,7,92,8,14,10 }, + { 7,47,1,30,137,8,116,94,90,64,14,115,108,118,57,10,148,113,42,85,32,11,63,50,103,45,124,134,55,9,69,34 }, + { 55,7,1,29,56,143,64,47,67,133,14,146,95,72,84,8,116,111,6,134,141,21,65,0,69,30,16,45,85,42,50,10 }, + { 14,1,42,8,10,29,108,63,55,148,95,32,7,19,25,115,103,34,56,129,77,0,16,152,94,30,113,26,2,5,48,4 }, + { 111,120,142,97,58,0,41,45,62,132,114,84,139,30,5,8,38,2,7,85,119,90,117,1,124,11,56,47,28,27,35,72 }, + { 1,0,14,2,6,5,16,19,7,29,42,18,3,25,12,35,21,8,26,17,40,4,20,48,109,99,22,96,55,101,10,61 }, + { 12,0,1,5,3,2,4,7,27,8,38,6,40,18,16,10,20,46,9,41,23,22,79,14,62,19,37,126,88,11,92,48 }, + { 10,8,104,39,24,32,22,83,44,100,30,130,53,91,113,5,11,1,35,33,7,49,0,2,103,71,36,124,9,80,131,34 }, + { 1,7,0,14,8,34,5,25,35,26,6,63,10,123,2,16,103,19,44,32,135,121,108,80,62,30,115,94,149,144,53,18 }, + { 75,68,146,141,102,67,2,21,6,57,69,143,0,55,82,86,28,144,147,29,93,112,56,119,133,14,76,60,84,134,111,145 }, + { 10,32,115,7,8,53,1,108,30,113,94,137,100,63,90,34,130,103,121,47,44,25,104,39,24,26,85,14,49,36,22,131 }, + { 39,24,10,22,8,130,91,104,83,49,5,33,100,11,0,35,32,131,71,36,9,44,53,2,80,51,30,1,41,7,43,62 }, + { 38,36,65,105,27,72,31,79,41,131,5,48,125,39,0,16,92,46,22,13,18,84,24,37,88,2,33,74,91,71,130,49 }, + { 0,106,62,50,45,119,85,81,132,28,2,86,41,47,38,60,35,117,5,29,7,30,145,90,55,70,14,111,18,67,93,56 }, + { 0,2,5,1,3,25,19,26,4,34,29,10,22,16,8,7,24,14,48,65,53,18,6,77,44,56,72,61,121,21,136,40 }, + { 7,1,94,8,47,115,10,32,113,103,30,108,137,63,14,64,116,148,129,42,90,25,34,118,53,57,11,49,85,9,96,50 }, + { 14,0,1,26,19,5,42,2,25,24,29,22,6,44,61,16,7,96,136,3,140,34,35,55,135,18,48,77,83,4,8,10 }, + { 1,7,14,0,25,6,34,5,26,16,63,2,19,8,35,101,108,29,94,10,18,42,123,144,129,47,61,21,3,62,149,4 }, + { 
12,0,2,1,28,5,6,120,7,60,40,16,18,86,27,14,21,93,8,62,41,38,3,17,4,119,99,48,19,126,10,9 }, + { 86,144,93,2,28,149,0,60,99,112,110,145,40,21,102,26,75,62,69,1,12,101,119,25,76,67,7,68,55,5,6,14 }, + { 8,30,10,32,113,49,115,137,124,103,45,90,7,139,11,1,58,53,130,94,108,100,9,63,85,125,34,47,0,24,44,104 }, + { 120,142,111,41,58,114,97,0,11,62,84,124,5,30,8,38,132,127,27,139,92,10,72,45,49,9,28,2,29,56,16,1 }, + { 8,113,30,137,7,32,10,90,94,115,1,103,108,63,47,85,49,53,11,45,34,50,14,25,9,124,100,130,139,121,42,26 }, + { 64,7,14,47,134,55,1,42,95,69,116,90,94,30,8,29,56,137,45,108,85,10,57,16,102,143,118,19,63,32,11,50 }, + { 62,132,0,119,120,41,111,86,35,28,5,84,56,38,2,93,145,60,67,12,92,27,29,72,55,117,21,24,133,149,22,45 }, + { 57,68,69,118,134,64,50,47,55,14,7,2,102,144,0,112,70,86,85,1,95,29,116,143,42,75,16,56,28,45,21,48 }, + { 0,12,2,1,5,28,6,40,60,27,7,38,16,14,86,18,93,41,62,46,99,35,8,23,3,17,22,21,10,19,79,20 }, + { 12,1,2,27,16,3,38,111,4,0,18,5,7,46,40,8,79,6,14,28,88,10,48,41,19,84,21,9,22,23,20,72 }, + { 53,103,32,7,1,100,22,63,71,44,10,115,108,24,92,104,26,30,122,94,8,39,83,34,137,135,90,91,121,5,87,47 }, + { 87,37,41,0,22,38,2,92,1,24,4,8,3,59,10,5,39,23,71,79,122,27,16,46,33,7,91,20,18,51,9,120 }, + { 1,7,8,10,0,5,35,32,53,44,14,30,2,80,25,34,6,62,26,103,16,19,63,9,149,24,121,41,22,11,113,83 }, + { 11,58,8,30,124,49,10,113,9,114,139,45,97,32,7,137,90,1,0,130,115,125,100,24,5,94,53,41,14,13,35,38 }, + { 125,105,9,36,131,49,8,130,39,11,10,5,22,38,41,104,0,31,13,24,27,16,2,72,65,91,48,32,84,18,100,74 }, + { 12,1,0,2,6,3,7,5,4,8,14,28,16,60,18,10,21,17,19,9,40,27,86,93,29,38,54,11,25,48,46,41 }, + { 84,41,38,72,92,29,111,5,65,120,79,0,27,56,48,14,132,16,119,22,86,88,46,28,62,12,1,2,93,18,24,127 }, + { 99,28,40,60,2,93,138,0,98,17,86,54,76,12,27,1,21,144,128,38,5,14,46,18,25,16,109,6,41,145,7,29 }, + { 1,63,10,32,148,14,103,34,42,7,8,108,116,53,64,96,25,121,26,94,140,0,29,19,55,24,100,136,5,4,44,115 }, + { 
131,100,130,49,10,8,36,104,39,0,48,41,11,38,4,24,27,22,16,44,79,5,33,2,53,9,125,74,91,120,32,83 }, + { 36,39,131,74,4,91,22,33,125,104,130,48,10,24,16,5,49,8,100,105,79,0,9,65,71,2,18,83,31,11,19,44 }, + { 0,12,2,1,6,5,7,28,40,60,16,14,18,62,86,27,93,8,17,38,21,41,35,99,3,19,10,23,22,4,9,48 }, + { 1,7,67,14,21,147,111,55,132,119,0,8,2,76,64,16,47,84,6,18,86,95,145,10,42,29,133,5,56,134,17,72 }, + { 69,55,47,134,102,143,7,57,118,95,14,64,29,56,1,50,75,67,146,2,0,133,68,16,21,6,141,85,116,18,72,65 }, + { 1,44,7,24,83,63,34,103,22,121,53,32,25,35,0,115,108,5,14,8,10,101,94,30,2,123,110,26,137,47,90,19 }, + { 14,1,25,42,34,0,26,96,19,29,140,5,53,10,2,121,3,24,44,22,55,77,129,7,63,16,8,4,6,61,100,48 }, + { 30,90,7,8,137,94,85,1,47,113,115,108,45,139,124,11,10,32,50,58,103,14,63,64,9,116,49,42,25,148,0,53 }, + { 40,99,2,60,28,17,0,54,93,98,86,138,6,12,21,76,1,5,27,144,128,38,19,46,14,41,145,7,16,67,3,109 }, + { 45,58,30,139,90,7,85,137,97,8,124,47,1,11,106,114,50,94,0,113,10,115,14,32,9,64,108,41,49,29,62,116 }, + { 14,42,10,1,63,96,32,25,34,8,129,29,0,103,55,19,26,53,77,5,95,2,4,7,3,16,148,56,18,24,121,108 }, + { 21,2,75,86,6,76,144,28,119,99,93,147,141,67,102,145,60,132,146,128,0,82,40,138,55,111,143,17,133,112,69,14 }, + { 111,120,41,62,84,132,0,5,38,119,56,92,72,142,27,28,29,35,58,80,2,86,65,79,12,14,1,24,145,16,21,48 }, + { 146,67,141,69,133,21,6,143,57,55,111,147,56,1,14,132,7,2,134,102,0,119,29,84,76,64,86,72,28,68,47,75 }, + { 12,1,0,5,27,3,7,4,38,8,6,41,16,40,46,10,18,79,2,9,23,86,20,22,62,14,37,88,92,19,24,11 }, + { 0,12,2,1,27,5,38,28,60,6,40,7,16,46,18,14,41,99,93,62,3,79,86,23,149,8,22,35,88,17,19,10 }, + { 141,6,21,67,147,102,146,2,76,119,132,69,55,111,86,75,28,133,143,0,1,145,14,128,56,99,17,60,29,93,84,68 }, + { 21,76,1,119,86,145,2,0,14,7,6,138,146,55,17,28,132,93,67,40,60,143,29,147,111,16,69,141,5,56,19,133 }, + { 1,8,108,14,7,116,64,42,10,63,94,32,115,103,113,96,30,34,55,47,95,148,29,140,129,25,134,53,69,26,19,11 }, + { 
12,1,3,5,4,2,0,7,8,38,27,16,18,6,10,20,41,40,79,46,9,23,22,88,92,37,14,24,62,19,48,99 },
+        { 1,14,7,0,6,25,5,16,19,2,42,26,29,35,61,8,18,129,101,21,3,110,34,148,96,10,17,4,22,40,12,20 },
+        { 0,2,5,1,3,19,22,26,16,24,29,7,14,6,4,25,18,44,8,48,12,61,20,21,10,35,65,56,23,40,17,107 },
+        { 1,7,8,29,56,0,10,14,2,42,72,5,4,65,3,30,84,94,67,9,25,133,111,11,32,108,16,63,21,96,26,48 }
+    };
+#ifdef _MSC_VER
+#pragma endregion
+#endif
+
+    // Magnitude of a signed 32-bit value, returned as unsigned.
+    // NOTE(review): -i overflows for INT32_MIN; the callers visible nearby only pass small differences.
+    static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast<uint32_t>(-i) : static_cast<uint32_t>(i); }
+    //static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast<uint64_t>(-i) : static_cast<uint64_t>(i); }
+
+    // Rescale v by 31/255 (to_5) or 63/255 (to_6) with rounding. (x + (x >> 8)) >> 8 approximates x / 255.
+    // Presumably v is an 8-bit component (0..255) - confirm against callers.
+    static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
+    static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
+
+    // Returns a * a for any type supporting operator*.
+    template <typename T> inline T square(T a) { return a * a; }
+
+    // Clamps value to [low, high] (float version).
+    static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; }
+
+    // Generic and int32_t clamps to [low, high].
+    template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); }
+    static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; }
+
+    // Integer square.
+    static inline int squarei(int a) { return a * a; }
+    //static inline int absi(int a) { return (a < 0) ? -a : a; }
+
+    // Linear interpolation: returns a when s == 0 and b when s == 1.
+    template <typename F> inline F lerp(F a, F b, F s) { return a + (b - a) * s; }
+
+    // Total-ordering indices returned by hist4::lookup_total_ordering_index() when one bin holds all 16 counts
+    // (the hash table below is not consulted in that case).
+    static const uint32_t TOTAL_ORDER_4_0_16 = 15;
+    static const uint32_t TOTAL_ORDER_4_1_16 = 700;
+    static const uint32_t TOTAL_ORDER_4_2_16 = 753;
+    static const uint32_t TOTAL_ORDER_4_3_16 = 515;
+    // Indexed by the packed 12-bit key m_hist[0] | (m_hist[1] << 4) | (m_hist[2] << 8); filled in elsewhere.
+    static uint16_t g_total_ordering4_hash[4096];
+    static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3];
+
+    // Same scheme for 3-bin histograms (see hist3::lookup_total_ordering_index()).
+    // NOTE(review): in g_unique_total_orders3, entry 15 is {0,0,16} and entry 89 is {0,16,0}, while the names below
+    // pair 15 with bin 1 and 89 with bin 2 - confirm this is the intended selector ordering.
+    static const uint32_t TOTAL_ORDER_3_0_16 = 12;
+    static const uint32_t TOTAL_ORDER_3_1_16 = 15;
+    static const uint32_t TOTAL_ORDER_3_2_16 = 89;
+    // Indexed by the packed 8-bit key m_hist[0] | (m_hist[1] << 4); filled in elsewhere.
+    static uint16_t g_total_ordering3_hash[256];
+    static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3];
+
+    // Four-bin histogram of selector counts. The lookup below relies on the bins summing to 16.
+    struct hist4
+    {
+        uint8_t m_hist[4];
+
+        // All bins zero.
+        hist4()
+        {
+            memset(m_hist, 0, sizeof(m_hist));
+        }
+
+        // Bins are stored as uint8_t; arguments are truncated to 8 bits.
+        hist4(uint32_t i, uint32_t j, uint32_t k, uint32_t l)
+        {
+            m_hist[0] = (uint8_t)i;
+            m_hist[1] = (uint8_t)j;
+            m_hist[2] = (uint8_t)k;
+            m_hist[3] = (uint8_t)l;
+        }
+
+        // Bin-wise equality.
+        inline bool operator== (const hist4& h) const
+        {
+            if (m_hist[0] != h.m_hist[0]) return false;
+            if (m_hist[1] != h.m_hist[1]) return false;
+            if (m_hist[2] != h.m_hist[2]) return false;
+            if (m_hist[3] != h.m_hist[3]) return false;
+            return true;
+        }
+
+        // True when a single bin holds all 16 counts.
+        inline bool any_16() const
+        {
+            return (m_hist[0] == 16) || (m_hist[1] == 16) || (m_hist[2] == 16) || (m_hist[3] == 16);
+        }
+
+        // Maps this histogram to its total-ordering index. A bin equal to 16 does not fit in a 4-bit key field,
+        // so those four cases are answered with the TOTAL_ORDER_4_*_16 constants instead of the hash table.
+        inline uint32_t lookup_total_ordering_index() const
+        {
+            if (m_hist[0] == 16)
+                return TOTAL_ORDER_4_0_16;
+            else if (m_hist[1] == 16)
+                return TOTAL_ORDER_4_1_16;
+            else if (m_hist[2] == 16)
+                return TOTAL_ORDER_4_2_16;
+            else if (m_hist[3] == 16)
+                return TOTAL_ORDER_4_3_16;
+
+            // Must sum to 16, so m_hist[3] isn't needed.
+            return g_total_ordering4_hash[m_hist[0] | (m_hist[1] << 4) | (m_hist[2] << 8)];
+        }
+    };
+
+    // Three-bin counterpart of hist4.
+    struct hist3
+    {
+        uint8_t m_hist[3];
+
+        // All bins zero.
+        hist3()
+        {
+            memset(m_hist, 0, sizeof(m_hist));
+        }
+
+        // Bins are stored as uint8_t; arguments are truncated to 8 bits.
+        hist3(uint32_t i, uint32_t j, uint32_t k)
+        {
+            m_hist[0] = (uint8_t)i;
+            m_hist[1] = (uint8_t)j;
+            m_hist[2] = (uint8_t)k;
+        }
+
+        // Bin-wise equality.
+        inline bool operator== (const hist3& h) const
+        {
+            if (m_hist[0] != h.m_hist[0]) return false;
+            if (m_hist[1] != h.m_hist[1]) return false;
+            if (m_hist[2] != h.m_hist[2]) return false;
+            return true;
+        }
+
+        // True when a single bin holds all 16 counts.
+        inline bool any_16() const
+        {
+            return (m_hist[0] == 16) || (m_hist[1] == 16) || (m_hist[2] == 16);
+        }
+
+        // Maps this histogram to its total-ordering index; single-bin histograms use the TOTAL_ORDER_3_*_16 constants.
+        inline uint32_t lookup_total_ordering_index() const
+        {
+            if (m_hist[0] == 16)
+                return TOTAL_ORDER_3_0_16;
+            else if (m_hist[1] == 16)
+                return TOTAL_ORDER_3_1_16;
+            else if (m_hist[2] == 16)
+                return TOTAL_ORDER_3_2_16;
+
+            // Must sum to 16, so m_hist[2] isn't needed.
+            return g_total_ordering3_hash[m_hist[0] | (m_hist[1] << 4)];
+        }
+    };
+
+    // One entry of a single-color match table: an endpoint pair (m_hi, m_lo) and the error m_e of the value it produces.
+    struct bc1_match_entry
+    {
+        uint8_t m_hi;
+        uint8_t m_lo;
+        uint8_t m_e;
+    };
+
+    // Interpolation mode and the per-8-bit-value match tables for 5-bit and 6-bit channels.
+    // Presumably filled by the prepare_bc1_single_color_table*() functions - the call sites are outside this chunk.
+    static bc1_approx_mode g_bc1_approx_mode;
+    static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256];
+    static bc1_match_entry g_bc1_match5_half[256], g_bc1_match6_half[256];
+
+    // Expand a 5-bit / 6-bit value to 8 bits by replicating its top bits into the low bits.
+    // Assumes v is already within 5 / 6 bits - not checked here.
+    static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); }
+    static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); }
+
+    // v0, v1 = unexpanded DXT1 endpoint values (5/6-bits)
+    // c0, c1 = expanded DXT1 endpoint values (8-bits)
+    // Ideal 2/3:1/3 blend (truncating), the same blend rounded via +1, and the ideal midpoint (truncating).
+    static inline int interp_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1) / 3; }
+    static inline int interp_5_6_ideal_round(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1 + 1) / 3; }
+    static inline int interp_half_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1) / 2; }
+
+    // 5-bit-channel 2/3:1/3 blend for the cBC1NVidia mode; operates on the unexpanded 5-bit endpoints.
+    static inline int interp_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((2 * v0 + v1) * 22) / 8; }
+
static inline int interp_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + (gdiff / 4) + 128 + gdiff * 80) / 256; } + + static inline int interp_half_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((v0 + v1) * 33) / 8; } + static inline int interp_half_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + gdiff / 4 + 128 + gdiff * 128) / 256; } + + static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } + static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + + static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_5_nv(v0, v1); + case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); + default: + case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + } + } + + static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + (void)v0; (void)v1; + assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_6_nv(c0, c1); + case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); + default: + case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + } + } + + static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_half_5_nv(v0, v1); + case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); + case bc1_approx_mode::cBC1Ideal: + case 
bc1_approx_mode::cBC1IdealRound4: + default: + return interp_half_5_6_ideal(c0, c1); + } + } + + static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + (void)v0; (void)v1; + assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_half_6_nv(c0, c1); + case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + default: + return interp_half_5_6_ideal(c0, c1); + } + } + + static void prepare_bc1_single_color_table_half(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) + { + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < size; lo++) + { + const int lo_e = pExpand[lo]; + + for (int hi = 0; hi < size; hi++) + { + const int hi_e = pExpand[hi]; + + const int v = (size == 32) ? interp_half_5(hi, lo, hi_e, lo_e, mode) : interp_half_6(hi, lo, hi_e, lo_e, mode); + + int e = iabs(v - i); + + // We only need to factor in 3% error in BC1 ideal mode. + if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) + e += (iabs(hi_e - lo_e) * 3) / 100; + + // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. + if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) + { + pTable[i].m_hi = static_cast(hi); + pTable[i].m_lo = static_cast(lo); + + assert(e <= UINT8_MAX); + pTable[i].m_e = static_cast(e); + + lowest_e = e; + } + + } // hi + } // lo + } + } + + static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) + { + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < size; lo++) + { + const int lo_e = pExpand[lo]; + + for (int hi = 0; hi < size; hi++) + { + const int hi_e = pExpand[hi]; + + const int v = (size == 32) ? 
interp_5(hi, lo, hi_e, lo_e, mode) : interp_6(hi, lo, hi_e, lo_e, mode); + + int e = iabs(v - i); + + if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) + e += (iabs(hi_e - lo_e) * 3) / 100; + + // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. + if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) + { + pTable[i].m_hi = static_cast(hi); + pTable[i].m_lo = static_cast(lo); + + assert(e <= UINT8_MAX); + pTable[i].m_e = static_cast(e); + + lowest_e = e; + } + + } // hi + } // lo + } + } + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. + static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; + + // multiplier is 4 for 3-color + static const uint32_t g_weight_vals3[3] = { 0x000004, 0x040000, 0x010101 }; + + static inline void compute_selector_factors4(const hist4& h, float& iz00, float& iz10, float& iz11) + { + uint32_t weight_accum = 0; + for (uint32_t sel = 0; sel < 4; sel++) + weight_accum += g_weight_vals4[sel] * h.m_hist[sel]; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + det = 0.0f; + else + det = (3.0f / 255.0f) / det; + + iz00 = z11 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + } + + static inline void compute_selector_factors3(const hist3& h, float& iz00, float& iz10, float& iz11) + { + uint32_t weight_accum = 0; + for (uint32_t sel = 0; sel < 3; sel++) + weight_accum += g_weight_vals3[sel] * h.m_hist[sel]; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + det = 0.0f; + else + det = 
(2.0f / 255.0f) / det; + + iz00 = z11 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + } + + static bool g_initialized; + + void init(bc1_approx_mode mode) + { + g_bc1_approx_mode = mode; + + uint8_t bc1_expand5[32]; + for (int i = 0; i < 32; i++) + bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); + prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, mode); + prepare_bc1_single_color_table_half(g_bc1_match5_half, bc1_expand5, 32, mode); + + uint8_t bc1_expand6[64]; + for (int i = 0; i < 64; i++) + bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); + prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, mode); + prepare_bc1_single_color_table_half(g_bc1_match6_half, bc1_expand6, 64, mode); + + for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS4; i++) + { + hist4 h; + h.m_hist[0] = (uint8_t)g_unique_total_orders4[i][0]; + h.m_hist[1] = (uint8_t)g_unique_total_orders4[i][1]; + h.m_hist[2] = (uint8_t)g_unique_total_orders4[i][2]; + h.m_hist[3] = (uint8_t)g_unique_total_orders4[i][3]; + + if (!h.any_16()) + { + const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4) | (h.m_hist[2] << 8); + assert(index < 4096); + g_total_ordering4_hash[index] = (uint16_t)i; + } + + compute_selector_factors4(h, g_selector_factors4[i][0], g_selector_factors4[i][1], g_selector_factors4[i][2]); + } + + for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS3; i++) + { + hist3 h; + h.m_hist[0] = (uint8_t)g_unique_total_orders3[i][0]; + h.m_hist[1] = (uint8_t)g_unique_total_orders3[i][1]; + h.m_hist[2] = (uint8_t)g_unique_total_orders3[i][2]; + + if (!h.any_16()) + { + const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4); + assert(index < 256); + g_total_ordering3_hash[index] = (uint16_t)i; + } + + compute_selector_factors3(h, g_selector_factors3[i][0], g_selector_factors3[i][1], g_selector_factors3[i][2]); + } + + g_initialized = true; + } + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb, bool allow_3color) + { + 
bc1_block* pDst_block = static_cast(pDst); + + uint32_t mask = 0xAA; + int max16 = -1, min16 = 0; + + if (allow_3color) + { + const uint32_t err4 = g_bc1_match5_equals_1[fr].m_e + g_bc1_match6_equals_1[fg].m_e + g_bc1_match5_equals_1[fb].m_e; + const uint32_t err3 = g_bc1_match5_half[fr].m_e + g_bc1_match6_half[fg].m_e + g_bc1_match5_half[fb].m_e; + + if (err3 < err4) + { + max16 = (g_bc1_match5_half[fr].m_hi << 11) | (g_bc1_match6_half[fg].m_hi << 5) | g_bc1_match5_half[fb].m_hi; + min16 = (g_bc1_match5_half[fr].m_lo << 11) | (g_bc1_match6_half[fg].m_lo << 5) | g_bc1_match5_half[fb].m_lo; + + if (max16 > min16) + std::swap(max16, min16); + } + } + + if (max16 == -1) + { + max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; + min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; + + if (min16 == max16) + { + // Always forbid 3 color blocks + // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. 
+ mask = 0; + + // Make l > h + if (min16 > 0) + min16--; + else + { + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + mask = 0x55; + } + + assert(max16 > min16); + } + + if (max16 < min16) + { + std::swap(max16, min16); + mask ^= 0x55; + } + } + + pDst_block->set_low_color(static_cast(max16)); + pDst_block->set_high_color(static_cast(min16)); + pDst_block->m_selectors[0] = static_cast(mask); + pDst_block->m_selectors[1] = static_cast(mask); + pDst_block->m_selectors[2] = static_cast(mask); + pDst_block->m_selectors[3] = static_cast(mask); + } + + static const float g_midpoint5[32] = { .015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f, .370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f, .725490f, .758824f, .792157f, .823529f, .854902f, .888235f, .921569f, .952941f, .984314f, 1e+37f }; + static const float g_midpoint6[64] = { .007843f, .023529f, .039216f, .054902f, .070588f, .086275f, .101961f, .117647f, .133333f, .149020f, .164706f, .180392f, .196078f, .211765f, .227451f, .245098f, .262745f, .278431f, .294118f, .309804f, .325490f, .341176f, .356863f, .372549f, .388235f, .403922f, .419608f, .435294f, .450980f, .466667f, .482353f, .500000f, .517647f, .533333f, .549020f, .564706f, .580392f, .596078f, .611765f, .627451f, .643137f, .658824f, .674510f, .690196f, .705882f, .721569f, .737255f, .754902f, .772549f, .788235f, .803922f, .819608f, .835294f, .850980f, .866667f, .882353f, .898039f, .913725f, .929412f, .945098f, .960784f, .976471f, .992157f, 1e+37f }; + + struct vec3F { float c[3]; }; + + static inline void compute_least_squares_endpoints4_rgb( + vec3F* pXl, vec3F* pXh, + int total_r, int total_g, int total_b, + float iz00, float iz10, float iz11, + uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) + { + const float iz01 = iz10; + + const uint32_t f1 = 
g_unique_total_orders4[s][0]; + const uint32_t f2 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1]; + const uint32_t f3 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1] + g_unique_total_orders4[s][2]; + uint32_t uq00_r = (r_sum[f2] - r_sum[f1]) + (r_sum[f3] - r_sum[f2]) * 2 + (r_sum[16] - r_sum[f3]) * 3; + uint32_t uq00_g = (g_sum[f2] - g_sum[f1]) + (g_sum[f3] - g_sum[f2]) * 2 + (g_sum[16] - g_sum[f3]) * 3; + uint32_t uq00_b = (b_sum[f2] - b_sum[f1]) + (b_sum[f3] - b_sum[f2]) * 2 + (b_sum[16] - b_sum[f3]) * 3; + + float q10_r = (float)(total_r * 3 - uq00_r); + float q10_g = (float)(total_g * 3 - uq00_g); + float q10_b = (float)(total_b * 3 - uq00_b); + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + } + + static inline bool compute_least_squares_endpoints4_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh, int total_r, int total_g, int total_b) + { + uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0; + uint32_t weight_accum = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + const uint8_t sel = pSelectors[i]; + + weight_accum += g_weight_vals4[sel]; + uq00_r += sel * r; + uq00_g += sel * g; + uq00_b += sel * b; + } + + int q10_r = total_r * 3 - uq00_r; + int q10_g = total_g * 3 - uq00_g; + int q10_b = total_b * 3 - uq00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = (3.0f / 255.0f) / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 
* det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + + return true; + } + + static inline void compute_least_squares_endpoints3_rgb( + vec3F* pXl, vec3F* pXh, + int total_r, int total_g, int total_b, + float iz00, float iz10, float iz11, + uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) + { + const float iz01 = iz10; + + // Compensates for BC1 3-color ordering, which is selector 0, 2, 1 + const uint32_t f1 = g_unique_total_orders3[s][0]; + const uint32_t f2 = g_unique_total_orders3[s][0] + g_unique_total_orders3[s][2]; + uint32_t uq00_r = (r_sum[16] - r_sum[f2]) * 2 + (r_sum[f2] - r_sum[f1]); + uint32_t uq00_g = (g_sum[16] - g_sum[f2]) * 2 + (g_sum[f2] - g_sum[f1]); + uint32_t uq00_b = (b_sum[16] - b_sum[f2]) * 2 + (b_sum[f2] - b_sum[f1]); + + float q10_r = (float)(total_r * 2 - uq00_r); + float q10_g = (float)(total_g * 2 - uq00_g); + float q10_b = (float)(total_b * 2 - uq00_b); + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + } + + static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) + { + int uq00_r = 0, uq00_g = 0, uq00_b = 0; + uint32_t weight_accum = 0; + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + if (use_black) + { + if ((r | g | b) < 4) + continue; + } + + const 
uint8_t sel = pSelectors[i]; + assert(sel <= 3); + if (sel == 3) + continue; + + weight_accum += g_weight_vals3[sel]; + + static const uint8_t s_tran[3] = { 0, 2, 1 }; + const uint8_t tsel = s_tran[sel]; + uq00_r += tsel * r; + uq00_g += tsel * g; + uq00_b += tsel * b; + + total_r += r; + total_g += g; + total_b += b; + } + + int q10_r = total_r * 2 - uq00_r; + int q10_g = total_g * 2 - uq00_g; + int q10_b = total_b * 2 - uq00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = (2.0f / 255.0f) / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + + return true; + } + + static inline void bc1_get_block_colors4(uint32_t block_r[4], uint32_t block_g[4], uint32_t block_b[4], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) + { + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + + if (g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) + { + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + } + else if (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4) + 
{ + block_r[1] = (block_r[0] * 2 + block_r[3] + 1) / 3; block_g[1] = (block_g[0] * 2 + block_g[3] + 1) / 3; block_b[1] = (block_b[0] * 2 + block_b[3] + 1) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0] + 1) / 3; block_g[2] = (block_g[3] * 2 + block_g[0] + 1) / 3; block_b[2] = (block_b[3] * 2 + block_b[0] + 1) / 3; + } + else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) + { + block_r[1] = interp_5_6_amd(block_r[0], block_r[3]); block_g[1] = interp_5_6_amd(block_g[0], block_g[3]); block_b[1] = interp_5_6_amd(block_b[0], block_b[3]); + block_r[2] = interp_5_6_amd(block_r[3], block_r[0]); block_g[2] = interp_5_6_amd(block_g[3], block_g[0]); block_b[2] = interp_5_6_amd(block_b[3], block_b[0]); + } + else + { + block_r[1] = interp_5_nv(lr, hr); block_g[1] = interp_6_nv(block_g[0], block_g[3]); block_b[1] = interp_5_nv(lb, hb); + block_r[2] = interp_5_nv(hr, lr); block_g[2] = interp_6_nv(block_g[3], block_g[0]); block_b[2] = interp_5_nv(hb, lb); + } + } + + static inline void bc1_get_block_colors3(uint32_t block_r[3], uint32_t block_g[3], uint32_t block_b[3], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) + { + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[1] = (hr << 3) | (hr >> 2); block_g[1] = (hg << 2) | (hg >> 4); block_b[1] = (hb << 3) | (hb >> 2); + + if ((g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) || (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4)) + { + block_r[2] = (block_r[0] + block_r[1]) / 2; block_g[2] = (block_g[0] + block_g[1]) / 2; block_b[2] = (block_b[0] + block_b[1]) / 2; + } + else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) + { + block_r[2] = interp_half_5_6_amd(block_r[0], block_r[1]); block_g[2] = interp_half_5_6_amd(block_g[0], block_g[1]); block_b[2] = interp_half_5_6_amd(block_b[0], block_b[1]); + } + else + { + block_r[2] = interp_half_5_nv(lr, hr); block_g[2] = interp_half_6_nv(block_g[0], block_g[1]); block_b[2] 
= interp_half_5_nv(lb, hb); + } + } + + static inline void bc1_find_sels4_noerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], const uint8_t* pForce_selectors) + { + if (pForce_selectors) + { + memcpy(sels, pForce_selectors, 16); + return; + } + + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab; + const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab; + const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab; + const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab; + + sels[i + 0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + sels[i + 1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + sels[i + 2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + sels[i + 3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + } + } + + static inline uint32_t bc1_find_sels4_fasterr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) + { + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; 
i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + uint32_t total_err = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab; + const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab; + const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab; + const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab; + + uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + uint8_t sel2 = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + uint8_t sel3 = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + + sels[i + 0] = sel0; + sels[i + 1] = sel1; + sels[i + 2] = sel2; + sels[i + 3] = sel3; + + total_err += squarei(pSrc_pixels[i + 0].r - block_r[sel0]) + squarei(pSrc_pixels[i + 0].g - block_g[sel0]) + squarei(pSrc_pixels[i + 0].b - block_b[sel0]); + total_err += squarei(pSrc_pixels[i + 1].r - block_r[sel1]) + squarei(pSrc_pixels[i + 1].g - block_g[sel1]) + squarei(pSrc_pixels[i + 1].b - block_b[sel1]); + total_err += squarei(pSrc_pixels[i + 2].r - block_r[sel2]) + squarei(pSrc_pixels[i + 2].g - block_g[sel2]) + squarei(pSrc_pixels[i + 2].b - block_b[sel2]); + total_err += squarei(pSrc_pixels[i + 3].r - block_r[sel3]) + squarei(pSrc_pixels[i + 3].g - block_g[sel3]) + squarei(pSrc_pixels[i + 3].b - block_b[sel3]); + + if (total_err >= cur_err) + break; + } + + return total_err; + } + + static inline uint32_t bc1_find_sels4_check2_err(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) + { + uint32_t block_r[4], block_g[4], 
block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + int dr = block_r[3] - block_r[0], dg = block_g[3] - block_g[0], db = block_b[3] - block_b[0]; + + const float f = 4.0f / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); + + uint32_t total_err = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f); + sel = clampi(sel, 1, 3); + + uint32_t err0 = squarei((int)block_r[sel - 1] - (int)r) + squarei((int)block_g[sel - 1] - (int)g) + squarei((int)block_b[sel - 1] - (int)b); + uint32_t err1 = squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); + + int best_sel = sel; + uint32_t best_err = err1; + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 1) == 0) + best_sel = 0; + } + else if (err0 < best_err) + { + best_sel = sel - 1; + best_err = err0; + } + + total_err += best_err; + + if (total_err >= cur_err) + break; + + sels[i] = (uint8_t)best_sel; + } + return total_err; + } + + static inline uint32_t bc1_find_sels4_fullerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) + { + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + uint32_t total_err = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); + uint8_t best_sel = 0; + + for (uint32_t j = 1; (j < 4) && best_err; j++) + { + uint32_t err = squarei((int)block_r[j] - (int)r) + 
squarei((int)block_g[j] - (int)g) + squarei((int)block_b[j] - (int)b); + if ((err < best_err) || ((err == best_err) && (j == 3))) + { + best_err = err; + best_sel = (uint8_t)j; + } + } + + total_err += best_err; + + if (total_err >= cur_err) + break; + + sels[i] = (uint8_t)best_sel; + } + return total_err; + } + + static inline uint32_t bc1_find_sels4(uint32_t flags, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err, const uint8_t* pForce_selectors) + { + uint32_t err; + + if (pForce_selectors) + { + memcpy(sels, pForce_selectors, 16); + + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + err = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + const uint32_t sel = pForce_selectors[i]; + assert(sel <= 3); + + err += squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); + } + } + else + { + if (flags & cEncodeBC1UseFasterMSEEval) + err = bc1_find_sels4_fasterr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); + else if (flags & cEncodeBC1UseFullMSEEval) + err = bc1_find_sels4_fullerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); + else + err = bc1_find_sels4_check2_err(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); + } + + return err; + } + + static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err, const uint8_t* pForce_selectors) + { + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors3(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + uint32_t total_err = 0; + + if (pForce_selectors) + { + memcpy(sels, pForce_selectors, 16); + + //uint32_t block_r[4], block_g[4], 
block_b[4]; + //bc1_get_block_colors3(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + block_r[3] = 0; block_g[3] = 0; block_b[3] = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + const uint32_t sel = pForce_selectors[i]; + assert(sel <= 3); + + total_err += squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); + uint32_t best_sel = 0; + + uint32_t err1 = squarei((int)block_r[1] - (int)r) + squarei((int)block_g[1] - (int)g) + squarei((int)block_b[1] - (int)b); + if (err1 < best_err) + { + best_err = err1; + best_sel = 1; + } + + uint32_t err2 = squarei((int)block_r[2] - (int)r) + squarei((int)block_g[2] - (int)g) + squarei((int)block_b[2] - (int)b); + if (err2 < best_err) + { + best_err = err2; + best_sel = 2; + } + + if (use_black) + { + uint32_t err3 = squarei(r) + squarei(g) + squarei(b); + if (err3 < best_err) + { + best_err = err3; + best_sel = 3; + } + } + + total_err += best_err; + if (total_err >= cur_err) + return total_err; + + sels[i] = (uint8_t)best_sel; + } + } + + return total_err; + } + + static inline void precise_round_565(const vec3F& xl, const vec3F& xh, + int& trial_lr, int& trial_lg, int& trial_lb, + int& trial_hr, int& trial_hg, int& trial_hb) + { + trial_lr = (int)(xl.c[0] * 31.0f); + trial_lg = (int)(xl.c[1] * 63.0f); + trial_lb = (int)(xl.c[2] * 31.0f); + + trial_hr = (int)(xh.c[0] * 31.0f); + trial_hg = (int)(xh.c[1] * 63.0f); + trial_hb = (int)(xh.c[2] * 31.0f); + + if ((uint32_t)(trial_lr | trial_lb | trial_hr | trial_hb) > 31U) + { + trial_lr = ((uint32_t)trial_lr > 31U) ? 
(~trial_lr >> 31) & 31 : trial_lr; + trial_hr = ((uint32_t)trial_hr > 31U) ? (~trial_hr >> 31) & 31 : trial_hr; + + trial_lb = ((uint32_t)trial_lb > 31U) ? (~trial_lb >> 31) & 31 : trial_lb; + trial_hb = ((uint32_t)trial_hb > 31U) ? (~trial_hb >> 31) & 31 : trial_hb; + } + + if ((uint32_t)(trial_lg | trial_hg) > 63U) + { + trial_lg = ((uint32_t)trial_lg > 63U) ? (~trial_lg >> 31) & 63 : trial_lg; + trial_hg = ((uint32_t)trial_hg > 63U) ? (~trial_hg >> 31) & 63 : trial_hg; + } + + trial_lr = (trial_lr + (xl.c[0] > g_midpoint5[trial_lr])) & 31; + trial_lg = (trial_lg + (xl.c[1] > g_midpoint6[trial_lg])) & 63; + trial_lb = (trial_lb + (xl.c[2] > g_midpoint5[trial_lb])) & 31; + + trial_hr = (trial_hr + (xh.c[0] > g_midpoint5[trial_hr])) & 31; + trial_hg = (trial_hg + (xh.c[1] > g_midpoint6[trial_hg])) & 63; + trial_hb = (trial_hb + (xh.c[2] > g_midpoint5[trial_hb])) & 31; + } + + static inline void precise_round_565_noscale(vec3F xl, vec3F xh, + int& trial_lr, int& trial_lg, int& trial_lb, + int& trial_hr, int& trial_hg, int& trial_hb) + { + xl.c[0] *= 1.0f / 255.0f; + xl.c[1] *= 1.0f / 255.0f; + xl.c[2] *= 1.0f / 255.0f; + + xh.c[0] *= 1.0f / 255.0f; + xh.c[1] *= 1.0f / 255.0f; + xh.c[2] *= 1.0f / 255.0f; + + precise_round_565(xl, xh, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb); + } + + static inline void bc1_encode4(bc1_block* pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) + { + uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast(lc16)); + pDst_block->set_high_color(static_cast(hc16)); + + pDst_block->m_selectors[0] = mask; + 
pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; + } + } + + static inline void bc1_encode3(bc1_block* pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) + { + uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); + + bool invert_flag = false; + if (lc16 > hc16) + { + std::swap(lc16, hc16); + invert_flag = true; + } + + assert(lc16 <= hc16); + + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + + if (invert_flag) + { + static const uint8_t s_sel_trans_inv[4] = { 1, 0, 2, 3 }; + + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans_inv[sels[i]] << (i * 2)); + } + else + { + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)sels[i] << (i * 2)); + } + + pDst_block->m_selectors[0] = (uint8_t)packed_sels; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8); + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16); + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24); + } + + struct bc1_encode_results + { + int lr, lg, lb; + int hr, hg, hb; + uint8_t sels[16]; + bool m_3color; + }; + + static bool 
try_3color_block_useblack(const color32* pSrc_pixels, uint32_t flags, uint32_t& cur_err, bc1_encode_results& results, const uint8_t* pForce_selectors) + { + int total_r = 0, total_g = 0, total_b = 0; + int max_r = 0, max_g = 0, max_b = 0; + int min_r = 255, min_g = 255, min_b = 255; + int total_pixels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + if ((r | g | b) < 4) + continue; + + max_r = std::max(max_r, r); max_g = std::max(max_g, g); max_b = std::max(max_b, b); + min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); + total_r += r; total_g += g; total_b += b; + + total_pixels++; + } + + if (!total_pixels) + return false; + + int half_total_pixels = total_pixels >> 1; + int avg_r = (total_r + half_total_pixels) / total_pixels; + int avg_g = (total_g + half_total_pixels) / total_pixels; + int avg_b = (total_b + half_total_pixels) / total_pixels; + + uint32_t low_c = 0, high_c = 0; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r; + int g = (int)pSrc_pixels[i].g; + int b = (int)pSrc_pixels[i].b; + + if ((r | g | b) < 4) + continue; + + r -= avg_r; + g -= avg_g; + b -= avg_b; + + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)(icov[i]) * (1.0f / 255.0f); + + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + + if (icov[2] < 0) + xr = -xr; + + if (icov[4] < 0) + xg = -xg; + + for (uint32_t power_iter = 0; power_iter < 4; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + int saxis_r = 
306, saxis_g = 601, saxis_b = 117; + if (k >= 2) + { + float m = 1024.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } + + int low_dot = INT_MAX, high_dot = INT_MIN; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b; + + if ((r | g | b) < 4) + continue; + + int dot = r * saxis_r + g * saxis_g + b * saxis_b; + if (dot < low_dot) + { + low_dot = dot; + low_c = i; + } + if (dot > high_dot) + { + high_dot = dot; + high_c = i; + } + } + + int lr = to_5(pSrc_pixels[low_c].r); + int lg = to_6(pSrc_pixels[low_c].g); + int lb = to_5(pSrc_pixels[low_c].b); + + int hr = to_5(pSrc_pixels[high_c].r); + int hg = to_6(pSrc_pixels[high_c].g); + int hb = to_5(pSrc_pixels[high_c].b); + + uint8_t trial_sels[16]; + uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX, pForce_selectors); + + if (trial_err) + { + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 
2 : 1; + for (uint32_t trials = 0; trials < total_ls_passes; trials++) + { + vec3F xl, xh; + int lr2, lg2, lb2, hr2, hg2, hb2; + if (!compute_least_squares_endpoints3_rgb(true, pSrc_pixels, trial_sels, &xl, &xh)) + { + lr2 = g_bc1_match5_half[avg_r].m_hi; + lg2 = g_bc1_match6_half[avg_g].m_hi; + lb2 = g_bc1_match5_half[avg_b].m_hi; + + hr2 = g_bc1_match5_half[avg_r].m_lo; + hg2 = g_bc1_match6_half[avg_g].m_lo; + hb2 = g_bc1_match5_half[avg_b].m_lo; + } + else + { + precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); + } + + if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) + break; + + uint8_t trial_sels2[16]; + uint32_t trial_err2 = bc1_find_sels3_fullerr(true, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err, pForce_selectors); + + if (trial_err2 < trial_err) + { + trial_err = trial_err2; + lr = lr2; lg = lg2; lb = lb2; + hr = hr2; hg = hg2; hb = hb2; + memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); + } + else + break; + } + } + + if (trial_err < cur_err) + { + results.m_3color = true; + results.lr = lr; + results.lg = lg; + results.lb = lb; + results.hr = hr; + results.hg = hg; + results.hb = hb; + memcpy(results.sels, trial_sels, 16); + + cur_err = trial_err; + + return true; + } + + return false; + } + + static bool try_3color_block(const color32* pSrc_pixels, uint32_t flags, uint32_t& cur_err, + int avg_r, int avg_g, int avg_b, int lr, int lg, int lb, int hr, int hg, int hb, int total_r, int total_g, int total_b, uint32_t total_orderings_to_try, + bc1_encode_results& results, const uint8_t* pForce_selectors) + { + if (pForce_selectors) + { + for (uint32_t i = 0; i < 16; i++) + if (pForce_selectors[i] == 3) + return false; + } + + uint8_t trial_sels[16]; + uint32_t trial_err = bc1_find_sels3_fullerr(false, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX, pForce_selectors); + + if (trial_err) + { + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 
2 : 1; + for (uint32_t trials = 0; trials < total_ls_passes; trials++) + { + vec3F xl, xh; + int lr2, lg2, lb2, hr2, hg2, hb2; + if (!compute_least_squares_endpoints3_rgb(false, pSrc_pixels, trial_sels, &xl, &xh)) + { + lr2 = g_bc1_match5_half[avg_r].m_hi; + lg2 = g_bc1_match6_half[avg_g].m_hi; + lb2 = g_bc1_match5_half[avg_b].m_hi; + + hr2 = g_bc1_match5_half[avg_r].m_lo; + hg2 = g_bc1_match6_half[avg_g].m_lo; + hb2 = g_bc1_match5_half[avg_b].m_lo; + } + else + { + precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); + } + + if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) + break; + + uint8_t trial_sels2[16]; + uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err, pForce_selectors); + + if (trial_err2 < trial_err) + { + trial_err = trial_err2; + lr = lr2; lg = lg2; lb = lb2; + hr = hr2; hg = hg2; hb = hb2; + memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); + } + else + break; + } + } + + if ((trial_err) && (flags & cEncodeBC1UseLikelyTotalOrderings) && (total_orderings_to_try)) + { + hist3 h; + for (uint32_t i = 0; i < 16; i++) + { + assert(trial_sels[i] < 3); + h.m_hist[trial_sels[i]]++; + } + + const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); + + int r0, g0, b0, r3, g3, b3; + r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); + r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); + + int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; + + int dots[16]; + for (uint32_t i = 0; i < 16; i++) + { + int r = pSrc_pixels[i].r; + int g = pSrc_pixels[i].g; + int b = pSrc_pixels[i].b; + int d = 0x1000000 + (r * ar + g * ag + b * ab); + assert(d >= 0); + dots[i] = (d << 4) + i; + } + + std::sort(dots, dots + 16); + + uint32_t r_sum[17], g_sum[17], b_sum[17]; + uint32_t r = 0, g = 0, b = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t p = dots[i] & 15; + + r_sum[i] = 
r; + g_sum[i] = g; + b_sum[i] = b; + + r += pSrc_pixels[p].r; + g += pSrc_pixels[p].g; + b += pSrc_pixels[p].b; + } + + r_sum[16] = total_r; + g_sum[16] = total_g; + b_sum[16] = total_b; + + const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS3 : std::min(total_orderings_to_try, MAX_TOTAL_ORDERINGS3); + for (uint32_t q = 0; q < q_total; q++) + { + const uint32_t s = (flags & cEncodeBC1Exhaustive) ? q : g_best_total_orderings3[orig_total_order_index][q]; + + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + + if ((s == TOTAL_ORDER_3_0_16) || (s == TOTAL_ORDER_3_1_16) || (s == TOTAL_ORDER_3_2_16)) + { + trial_lr = g_bc1_match5_half[avg_r].m_hi; + trial_lg = g_bc1_match6_half[avg_g].m_hi; + trial_lb = g_bc1_match5_half[avg_b].m_hi; + + trial_hr = g_bc1_match5_half[avg_r].m_lo; + trial_hg = g_bc1_match6_half[avg_g].m_lo; + trial_hb = g_bc1_match5_half[avg_b].m_lo; + } + else + { + compute_least_squares_endpoints3_rgb(&xl, &xh, total_r, total_g, total_b, + g_selector_factors3[s][0], g_selector_factors3[s][1], g_selector_factors3[s][2], s, r_sum, g_sum, b_sum); + + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + uint8_t trial_sels2[16]; + uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels2, UINT32_MAX, pForce_selectors); + + if (trial_err2 < trial_err) + { + trial_err = trial_err2; + + lr = trial_lr; + lg = trial_lg; + lb = trial_lb; + + hr = trial_hr; + hg = trial_hg; + hb = trial_hb; + + memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); + } + + } // s + } + + if (trial_err < cur_err) + { + results.m_3color = true; + results.lr = lr; + results.lg = lg; + results.lb = lb; + results.hr = hr; + results.hg = hg; + results.hb = hb; + memcpy(results.sels, trial_sels, 16); + + cur_err = trial_err; + + return true; + } + + return false; + } + + void encode_bc1(uint32_t level, 
void* pDst, const uint8_t* pPixels, bool allow_3color, bool allow_transparent_texels_for_black, const uint8_t* pForce_selectors) + { + uint32_t flags = 0, total_orderings4 = 1, total_orderings3 = 1; + + static_assert(MAX_TOTAL_ORDERINGS3 >= 32, "MAX_TOTAL_ORDERINGS3 >= 32"); + static_assert(MAX_TOTAL_ORDERINGS4 >= 32, "MAX_TOTAL_ORDERINGS4 >= 32"); + + switch (level) + { + case 0: + // Faster/higher quality than stb_dxt default. + flags = cEncodeBC1BoundingBoxInt; + break; + case 1: + // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. + flags = cEncodeBC1Use2DLS; + break; + case 2: + // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). + // Slightly stronger than stb_dxt. + flags = 0; + break; + case 3: + // Slightly stronger than stb_dxt HIGHQUAL. + flags = cEncodeBC1TwoLeastSquaresPasses; + break; + case 4: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1Use6PowerIters; + break; + default: + case 5: + // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + break; + case 6: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + break; + case 7: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 4; + break; + case 8: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 8; + break; + case 9: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 11; + total_orderings3 = 3; + break; + case 10: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 20; + total_orderings3 = 8; + break; + case 11: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 28; + total_orderings3 = 16; + break; + case 12: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 13: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (20 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 14: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 15: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = ((((32 + MAX_TOTAL_ORDERINGS4) / 2) + 32) / 2); + total_orderings3 = 32; + break; + case 16: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = (32 + MAX_TOTAL_ORDERINGS4) / 2; + total_orderings3 = 32; + break; + case 17: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = MAX_TOTAL_ORDERINGS4; + total_orderings3 = 32; + break; + case 18: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = MAX_TOTAL_ORDERINGS4; + total_orderings3 = 32; + break; + case 19: + // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Exhaustive | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + } + + encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3, pForce_selectors); + } + + static inline void encode_bc1_pick_initial(const color32* pSrc_pixels, uint32_t flags, bool grayscale_flag, + int min_r, int min_g, int min_b, int max_r, int max_g, int max_b, + int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, + int& lr, int& lg, int& lb, int& hr, int& hg, int& hb) + { + if (grayscale_flag) + { + const int fr = pSrc_pixels[0].r; + + // Grayscale blocks are a common enough case to specialize. 
+ if ((max_r - min_r) < 2) + { + lr = lb = hr = hb = to_5(fr); + lg = hg = to_6(fr); + } + else + { + lr = lb = to_5(min_r); + lg = to_6(min_r); + + hr = hb = to_5(max_r); + hg = to_6(max_r); + } + } + else if (flags & cEncodeBC1Use2DLS) + { + // 2D Least Squares approach from Humus's example, with added inset and optimal rounding. + int big_chan = 0, min_chan_val = min_r, max_chan_val = max_r; + if ((max_g - min_g) > (max_chan_val - min_chan_val)) + big_chan = 1, min_chan_val = min_g, max_chan_val = max_g; + + if ((max_b - min_b) > (max_chan_val - min_chan_val)) + big_chan = 2, min_chan_val = min_b, max_chan_val = max_b; + + int sum_xy_r = 0, sum_xy_g = 0, sum_xy_b = 0; + vec3F l, h; + if (big_chan == 0) + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + sum_xy_r += r * r, sum_xy_g += r * g, sum_xy_b += r * b; + } + + int sum_x = total_r; + int sum_x2 = sum_xy_r; + + float div = (float)(16 * sum_x2 - sum_x * sum_x); + float b_y = 0.0f, b_z = 0.0f; + if (fabs(div) > 1e-8f) + { + div = 1.0f / div; + b_y = (16 * sum_xy_g - sum_x * total_g) * div; + b_z = (16 * sum_xy_b - sum_x * total_b) * div; + } + + float a_y = (total_g - b_y * sum_x) / 16.0f; + float a_z = (total_b - b_z * sum_x) / 16.0f; + + l.c[1] = a_y + b_y * min_chan_val; + l.c[2] = a_z + b_z * min_chan_val; + + h.c[1] = a_y + b_y * max_chan_val; + h.c[2] = a_z + b_z * max_chan_val; + + float dg = (h.c[1] - l.c[1]); + float db = (h.c[2] - l.c[2]); + + h.c[1] = l.c[1] + dg * (15.0f / 16.0f); + h.c[2] = l.c[2] + db * (15.0f / 16.0f); + + l.c[1] = l.c[1] + dg * (1.0f / 16.0f); + l.c[2] = l.c[2] + db * (1.0f / 16.0f); + + float d = (float)(max_chan_val - min_chan_val); + float fmin_chan_val = min_chan_val + d * (1.0f / 16.0f); + float fmax_chan_val = min_chan_val + d * (15.0f / 16.0f); + + l.c[0] = fmin_chan_val; + h.c[0] = fmax_chan_val; + } + else if (big_chan == 1) + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = 
pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + sum_xy_r += g * r, sum_xy_g += g * g, sum_xy_b += g * b; + } + + int sum_x = total_g; + int sum_x2 = sum_xy_g; + + float div = (float)(16 * sum_x2 - sum_x * sum_x); + float b_x = 0.0f, b_z = 0.0f; + if (fabs(div) > 1e-8f) + { + div = 1.0f / div; + b_x = (16 * sum_xy_r - sum_x * total_r) * div; + b_z = (16 * sum_xy_b - sum_x * total_b) * div; + } + + float a_x = (total_r - b_x * sum_x) / 16.0f; + float a_z = (total_b - b_z * sum_x) / 16.0f; + + l.c[0] = a_x + b_x * min_chan_val; + l.c[2] = a_z + b_z * min_chan_val; + + h.c[0] = a_x + b_x * max_chan_val; + h.c[2] = a_z + b_z * max_chan_val; + + float dr = (h.c[0] - l.c[0]); + float db = (h.c[2] - l.c[2]); + + h.c[0] = l.c[0] + dr * (15.0f / 16.0f); + h.c[2] = l.c[2] + db * (15.0f / 16.0f); + + l.c[0] = l.c[0] + dr * (1.0f / 16.0f); + l.c[2] = l.c[2] + db * (1.0f / 16.0f); + + float d = (float)(max_chan_val - min_chan_val); + float fmin_chan_val = min_chan_val + d * (1.0f / 16.0f); + float fmax_chan_val = min_chan_val + d * (15.0f / 16.0f); + + l.c[1] = fmin_chan_val; + h.c[1] = fmax_chan_val; + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + sum_xy_r += b * r, sum_xy_g += b * g, sum_xy_b += b * b; + } + + int sum_x = total_b; + int sum_x2 = sum_xy_b; + + float div = (float)(16 * sum_x2 - sum_x * sum_x); + float b_x = 0.0f, b_y = 0.0f; + if (fabs(div) > 1e-8f) + { + div = 1.0f / div; + b_x = (16 * sum_xy_r - sum_x * total_r) * div; + b_y = (16 * sum_xy_g - sum_x * total_g) * div; + } + + float a_x = (total_r - b_x * sum_x) / 16.0f; + float a_y = (total_g - b_y * sum_x) / 16.0f; + + l.c[0] = a_x + b_x * min_chan_val; + l.c[1] = a_y + b_y * min_chan_val; + + h.c[0] = a_x + b_x * max_chan_val; + h.c[1] = a_y + b_y * max_chan_val; + + float dr = (h.c[0] - l.c[0]); + float dg = (h.c[1] - l.c[1]); + + h.c[0] = l.c[0] + dr * (15.0f / 16.0f); + h.c[1] = l.c[1] + dg * (15.0f / 
16.0f); + + l.c[0] = l.c[0] + dr * (1.0f / 16.0f); + l.c[1] = l.c[1] + dg * (1.0f / 16.0f); + + float d = (float)(max_chan_val - min_chan_val); + float fmin_chan_val = min_chan_val + d * (1.0f / 16.0f); + float fmax_chan_val = min_chan_val + d * (15.0f / 16.0f); + + l.c[2] = fmin_chan_val; + h.c[2] = fmax_chan_val; + } + + precise_round_565_noscale(l, h, lr, lg, lb, hr, hg, hb); + } + else if (flags & cEncodeBC1BoundingBox) + { + // Algorithm from icbc.h compress_dxt1_fast() + vec3F l, h; + l.c[0] = min_r * (1.0f / 255.0f); + l.c[1] = min_g * (1.0f / 255.0f); + l.c[2] = min_b * (1.0f / 255.0f); + + h.c[0] = max_r * (1.0f / 255.0f); + h.c[1] = max_g * (1.0f / 255.0f); + h.c[2] = max_b * (1.0f / 255.0f); + + const float bias = 8.0f / 255.0f; + float inset_r = (h.c[0] - l.c[0] - bias) * (1.0f / 16.0f); + float inset_g = (h.c[1] - l.c[1] - bias) * (1.0f / 16.0f); + float inset_b = (h.c[2] - l.c[2] - bias) * (1.0f / 16.0f); + + l.c[0] = clampf(l.c[0] + inset_r, 0.0f, 1.0f); + l.c[1] = clampf(l.c[1] + inset_g, 0.0f, 1.0f); + l.c[2] = clampf(l.c[2] + inset_b, 0.0f, 1.0f); + + h.c[0] = clampf(h.c[0] - inset_r, 0.0f, 1.0f); + h.c[1] = clampf(h.c[1] - inset_g, 0.0f, 1.0f); + h.c[2] = clampf(h.c[2] - inset_b, 0.0f, 1.0f); + + int icov_xz = 0, icov_yz = 0; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov_xz += r * b; + icov_yz += g * b; + } + + if (icov_xz < 0) + std::swap(l.c[0], h.c[0]); + + if (icov_yz < 0) + std::swap(l.c[1], h.c[1]); + + precise_round_565(l, h, lr, lg, lb, hr, hg, hb); + } + else if (flags & cEncodeBC1BoundingBoxInt) + { + // Algorithm from icbc.h compress_dxt1_fast(), but converted to integer. 
+ int inset_r = (max_r - min_r - 8) >> 4; + int inset_g = (max_g - min_g - 8) >> 4; + int inset_b = (max_b - min_b - 8) >> 4; + + min_r += inset_r; + min_g += inset_g; + min_b += inset_b; + if ((uint32_t)(min_r | min_g | min_b) > 255U) + { + min_r = clampi(min_r, 0, 255); + min_g = clampi(min_g, 0, 255); + min_b = clampi(min_b, 0, 255); + } + + max_r -= inset_r; + max_g -= inset_g; + max_b -= inset_b; + if ((uint32_t)(max_r | max_g | max_b) > 255U) + { + max_r = clampi(max_r, 0, 255); + max_g = clampi(max_g, 0, 255); + max_b = clampi(max_b, 0, 255); + } + + int icov_xz = 0, icov_yz = 0; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov_xz += r * b; + icov_yz += g * b; + } + + int x0 = min_r; + int y0 = min_g; + int x1 = max_r; + int y1 = max_g; + + if (icov_xz < 0) + std::swap(x0, x1); + + if (icov_yz < 0) + std::swap(y0, y1); + + lr = to_5(x0); + lg = to_6(y0); + lb = to_5(min_b); + + hr = to_5(x1); + hg = to_6(y1); + hb = to_5(max_b); + } + else + { + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) + uint32_t low_c = 0, high_c = 0; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + + if (icov[2] < 0) + xr = -xr; + + if (icov[4] < 0) + xg = -xg; + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)(icov[i]) * (1.0f / 255.0f); + + const uint32_t total_power_iters = (flags & cEncodeBC1Use6PowerIters) ? 
6 : 4; + for (uint32_t power_iter = 0; power_iter < total_power_iters; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + if (k >= 2) + { + float m = 2048.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } + + int low_dot = INT_MAX, high_dot = INT_MIN; + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + + for (uint32_t i = 0; i < 16; i += 4) + { + int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i; + int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1; + int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2; + int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3; + + int min_d01 = std::min(dot0, dot1); + int max_d01 = std::max(dot0, dot1); + + int min_d23 = std::min(dot2, dot3); + int max_d23 = std::max(dot2, dot3); + + int min_d = std::min(min_d01, min_d23); + int max_d = std::max(max_d01, max_d23); + + low_dot = std::min(low_dot, min_d); + high_dot = std::max(high_dot, max_d); + } + low_c = low_dot & 15; + high_c = high_dot & 15; + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + } + } + + static const int8_t s_adjacent_voxels[16][4] = + { + { 1,0,0, 3 }, // 0 + { 0,1,0, 4 }, // 1 + { 0,0,1, 5 }, // 2 + { -1,0,0, 0 }, // 3 + { 0,-1,0, 1 }, // 4 + { 0,0,-1, 2 }, // 5 + { 1,1,0, 9 }, // 6 + 
{ 1,0,1, 10 }, // 7 + { 0,1,1, 11 }, // 8 + { -1,-1,0, 6 }, // 9 + { -1,0,-1, 7 }, // 10 + { 0,-1,-1, 8 }, // 11 + { -1,1,0, 13 }, // 12 + { 1,-1,0, 12 }, // 13 + { 0,-1,1, 15 }, // 14 + { 0,1,-1, 14 }, // 15 + }; + + // From icbc's high quality mode. + static inline void encode_bc1_endpoint_search(const color32* pSrc_pixels, bool any_black_pixels, + uint32_t flags, bc1_encode_results& results, uint32_t cur_err, const uint8_t* pForce_selectors) + { + int& lr = results.lr, & lg = results.lg, & lb = results.lb, & hr = results.hr, & hg = results.hg, & hb = results.hb; + uint8_t* sels = results.sels; + + int prev_improvement_index = 0, forbidden_direction = -1; + + const int endpoint_search_rounds = (flags & cEncodeBC1EndpointSearchRoundsMask) >> cEncodeBC1EndpointSearchRoundsShift; + for (int i = 0; i < endpoint_search_rounds; i++) + { + assert(s_adjacent_voxels[s_adjacent_voxels[i & 15][3]][3] == (i & 15)); + + if (forbidden_direction == (i & 31)) + continue; + + const int8_t delta[3] = { s_adjacent_voxels[i & 15][0], s_adjacent_voxels[i & 15][1], s_adjacent_voxels[i & 15][2] }; + + int trial_lr = lr, trial_lg = lg, trial_lb = lb, trial_hr = hr, trial_hg = hg, trial_hb = hb; + + if ((i >> 4) & 1) + { + trial_lr = clampi(trial_lr + delta[0], 0, 31); + trial_lg = clampi(trial_lg + delta[1], 0, 63); + trial_lb = clampi(trial_lb + delta[2], 0, 31); + } + else + { + trial_hr = clampi(trial_hr + delta[0], 0, 31); + trial_hg = clampi(trial_hg + delta[1], 0, 63); + trial_hb = clampi(trial_hb + delta[2], 0, 31); + } + + uint8_t trial_sels[16]; + + uint32_t trial_err; + if (results.m_3color) + { + trial_err = bc1_find_sels3_fullerr( + ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)), + pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err, pForce_selectors); + } + else + { + trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err, 
pForce_selectors); + } + + if (trial_err < cur_err) + { + cur_err = trial_err; + + forbidden_direction = s_adjacent_voxels[i & 15][3] | (i & 16); + + lr = trial_lr, lg = trial_lg, lb = trial_lb, hr = trial_hr, hg = trial_hg, hb = trial_hb; + + memcpy(sels, trial_sels, 16); + + prev_improvement_index = i; + } + + if (i - prev_improvement_index > 32) + break; + } + } + + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try, uint32_t total_orderings_to_try3, const uint8_t* pForce_selectors) + { + assert(g_initialized); + + const color32* pSrc_pixels = (const color32*)pPixels; + bc1_block* pDst_block = static_cast(pDst); + + int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b; + + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 15; j >= 1; --j) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 0) + { + encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0); + return; + } + + int total_r = fr, total_g = fg, total_b = fb; + + max_r = fr, max_g = fg, max_b = fb; + min_r = fr, min_g = fg, min_b = fb; + + uint32_t grayscale_flag = (fr == fg) && (fr == fb); + uint32_t any_black_pixels = (fr | fg | fb) < 4; + + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + grayscale_flag &= ((r == g) && (r == b)); + any_black_pixels |= ((r | g | b) < 4); + + max_r = std::max(max_r, r); max_g = std::max(max_g, g); max_b = std::max(max_b, b); + min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + avg_r = (total_r + 8) >> 4, avg_g = (total_g + 8) >> 4, avg_b = (total_b + 8) >> 4; + + bc1_encode_results results; + results.m_3color = false; + + uint8_t* sels = results.sels; + int& lr = 
results.lr, & lg = results.lg, & lb = results.lb, & hr = results.hr, & hg = results.hg, & hb = results.hb; + int orig_lr = 0, orig_lg = 0, orig_lb = 0, orig_hr = 0, orig_hg = 0, orig_hb = 0; + + lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + + const bool needs_block_error = ((flags & (cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use3ColorBlocks | cEncodeBC1UseFullMSEEval | cEncodeBC1EndpointSearchRoundsMask)) != 0) || + (any_black_pixels && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)); + + uint32_t cur_err = UINT32_MAX; + + if (!needs_block_error) + { + assert((flags & cEncodeBC1TryAllInitialEndponts) == 0); + + encode_bc1_pick_initial(pSrc_pixels, flags, grayscale_flag != 0, + min_r, min_g, min_b, max_r, max_g, max_b, + avg_r, avg_g, avg_b, total_r, total_g, total_b, + lr, lg, lb, hr, hg, hb); + + orig_lr = lr, orig_lg = lg, orig_lb = lb, orig_hr = hr, orig_hg = hg, orig_hb = hb; + + bc1_find_sels4_noerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, pForce_selectors); + + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, sels, &xl, &xh, total_r, total_g, total_b)) + { + // All selectors equal - treat it as a solid block which should always be equal or better. + trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; + trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; + trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; + + trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; + trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; + trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. 
+ } + else + { + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + if ((lr == trial_lr) && (lg == trial_lg) && (lb == trial_lb) && (hr == trial_hr) && (hg == trial_hg) && (hb == trial_hb)) + break; + + bc1_find_sels4_noerr(pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, sels, pForce_selectors); + + lr = trial_lr; + lg = trial_lg; + lb = trial_lb; + hr = trial_hr; + hg = trial_hg; + hb = trial_hb; + + } // ls_pass + } + else + { + const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1; + for (uint32_t round = 0; round < total_rounds; round++) + { + uint32_t modified_flags = flags; + if (round == 1) + { + modified_flags &= ~(cEncodeBC1Use2DLS | cEncodeBC1BoundingBox); + modified_flags |= cEncodeBC1BoundingBox; + } + + int round_lr, round_lg, round_lb, round_hr, round_hg, round_hb; + uint8_t round_sels[16]; + + encode_bc1_pick_initial(pSrc_pixels, modified_flags, grayscale_flag != 0, + min_r, min_g, min_b, max_r, max_g, max_b, + avg_r, avg_g, avg_b, total_r, total_g, total_b, + round_lr, round_lg, round_lb, round_hr, round_hg, round_hb); + + int orig_round_lr = round_lr, orig_round_lg = round_lg, orig_round_lb = round_lb, orig_round_hr = round_hr, orig_round_hg = round_hg, orig_round_hb = round_hb; + + uint32_t round_err = bc1_find_sels4(flags, pSrc_pixels, round_lr, round_lg, round_lb, round_hr, round_hg, round_hb, round_sels, UINT32_MAX, pForce_selectors); + + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, round_sels, &xl, &xh, total_r, total_g, total_b)) + { + // All selectors equal - treat it as a solid block which should always be equal or better. 
+ trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; + trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; + trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; + + trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; + trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; + trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. + } + else + { + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + if ((round_lr == trial_lr) && (round_lg == trial_lg) && (round_lb == trial_lb) && (round_hr == trial_hr) && (round_hg == trial_hg) && (round_hb == trial_hb)) + break; + + uint8_t trial_sels[16]; + uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, round_err, pForce_selectors); + + if (trial_err < round_err) + { + round_lr = trial_lr; + round_lg = trial_lg; + round_lb = trial_lb; + + round_hr = trial_hr; + round_hg = trial_hg; + round_hb = trial_hb; + + round_err = trial_err; + memcpy(round_sels, trial_sels, 16); + } + else + break; + + } // ls_pass + + if (round_err <= cur_err) + { + cur_err = round_err; + + lr = round_lr; + lg = round_lg; + lb = round_lb; + hr = round_hr; + hg = round_hg; + hb = round_hb; + + orig_lr = orig_round_lr; + orig_lg = orig_round_lg; + orig_lb = orig_round_lb; + orig_hr = orig_round_hr; + orig_hg = orig_round_hg; + orig_hb = orig_round_hb; + + memcpy(sels, round_sels, 16); + } + + } // round + } + + if ((cur_err) && (flags & cEncodeBC1UseLikelyTotalOrderings)) + { + assert(needs_block_error); + + const uint32_t total_iters = (flags & cEncodeBC1Iterative) ? 
2 : 1; + for (uint32_t iter_index = 0; iter_index < total_iters; iter_index++) + { + const uint32_t orig_err = cur_err; + + hist4 h; + for (uint32_t i = 0; i < 16; i++) + { + assert(sels[i] < 4); + h.m_hist[sels[i]]++; + } + + const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); + + int r0, g0, b0, r3, g3, b3; + r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); + r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); + + int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; + + int dots[16]; + for (uint32_t i = 0; i < 16; i++) + { + int r = pSrc_pixels[i].r; + int g = pSrc_pixels[i].g; + int b = pSrc_pixels[i].b; + int d = 0x1000000 + (r * ar + g * ag + b * ab); + assert(d >= 0); + dots[i] = (d << 4) + i; + } + + std::sort(dots, dots + 16); + + uint32_t r_sum[17], g_sum[17], b_sum[17]; + uint32_t r = 0, g = 0, b = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t p = dots[i] & 15; + + r_sum[i] = r; + g_sum[i] = g; + b_sum[i] = b; + + r += pSrc_pixels[p].r; + g += pSrc_pixels[p].g; + b += pSrc_pixels[p].b; + } + + r_sum[16] = total_r; + g_sum[16] = total_g; + b_sum[16] = total_b; + + const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : clampi(total_orderings_to_try, MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4); + for (uint32_t q = 0; q < q_total; q++) + { + const uint32_t s = (flags & cEncodeBC1Exhaustive) ? 
q : g_best_total_orderings4[orig_total_order_index][q]; + + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + + if ((s == TOTAL_ORDER_4_0_16) || (s == TOTAL_ORDER_4_1_16) || (s == TOTAL_ORDER_4_2_16) || (s == TOTAL_ORDER_4_3_16)) + { + trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; + trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; + trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; + + trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; + trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; + trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; + } + else + { + compute_least_squares_endpoints4_rgb(&xl, &xh, total_r, total_g, total_b, + g_selector_factors4[s][0], g_selector_factors4[s][1], g_selector_factors4[s][2], s, r_sum, g_sum, b_sum); + + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + uint8_t trial_sels[16]; + + uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err, pForce_selectors); + + if (trial_err < cur_err) + { + cur_err = trial_err; + + lr = trial_lr; + lg = trial_lg; + lb = trial_lb; + + hr = trial_hr; + hg = trial_hg; + hb = trial_hb; + + memcpy(sels, trial_sels, 16); + } + + } // s + + if ((!cur_err) || (cur_err == orig_err)) + break; + + } // iter_index + } + + if (((flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0) && (cur_err)) + { + if (flags & cEncodeBC1Use3ColorBlocks) + { + assert(needs_block_error); + try_3color_block(pSrc_pixels, flags, cur_err, avg_r, avg_g, avg_b, orig_lr, orig_lg, orig_lb, orig_hr, orig_hg, orig_hb, total_r, total_g, total_b, total_orderings_to_try3, results, pForce_selectors); + } + + if ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)) + { + assert(needs_block_error); + try_3color_block_useblack(pSrc_pixels, flags, cur_err, results, pForce_selectors); + } + } + + if ((flags & cEncodeBC1EndpointSearchRoundsMask) && 
(cur_err)) + { + assert(needs_block_error); + + encode_bc1_endpoint_search(pSrc_pixels, any_black_pixels != 0, flags, results, cur_err, pForce_selectors); + } + + if (results.m_3color) + bc1_encode3(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); + else + bc1_encode4(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); + } + + // BC3-5 + + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) + { + assert(g_initialized); + + uint32_t min0_v, max0_v, min1_v, max1_v, min2_v, max2_v, min3_v, max3_v; + + { + min0_v = max0_v = pPixels[0 * stride]; + min1_v = max1_v = pPixels[1 * stride]; + min2_v = max2_v = pPixels[2 * stride]; + min3_v = max3_v = pPixels[3 * stride]; + } + + { + uint32_t v0 = pPixels[4 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); + uint32_t v1 = pPixels[5 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); + uint32_t v2 = pPixels[6 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); + uint32_t v3 = pPixels[7 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); + } + + { + uint32_t v0 = pPixels[8 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); + uint32_t v1 = pPixels[9 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); + uint32_t v2 = pPixels[10 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); + uint32_t v3 = pPixels[11 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); + } + + { + uint32_t v0 = pPixels[12 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); + uint32_t v1 = pPixels[13 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); + uint32_t v2 = pPixels[14 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); + uint32_t v3 = pPixels[15 * stride]; min3_v = std::min(min3_v, v3); max3_v = 
std::max(max3_v, v3); + } + + const uint32_t min_v = minimum(min0_v, min1_v, min2_v, min3_v); + const uint32_t max_v = maximum(max0_v, max1_v, max2_v, max3_v); + + uint8_t* pDst_bytes = static_cast(pDst); + pDst_bytes[0] = (uint8_t)max_v; + pDst_bytes[1] = (uint8_t)min_v; + + if (max_v == min_v) + { + memset(pDst_bytes + 2, 0, 6); + return; + } + + const uint32_t delta = max_v - min_v; + + // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. + const int t0 = delta * 13; + const int t1 = delta * 11; + const int t2 = delta * 9; + const int t3 = delta * 7; + const int t4 = delta * 5; + const int t5 = delta * 3; + const int t6 = delta * 1; + + // BC4 floors in its divisions, which we compensate for with the 4 bias. + // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). + const int bias = 4 - min_v * 14; + + static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; + static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; + static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; + static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; + + uint64_t a0, a1, a2, a3; + { + const int v0 = pPixels[0 * stride] * 14 + bias; + const int v1 = pPixels[1 * stride] * 14 + bias; + const int v2 = pPixels[2 * stride] * 14 + bias; + const int v3 = pPixels[3 * stride] * 14 + bias; + a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; + a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; + a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; + a3 = s_tran3[(v3 >= t0) + (v3 
>= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; + } + + { + const int v0 = pPixels[4 * stride] * 14 + bias; + const int v1 = pPixels[5 * stride] * 14 + bias; + const int v2 = pPixels[6 * stride] * 14 + bias; + const int v3 = pPixels[7 * stride] * 14 + bias; + a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); + a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); + a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); + a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); + } + + { + const int v0 = pPixels[8 * stride] * 14 + bias; + const int v1 = pPixels[9 * stride] * 14 + bias; + const int v2 = pPixels[10 * stride] * 14 + bias; + const int v3 = pPixels[11 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); + } + + { + const int v0 = pPixels[12 * stride] * 14 + bias; + const int v1 = pPixels[13 * stride] * 14 + bias; + const int v2 = pPixels[14 * stride] * 14 + bias; + const int v3 = pPixels[15 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); + a2 |= 
(((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); + } + + const uint64_t f = a0 | a1 | a2 | a3; + + pDst_bytes[2] = (uint8_t)f; + pDst_bytes[3] = (uint8_t)(f >> 8U); + pDst_bytes[4] = (uint8_t)(f >> 16U); + pDst_bytes[5] = (uint8_t)(f >> 24U); + pDst_bytes[6] = (uint8_t)(f >> 32U); + pDst_bytes[7] = (uint8_t)(f >> 40U); + } + + uint32_t encode_bc4_hq(void* pDst, const uint8_t* pPixels, uint32_t stride, uint32_t search_rad, uint32_t mode_flag, const uint8_t* pForce_selectors) + { + assert(mode_flag); + + uint8_t* pDst_bytes = static_cast(pDst); + + uint32_t min_val = 255, max_val = 0; + for (uint32_t i = 0; i < 16; i++) + { + uint32_t val = pPixels[i * stride]; + min_val = std::min(val, min_val); + max_val = std::max(val, max_val); + } + + if (min_val == max_val) + { + if (mode_flag & BC4_USE_MODE6_FLAG) + { + pDst_bytes[0] = (uint8_t)min_val; + pDst_bytes[1] = (uint8_t)min_val; + + memset(pDst_bytes + 2, 0, 6); + + assert(!(pDst_bytes[0] > pDst_bytes[1])); + } + else + { + // Use an 8 value encoding + if (min_val > 0) + { + pDst_bytes[0] = (uint8_t)min_val; + pDst_bytes[1] = (uint8_t)min_val - 1; + + memset(pDst_bytes + 2, 0, 6); + } + else + { + static const uint8_t s_const_1_vals[8] = { 1, 0, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24 }; + memcpy(pDst_bytes, s_const_1_vals, 8); + } + + assert(pDst_bytes[0] > pDst_bytes[1]); + } + +#if defined(_DEBUG) || defined(DEBUG) + { + bc4_block* pBlock = (bc4_block*)pDst; + uint8_t pixels[16]; + unpack_bc4(pDst, pixels, 1); + for (uint32_t i = 0; i < 16; i++) + assert(pixels[i] == min_val); + if (mode_flag & BC4_USE_MODE6_FLAG) + { + assert(pBlock->is_alpha6_block()); + } + else + { + assert(!pBlock->is_alpha6_block()); + } + } +#endif + + return 0; + } + + uint32_t best_err = UINT32_MAX; + for (uint32_t mode = 0; mode < 2; mode++) + { + 
if ((mode_flag & (1 << mode)) == 0) + continue; + + for (int lo_delta = -(int)search_rad; lo_delta <= (int)search_rad; lo_delta++) + { + for (int hi_delta = -(int)search_rad; hi_delta <= (int)search_rad; hi_delta++) + { + bc4_block trial_block; + trial_block.m_endpoints[0] = (uint8_t)clamp(max_val + hi_delta, 0, 255); + trial_block.m_endpoints[1] = (uint8_t)clamp(min_val + lo_delta, 0, 255); + + if (trial_block.m_endpoints[0] == trial_block.m_endpoints[1]) + continue; + + if (mode == 0) + { + if (trial_block.is_alpha6_block()) + std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]); + } + else if (!trial_block.is_alpha6_block()) + std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]); + + uint8_t block_vals[8]; + trial_block.get_block_values(block_vals, trial_block.m_endpoints[0], trial_block.m_endpoints[1]); + + uint32_t trial_err = 0; + uint8_t trial_sels[16]; + + if (pForce_selectors) + { + memcpy(trial_sels, pForce_selectors, 16); + + for (uint32_t i = 0; i < 16; i++) + trial_err += squarei(block_vals[pForce_selectors[i]] - pPixels[i * stride]); + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + uint32_t best_index_err = UINT32_MAX; + uint32_t best_index = 0; + for (uint32_t j = 0; j < 8; j++) + { + uint32_t err = squarei(block_vals[j] - pPixels[i * stride]); + if (err < best_index_err) + { + best_index_err = err; + best_index = j; + if (!err) + break; + } + } + + trial_err += best_index_err; + if (trial_err >= best_err) + break; + + trial_sels[i] = (uint8_t)best_index; + } // i + } + + if (trial_err < best_err) + { + best_err = trial_err; + + uint64_t sel_vals = 0; + for (uint32_t i = 0; i < 16; i++) + sel_vals |= ((uint64_t)trial_sels[i] << (i * 3)); + + trial_block.m_selectors[0] = (uint8_t)sel_vals; + trial_block.m_selectors[1] = (uint8_t)(sel_vals >> 8); + trial_block.m_selectors[2] = (uint8_t)(sel_vals >> 16); + trial_block.m_selectors[3] = (uint8_t)(sel_vals >> 24); + trial_block.m_selectors[4] = (uint8_t)(sel_vals >> 32); + 
trial_block.m_selectors[5] = (uint8_t)(sel_vals >> 40); + + memcpy(pDst_bytes, &trial_block, sizeof(bc4_block)); + } // if (trial_err < best_err) + + } // hi_delta + + } // lo_delta + + } // mode + + return best_err; + } + + void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try) + { + assert(g_initialized); + + // 3-color blocks are not allowed with BC3 (on most GPU's). + flags &= ~(cEncodeBC1Use3ColorBlocksForBlackPixels | cEncodeBC1Use3ColorBlocks); + + encode_bc4(pDst, pPixels + 3, 4); + encode_bc1(static_cast(pDst) + 8, pPixels, flags, total_orderings_to_try); + } + + void encode_bc3(uint32_t level, void* pDst, const uint8_t* pPixels) + { + assert(g_initialized); + + encode_bc4(pDst, pPixels + 3, 4); + encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); + } + + void encode_bc3_hq(uint32_t level, void* pDst, const uint8_t* pPixels, uint32_t alpha_search_rad, uint32_t alpha_modes) + { + assert(g_initialized); + + encode_bc4_hq(pDst, pPixels + 3, 4, alpha_search_rad, alpha_modes); + encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); + } + + void encode_bc5(void* pDst, const uint8_t* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) + { + assert(g_initialized); + + encode_bc4(pDst, pPixels + chan0, stride); + encode_bc4(static_cast(pDst) + 8, pPixels + chan1, stride); + } + + void encode_bc5_hq(void* pDst, const uint8_t* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride, uint32_t alpha_search_rad, uint32_t alpha_modes) + { + assert(g_initialized); + + encode_bc4_hq(pDst, pPixels + chan0, stride, alpha_search_rad, alpha_modes); + encode_bc4_hq(static_cast(pDst) + 8, pPixels + chan1, stride, alpha_search_rad, alpha_modes); + } + + bool unpack_bc1_block_colors(const void* pBlock_bits, color32* c, bc1_approx_mode mode) + { + const bc1_block* pBlock = static_cast(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + const int 
cr0 = (l >> 11) & 31; + const int cg0 = (l >> 5) & 63; + const int cb0 = l & 31; + const int r0 = (cr0 << 3) | (cr0 >> 2); + const int g0 = (cg0 << 2) | (cg0 >> 4); + const int b0 = (cb0 << 3) | (cb0 >> 2); + + const int cr1 = (h >> 11) & 31; + const int cg1 = (h >> 5) & 63; + const int cb1 = h & 31; + const int r1 = (cr1 << 3) | (cr1 >> 2); + const int g1 = (cg1 << 2) | (cg1 >> 4); + const int b1 = (cb1 << 3) | (cb1 >> 2); + + bool used_punchthrough = false; + + if (l > h) + { + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + switch (mode) + { + case bc1_approx_mode::cBC1Ideal: + c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); + break; + case bc1_approx_mode::cBC1IdealRound4: + c[2].set_noclamp_rgba((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); + c[3].set_noclamp_rgba((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); + break; + case bc1_approx_mode::cBC1NVidia: + c[2].set_noclamp_rgba(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); + c[3].set_noclamp_rgba(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); + break; + case bc1_approx_mode::cBC1AMD: + c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + break; + } + } + else + { + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + switch (mode) + { + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + break; + case bc1_approx_mode::cBC1NVidia: + c[2].set_noclamp_rgba(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); + break; + case 
bc1_approx_mode::cBC1AMD: + c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + break; + } + + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + return used_punchthrough; + } + + // Returns true if the block uses 3 color punchthrough alpha mode. + bool unpack_bc1(const void* pBlock_bits, void* pPixels, bool set_alpha, bc1_approx_mode mode) + { + color32* pDst_pixels = static_cast(pPixels); + + static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); + static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); + + const bc1_block* pBlock = static_cast(pBlock_bits); + + color32 c[4]; + const bool used_punchthrough = unpack_bc1_block_colors(pBlock_bits, c, mode); + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) + { + pDst_pixels[0] = c[pBlock->get_selector(0, y)]; + pDst_pixels[1] = c[pBlock->get_selector(1, y)]; + pDst_pixels[2] = c[pBlock->get_selector(2, y)]; + pDst_pixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) + { + pDst_pixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pDst_pixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pDst_pixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pDst_pixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + void unpack_bc4(const void* pBlock_bits, uint8_t* pPixels, uint32_t stride) + { + static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); + + const bc4_block* pBlock = static_cast(pBlock_bits); + + uint8_t sel_values[8]; + bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); + + const uint64_t selector_bits = pBlock->get_selector_bits(); + + for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) + { + pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; + pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, 
selector_bits)]; + pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)]; + pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; + } + } + + // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. + bool unpack_bc3(const void* pBlock_bits, void* pPixels, bc1_approx_mode mode) + { + color32* pDst_pixels = static_cast(pPixels); + + bool success = true; + + if (unpack_bc1((const uint8_t*)pBlock_bits + sizeof(bc4_block), pDst_pixels, true, mode)) + success = false; + + unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(color32)); + + return success; + } + + // writes RG + void unpack_bc5(const void* pBlock_bits, void* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) + { + unpack_bc4(pBlock_bits, (uint8_t*)pPixels + chan0, stride); + unpack_bc4((const uint8_t*)pBlock_bits + sizeof(bc4_block), (uint8_t*)pPixels + chan1, stride); + } + +} // namespace rgbcx + + + diff --git a/libkram/bc7enc/rgbcx.h b/libkram/bc7enc/rgbcx.h index 748d39e..cf79392 100644 --- a/libkram/bc7enc/rgbcx.h +++ b/libkram/bc7enc/rgbcx.h @@ -1,7 +1,9 @@ -// rgbcx.h v1.12 -// High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . +// rgbcx.h v1.13 +// High-performance scalar encoders and RDO (Rate Distortion Optimization) post processors for BC1-5. +// Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . 
// // Influential references: +// https://tinyurl.com/y3vxz457 (Ortego and Ramchandran, "Rate-distortion Methods for Image and Video Compression", 1998) // http://sjbrown.co.uk/2006/01/19/dxt-compression-techniques/ // https://github.com/nothings/stb/blob/master/stb_dxt.h // https://gist.github.com/castano/c92c7626f288f9e99e158520b14a61cf @@ -56,9 +58,13 @@ #ifndef RGBCX_INCLUDE_H #define RGBCX_INCLUDE_H +#ifdef _MSC_VER +#pragma warning (disable:4201) //nameless struct/union +#endif + #include #include -//#include +#include #include #include @@ -87,6 +93,57 @@ namespace rgbcx cBC1IdealRound4 = 3 }; + enum class eNoClamp { cNoClamp }; + static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } + + template inline S maximum(S a, S b) { return (a > b) ? a : b; } + template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + + template inline S minimum(S a, S b) { return (a < b) ? 
a : b; } + template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } + template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + + struct color32 + { + union + { + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint8_t c[4]; + + uint32_t m; + }; + + color32() { } + + color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } + + void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } + + void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } + void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + + void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } + + uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } + uint8_t& operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } + + bool operator== (const color32& rhs) const { return m == rhs.m; } + + void set_rgb(const color32& other) { c[0] = static_cast(other.c[0]); c[1] = static_cast(other.c[1]); c[2] = static_cast(other.c[2]); } + + static color32 comp_min(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); } + static color32 comp_max(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); } + }; + // init() MUST be called once before using the BC1 encoder. 
// This function may be called multiple times to change the BC1 approximation mode. // This function initializes global state, so don't call it while other threads inside the encoder. @@ -177,30 +234,41 @@ namespace rgbcx // Note that the 3 color modes won't be used at all until level 5 or higher. // No transparency supported, however if you set use_transparent_texels_for_black to true the encocer will use transparent selectors on very dark/black texels to reduce MSE. const uint32_t MIN_LEVEL = 0, MAX_LEVEL = 18; - void encode_bc1(uint32_t level, void* pDst, const uint8_t* pPixels, bool allow_3color, bool use_transparent_texels_for_black); + void encode_bc1(uint32_t level, void* pDst, const uint8_t* pPixels, bool allow_3color, bool use_transparent_texels_for_black, const uint8_t* pForce_selectors = nullptr); // Low-level interface for BC1 encoding. // Always returns a 4 color block, unless cEncodeBC1Use3ColorBlocksForBlackPixels or cEncodeBC1Use3ColorBlock flags are specified. // total_orderings_to_try controls the perf. vs. quality tradeoff on 4-color blocks when the cEncodeBC1UseLikelyTotalOrderings flag is used. It must range between [MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4]. // total_orderings_to_try3 controls the perf. vs. quality tradeoff on 3-color bocks when the cEncodeBC1UseLikelyTotalOrderings and the cEncodeBC1Use3ColorBlocks flags are used. Valid range is [0,MAX_TOTAL_ORDERINGS3] (0=disabled). 
- void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags = 0, uint32_t total_orderings_to_try = DEFAULT_TOTAL_ORDERINGS_TO_TRY, uint32_t total_orderings_to_try3 = DEFAULT_TOTAL_ORDERINGS_TO_TRY3); - + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags = 0, uint32_t total_orderings_to_try = DEFAULT_TOTAL_ORDERINGS_TO_TRY, uint32_t total_orderings_to_try3 = DEFAULT_TOTAL_ORDERINGS_TO_TRY3, const uint8_t *pForce_selectors = nullptr); + + // Constants used for high quality BC4/BC5 encoding (and alpha of BC3) + const uint32_t BC4_DEFAULT_SEARCH_RAD = 3; + const uint32_t BC4_USE_MODE8_FLAG = 1; + const uint32_t BC4_USE_MODE6_FLAG = 2; + const uint32_t BC4_USE_ALL_MODES = 3; + // Encodes a 4x4 block of RGBA pixels to BC3 format. // There are two encode_bc3() functions. // The first is the recommended function, which accepts a level parameter. // The second is a low-level version that allows fine control over BC1 encoding. void encode_bc3(uint32_t level, void* pDst, const uint8_t* pPixels); void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags = 0, uint32_t total_orderings_to_try = DEFAULT_TOTAL_ORDERINGS_TO_TRY); - + void encode_bc3_hq(uint32_t level, void* pDst, const uint8_t* pPixels, uint32_t alpha_search_rad = BC4_DEFAULT_SEARCH_RAD, uint32_t alpha_modes = BC4_USE_ALL_MODES); + // Encodes a single channel to BC4. // stride is the source pixel stride in bytes. void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride = 4); + uint32_t encode_bc4_hq(void* pDst, const uint8_t* pPixels, uint32_t stride = 4, uint32_t search_rad = BC4_DEFAULT_SEARCH_RAD, uint32_t mode_flag = BC4_USE_ALL_MODES, const uint8_t* pForce_selectors = nullptr); // Encodes two channels to BC5. // chan0/chan1 control which channels, stride is the source pixel stride in bytes. 
void encode_bc5(void* pDst, const uint8_t* pPixels, uint32_t chan0 = 0, uint32_t chan1 = 1, uint32_t stride = 4); + void encode_bc5_hq(void* pDst, const uint8_t* pPixels, uint32_t chan0 = 0, uint32_t chan1 = 1, uint32_t stride = 4, uint32_t alpha_search_rad = BC4_DEFAULT_SEARCH_RAD, uint32_t alpha_modes = BC4_USE_ALL_MODES); // Decompression functions. + + bool unpack_bc1_block_colors(const void* pBlock_bits, color32* c, bc1_approx_mode mode = bc1_approx_mode::cBC1Ideal); // Returns true if the block uses 3 color punchthrough alpha mode. bool unpack_bc1(const void* pBlock_bits, void* pPixels, bool set_alpha = true, bc1_approx_mode mode = bc1_approx_mode::cBC1Ideal); @@ -211,1273 +279,8 @@ namespace rgbcx bool unpack_bc3(const void* pBlock_bits, void* pPixels, bc1_approx_mode mode = bc1_approx_mode::cBC1Ideal); void unpack_bc5(const void* pBlock_bits, void* pPixels, uint32_t chan0 = 0, uint32_t chan1 = 1, uint32_t stride = 4); -} -#endif // #ifndef RGBCX_INCLUDE_H - -#ifdef RGBCX_IMPLEMENTATION -namespace rgbcx -{ - const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS4 = 969; - - // All total orderings for 16 pixels 2-bit selectors. - // BC1 selector order 0, 2, 3, 1 (i.e. the selectors are reordered into linear order). 
- static uint8_t g_unique_total_orders4[NUM_UNIQUE_TOTAL_ORDERINGS4][4] = - { - {0,8,2,6},{4,3,9,0},{4,8,1,3},{12,0,3,1},{11,3,2,0},{6,4,6,0},{7,5,0,4},{6,0,8,2},{1,0,0,15},{3,0,8,5},{1,1,13,1},{13,1,2,0},{0,14,1,1},{0,15,1,0},{0,13,0,3},{16,0,0,0},{4,3,4,5},{8,6,0,2},{0,10,0,6},{10,0,4,2},{7,2,1,6},{4,7,5,0},{1,4,7,4},{0,14,2,0},{2,7,2,5},{9,0,5,2},{9,2,2,3},{10,0,5,1},{2,3,7,4},{4,9,0,3},{1,5,0,10},{1,1,6,8}, - {6,6,4,0},{11,5,0,0},{11,2,0,3},{4,0,10,2},{2,3,10,1},{1,13,1,1},{0,14,0,2},{2,3,3,8},{12,3,1,0},{14,0,0,2},{9,1,3,3},{6,4,0,6},{1,1,5,9},{5,9,0,2},{2,10,1,3},{12,0,0,4},{4,6,6,0},{0,6,4,6},{3,7,4,2},{0,13,3,0},{3,10,0,3},{10,2,1,3},{1,12,1,2},{2,0,13,1},{11,0,5,0},{12,1,3,0},{6,4,5,1},{10,4,2,0},{3,6,1,6},{7,3,6,0},{10,4,0,2},{10,0,2,4}, - {0,5,9,2},{0,9,3,4},{6,4,2,4},{3,4,7,2},{3,3,5,5},{4,2,9,1},{6,2,8,0},{3,5,3,5},{4,10,1,1},{10,1,3,2},{5,7,0,4},{5,3,7,1},{6,8,1,1},{8,8,0,0},{11,1,0,4},{14,1,0,1},{9,3,2,2},{8,2,1,5},{0,0,2,14},{3,3,9,1},{10,1,5,0},{8,3,1,4},{1,5,8,2},{6,1,9,0},{3,2,1,10},{3,11,1,1},{7,6,3,0},{9,0,3,4},{5,2,5,4},{0,2,3,11},{15,0,0,1},{0,6,6,4}, - {3,4,9,0},{4,7,0,5},{0,4,4,8},{0,13,2,1},{2,4,1,9},{3,2,5,6},{10,6,0,0},{3,5,6,2},{8,0,4,4},{1,3,6,6},{7,7,0,2},{6,1,4,5},{0,11,1,4},{2,2,8,4},{0,1,2,13},{15,0,1,0},{7,2,6,1},{8,1,7,0},{1,8,4,3},{2,13,1,0},{1,0,7,8},{14,2,0,0},{1,8,1,6},{9,3,3,1},{0,0,7,9},{4,4,1,7},{9,0,6,1},{10,2,4,0},{1,7,3,5},{0,3,8,5},{5,2,4,5},{1,2,5,8}, - {0,8,7,1},{10,3,2,1},{12,0,4,0},{2,1,4,9},{5,2,2,7},{1,9,3,3},{15,1,0,0},{6,3,4,3},{9,5,0,2},{1,6,9,0},{6,6,0,4},{13,2,1,0},{5,1,8,2},{0,5,11,0},{7,1,0,8},{1,2,12,1},{0,3,3,10},{7,4,2,3},{5,1,4,6},{7,0,3,6},{3,12,0,1},{3,4,5,4},{1,10,0,5},{7,4,3,2},{10,5,0,1},{13,3,0,0},{2,5,4,5},{3,10,1,2},{5,1,2,8},{14,0,1,1},{1,5,4,6},{1,4,5,6}, - 
{2,3,11,0},{11,0,4,1},{11,2,2,1},{5,3,8,0},{1,3,10,2},{0,1,13,2},{3,1,4,8},{4,2,4,6},{1,5,6,4},{2,1,11,2},{1,2,9,4},{4,7,3,2},{6,2,5,3},{7,2,2,5},{8,1,4,3},{3,2,8,3},{12,1,0,3},{7,8,1,0},{7,0,2,7},{5,10,0,1},{0,2,14,0},{2,9,3,2},{7,0,0,9},{11,1,4,0},{10,4,1,1},{2,2,9,3},{5,7,2,2},{1,3,1,11},{13,2,0,1},{4,2,8,2},{2,3,1,10},{4,2,5,5}, - {7,0,7,2},{10,0,0,6},{0,8,5,3},{4,4,0,8},{12,4,0,0},{0,1,14,1},{8,0,1,7},{5,1,5,5},{11,0,3,2},{0,4,1,11},{0,8,8,0},{0,2,5,9},{7,3,2,4},{7,8,0,1},{1,0,3,12},{7,4,5,0},{1,6,7,2},{7,6,1,2},{9,6,1,0},{12,2,0,2},{4,1,6,5},{4,0,1,11},{8,4,4,0},{13,0,1,2},{8,6,2,0},{4,12,0,0},{2,7,5,2},{2,0,5,9},{5,4,5,2},{3,8,5,0},{7,3,3,3},{4,4,8,0}, - {2,1,3,10},{5,0,1,10},{6,4,3,3},{4,9,1,2},{1,4,0,11},{11,3,1,1},{4,0,12,0},{13,0,0,3},{6,1,6,3},{9,0,4,3},{8,0,0,8},{8,4,0,4},{0,12,1,3},{0,4,10,2},{3,4,8,1},{1,3,8,4},{9,2,5,0},{5,7,4,0},{1,0,11,4},{4,10,0,2},{1,3,12,0},{6,9,0,1},{5,0,9,2},{5,9,2,0},{13,1,0,2},{9,3,4,0},{9,4,0,3},{3,1,12,0},{2,4,3,7},{1,2,13,0},{2,2,4,8},{6,8,0,2}, - {9,2,1,4},{9,5,1,1},{2,0,4,10},{5,4,0,7},{0,0,6,10},{1,2,0,13},{4,7,2,3},{6,5,5,0},{3,3,1,9},{1,6,1,8},{12,2,1,1},{4,4,5,3},{1,0,6,9},{0,6,10,0},{4,8,3,1},{4,3,2,7},{2,1,7,6},{1,9,1,5},{3,1,3,9},{8,7,1,0},{1,2,3,10},{14,1,1,0},{5,4,4,3},{3,7,0,6},{7,4,1,4},{3,7,5,1},{1,1,0,14},{0,10,3,3},{0,4,3,9},{1,7,7,1},{2,0,10,4},{5,8,0,3}, - {6,7,3,0},{0,8,4,4},{5,7,3,1},{7,9,0,0},{7,6,2,1},{0,4,5,7},{6,3,5,2},{1,2,1,12},{5,2,0,9},{8,5,0,3},{4,6,1,5},{1,1,7,7},{10,5,1,0},{1,2,8,5},{1,8,2,5},{5,1,0,10},{6,9,1,0},{13,0,2,1},{8,3,5,0},{6,3,6,1},{2,11,3,0},{3,7,3,3},{1,5,2,8},{7,5,2,2},{0,6,7,3},{13,1,1,1},{5,3,4,4},{7,2,7,0},{5,8,3,0},{3,13,0,0},{0,7,9,0},{8,0,3,5}, - 
{1,3,7,5},{4,0,2,10},{12,0,1,3},{1,7,6,2},{3,9,0,4},{7,2,0,7},{0,1,7,8},{2,1,8,5},{0,13,1,2},{0,8,1,7},{5,0,11,0},{5,6,2,3},{0,3,0,13},{2,3,4,7},{5,6,3,2},{4,2,10,0},{3,3,7,3},{7,2,5,2},{1,1,11,3},{12,3,0,1},{5,1,1,9},{1,15,0,0},{9,7,0,0},{9,1,2,4},{0,7,3,6},{3,0,13,0},{3,0,11,2},{0,6,5,5},{8,2,2,4},{6,10,0,0},{4,8,4,0},{0,0,3,13}, - {0,4,12,0},{7,1,6,2},{3,5,0,8},{8,0,6,2},{6,2,3,5},{2,10,0,4},{4,11,0,1},{6,1,5,4},{5,1,3,7},{0,11,3,2},{4,6,0,6},{2,6,0,8},{3,1,7,5},{2,14,0,0},{2,9,2,3},{0,3,4,9},{11,0,1,4},{13,0,3,0},{8,3,0,5},{0,5,3,8},{5,11,0,0},{0,1,4,11},{2,1,9,4},{3,4,4,5},{7,1,2,6},{12,2,2,0},{9,4,1,2},{6,0,2,8},{4,6,2,4},{11,2,3,0},{3,2,2,9},{10,3,1,2}, - {1,1,2,12},{0,5,2,9},{0,1,11,4},{6,2,4,4},{2,8,2,4},{0,9,4,3},{11,0,2,3},{0,2,11,3},{6,0,7,3},{0,3,6,7},{4,5,5,2},{1,2,6,7},{7,5,1,3},{9,0,2,5},{2,6,4,4},{4,1,9,2},{4,8,2,2},{1,12,3,0},{0,9,6,1},{0,10,6,0},{3,1,5,7},{2,13,0,1},{2,2,1,11},{3,6,0,7},{5,6,5,0},{5,5,4,2},{4,0,3,9},{3,4,1,8},{0,11,2,3},{2,12,1,1},{7,1,3,5},{7,0,9,0}, - {8,0,8,0},{1,0,2,13},{3,3,10,0},{2,4,4,6},{2,3,8,3},{1,10,5,0},{7,3,0,6},{2,9,0,5},{1,4,6,5},{6,6,3,1},{5,6,0,5},{6,3,0,7},{3,10,2,1},{2,5,5,4},{3,8,4,1},{1,14,0,1},{10,3,3,0},{3,5,7,1},{1,1,3,11},{2,4,0,10},{9,3,1,3},{5,10,1,0},{3,0,6,7},{3,1,9,3},{11,2,1,2},{5,3,3,5},{0,5,1,10},{4,1,11,0},{10,2,0,4},{7,6,0,3},{2,7,0,7},{4,2,2,8}, - {6,1,7,2},{4,9,2,1},{0,0,8,8},{3,7,2,4},{9,6,0,1},{0,12,4,0},{6,7,1,2},{0,7,2,7},{1,0,10,5},{0,0,14,2},{2,7,3,4},{5,0,0,11},{7,7,1,1},{6,2,7,1},{4,5,3,4},{3,5,1,7},{5,9,1,1},{6,2,1,7},{3,2,0,11},{0,11,0,5},{3,11,2,0},{10,1,4,1},{7,0,4,5},{11,4,0,1},{10,3,0,3},{0,2,4,10},{0,15,0,1},{0,11,5,0},{6,7,2,1},{1,12,2,1},{4,1,3,8},{1,0,13,2}, - 
{1,8,5,2},{7,0,1,8},{3,12,1,0},{9,2,4,1},{1,7,4,4},{11,4,1,0},{4,3,8,1},{2,8,4,2},{1,11,3,1},{1,1,4,10},{4,10,2,0},{8,2,5,1},{1,0,9,6},{5,3,2,6},{0,9,7,0},{10,2,2,2},{5,8,1,2},{8,7,0,1},{0,3,12,1},{1,0,1,14},{4,8,0,4},{3,8,0,5},{4,6,5,1},{0,9,5,2},{10,2,3,1},{2,3,9,2},{1,0,12,3},{11,3,0,2},{4,5,2,5},{0,2,12,2},{9,1,0,6},{9,2,0,5}, - {1,2,7,6},{4,7,4,1},{0,12,2,2},{0,0,0,16},{2,8,3,3},{3,6,2,5},{0,6,3,7},{7,5,4,0},{3,3,3,7},{3,3,0,10},{5,0,6,5},{0,0,10,6},{8,5,3,0},{8,1,5,2},{6,0,9,1},{11,1,2,2},{2,11,2,1},{9,5,2,0},{3,0,4,9},{2,2,12,0},{2,6,6,2},{2,1,13,0},{6,0,5,5},{2,0,14,0},{2,11,1,2},{4,4,7,1},{2,0,11,3},{3,1,1,11},{2,9,4,1},{3,7,6,0},{14,0,2,0},{1,10,4,1}, - {8,0,7,1},{3,6,5,2},{0,3,11,2},{2,5,6,3},{11,1,3,1},{6,5,3,2},{3,8,1,4},{0,2,7,7},{2,10,2,2},{1,6,2,7},{11,0,0,5},{12,1,1,2},{12,1,2,1},{0,7,1,8},{0,3,9,4},{0,2,1,13},{7,1,4,4},{10,1,0,5},{4,0,8,4},{5,2,7,2},{0,2,0,14},{4,3,7,2},{2,7,1,6},{1,2,2,11},{6,3,3,4},{1,14,1,0},{2,4,6,4},{5,3,6,2},{5,3,5,3},{8,4,1,3},{1,3,0,12},{3,5,2,6}, - {1,8,7,0},{0,7,4,5},{2,1,6,7},{4,11,1,0},{7,2,4,3},{6,1,3,6},{4,5,4,3},{2,11,0,3},{1,5,7,3},{12,0,2,2},{5,0,4,7},{1,13,0,2},{7,7,2,0},{4,1,7,4},{4,5,0,7},{5,0,5,6},{6,5,4,1},{2,4,2,8},{1,10,1,4},{6,3,1,6},{3,3,8,2},{0,7,7,2},{4,4,2,6},{1,1,8,6},{1,12,0,3},{2,1,12,1},{1,9,2,4},{1,11,0,4},{2,5,2,7},{10,0,3,3},{4,6,3,3},{3,7,1,5}, - {1,9,0,6},{7,1,7,1},{1,6,5,4},{9,2,3,2},{6,2,2,6},{2,2,2,10},{8,3,3,2},{0,1,8,7},{2,0,8,6},{0,3,1,12},{9,4,2,1},{9,4,3,0},{6,2,6,2},{1,8,0,7},{5,1,10,0},{0,5,5,6},{8,2,4,2},{2,3,2,9},{6,0,3,7},{2,2,6,6},{2,6,2,6},{1,13,2,0},{9,3,0,4},{7,3,5,1},{6,5,2,3},{5,2,6,3},{2,0,12,2},{5,7,1,3},{8,1,3,4},{3,1,10,2},{1,0,15,0},{0,8,0,8}, - {5,0,7,4},{4,4,6,2},{0,1,0,15},{10,0,1,5},{7,3,4,2},{4,9,3,0},{2,5,7,2},{3,4,2,7},{8,3,2,3},{5,1,6,4},{0,10,2,4},{6,6,1,3},{6,0,0,10},{4,4,3,5},{1,3,9,3},{7,5,3,1},{3,0,7,6},{1,8,6,1},{4,3,0,9},{3,11,0,2},{6,0,6,4},{0,1,3,12},{0,4,2,10},{5,5,6,0},{4,1,4,7},{8,1,6,1},{5,6,4,1},{8,4,2,2},{4,3,1,8},{3,0,2,11},{1,11,4,0},{0,8,3,5}, 
- {5,1,7,3},{7,0,8,1},{4,3,5,4},{4,6,4,2},{3,2,4,7},{1,6,3,6},{0,7,8,1},{3,0,1,12},{9,1,4,2},{7,4,0,5},{1,7,0,8},{5,4,1,6},{9,1,5,1},{1,1,9,5},{4,1,1,10},{5,3,0,8},{2,2,5,7},{4,0,0,12},{9,0,7,0},{3,4,0,9},{0,2,6,8},{8,2,0,6},{3,2,6,5},{4,2,6,4},{3,6,4,3},{2,8,6,0},{5,0,3,8},{0,4,0,12},{0,16,0,0},{0,9,2,5},{4,0,11,1},{1,6,4,5}, - {0,1,6,9},{3,4,6,3},{3,0,10,3},{7,0,6,3},{1,4,9,2},{1,5,3,7},{8,5,2,1},{0,12,0,4},{7,2,3,4},{0,5,6,5},{11,1,1,3},{6,5,0,5},{2,1,5,8},{1,4,11,0},{9,1,1,5},{0,0,13,3},{5,8,2,1},{2,12,0,2},{3,3,6,4},{4,1,10,1},{4,0,5,7},{8,1,0,7},{5,1,9,1},{4,3,3,6},{0,2,2,12},{6,3,2,5},{0,0,12,4},{1,5,1,9},{2,6,5,3},{3,6,3,4},{2,12,2,0},{1,6,8,1}, - {10,1,1,4},{1,3,4,8},{7,4,4,1},{1,11,1,3},{1,2,10,3},{3,9,3,1},{8,5,1,2},{2,10,4,0},{4,2,0,10},{2,7,6,1},{8,2,3,3},{1,5,5,5},{3,1,0,12},{3,10,3,0},{8,0,5,3},{0,6,8,2},{0,3,13,0},{0,0,16,0},{1,9,4,2},{4,1,8,3},{1,6,6,3},{0,10,5,1},{0,1,12,3},{4,0,6,6},{3,8,3,2},{0,5,4,7},{1,0,14,1},{0,4,6,6},{3,9,1,3},{3,5,8,0},{3,6,6,1},{5,4,7,0}, - {3,0,12,1},{8,6,1,1},{2,9,5,0},{6,1,1,8},{4,1,2,9},{3,9,4,0},{5,2,9,0},{0,12,3,1},{1,4,10,1},{4,0,7,5},{3,1,2,10},{5,4,2,5},{5,5,5,1},{4,2,3,7},{1,7,5,3},{2,8,0,6},{8,1,2,5},{3,8,2,3},{6,1,2,7},{3,9,2,2},{9,0,0,7},{0,8,6,2},{8,4,3,1},{0,2,8,6},{6,5,1,4},{2,3,5,6},{2,10,3,1},{0,7,0,9},{4,2,7,3},{2,4,8,2},{7,1,1,7},{2,4,7,3}, - {2,4,10,0},{0,1,10,5},{4,7,1,4},{0,10,4,2},{9,0,1,6},{1,9,6,0},{3,3,4,6},{4,5,7,0},{5,5,2,4},{2,8,1,5},{2,3,6,5},{0,1,1,14},{3,2,3,8},{10,1,2,3},{9,1,6,0},{3,4,3,6},{2,2,0,12},{0,0,9,7},{4,0,9,3},{7,0,5,4},{4,5,6,1},{2,5,1,8},{2,5,9,0},{3,5,4,4},{1,3,11,1},{7,1,5,3},{3,2,7,4},{1,4,2,9},{1,11,2,2},{2,2,3,9},{5,0,10,1},{3,2,11,0}, - {1,10,3,2},{8,3,4,1},{3,6,7,0},{0,7,5,4},{1,3,3,9},{2,2,10,2},{1,9,5,1},{0,5,0,11},{3,0,3,10},{0,4,8,4},{2,7,7,0},{2,0,2,12},{1,2,11,2},{6,3,7,0},{0,6,2,8},{0,10,1,5},{0,9,0,7},{6,4,4,2},{6,0,1,9},{1,5,10,0},{5,4,6,1},{5,5,3,3},{0,0,4,12},{0,3,2,11},{1,4,1,10},{3,0,9,4},{5,5,0,6},{1,7,8,0},{2,0,3,11},{6,4,1,5},{10,0,6,0},{0,6,0,10}, - 
{0,4,11,1},{3,1,6,6},{2,5,8,1},{0,2,10,4},{3,1,11,1},{6,6,2,2},{1,1,10,4},{2,1,2,11},{6,1,8,1},{0,2,13,1},{0,7,6,3},{6,8,2,0},{3,0,0,13},{4,4,4,4},{6,2,0,8},{7,3,1,5},{0,11,4,1},{6,7,0,3},{2,6,3,5},{5,2,1,8},{7,1,8,0},{5,5,1,5},{1,8,3,4},{8,2,6,0},{6,0,10,0},{5,6,1,4},{1,4,4,7},{2,7,4,3},{1,4,8,3},{5,4,3,4},{1,10,2,3},{2,9,1,4}, - {2,2,11,1},{2,5,0,9},{0,0,1,15},{0,0,11,5},{0,4,7,5},{0,1,15,0},{2,1,0,13},{0,3,10,3},{8,0,2,6},{3,3,2,8},{3,5,5,3},{1,7,1,7},{1,3,2,10},{4,0,4,8},{2,0,9,5},{1,1,1,13},{2,2,7,5},{2,1,10,3},{4,2,1,9},{4,3,6,3},{1,3,5,7},{2,5,3,6},{1,0,8,7},{5,0,2,9},{2,8,5,1},{1,6,0,9},{0,0,5,11},{0,4,9,3},{2,0,7,7},{1,7,2,6},{2,1,1,12},{2,4,9,1}, - {0,5,7,4},{6,0,4,6},{3,2,10,1},{0,6,1,9},{2,6,1,7},{0,5,8,3},{4,1,0,11},{1,2,4,9},{4,1,5,6},{6,1,0,9},{1,4,3,8},{4,5,1,6},{1,0,5,10},{5,3,1,7},{0,9,1,6},{2,0,1,13},{2,0,6,8},{8,1,1,6},{1,5,9,1},{0,6,9,1},{0,3,5,8},{0,2,9,5},{5,2,8,1},{1,1,14,0},{3,2,9,2},{5,0,8,3},{0,5,10,1},{5,2,3,6},{2,6,7,1},{2,3,0,11},{0,1,9,6},{1,0,4,11}, - {3,0,5,8},{0,0,15,1},{2,4,5,5},{0,3,7,6},{2,0,0,14},{1,1,12,2},{2,6,8,0},{3,1,8,4},{0,1,5,10} - }; - // All total orderings for 16 pixels [0,2] 2-bit selectors. - // BC1 selector order: 0, 1, 2 - // Note this is different from g_unique_total_orders4[], which reorders the selectors into linear order. 
- const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS3 = 153; - static uint8_t g_unique_total_orders3[NUM_UNIQUE_TOTAL_ORDERINGS3][3] = - { - {6,0,10},{3,6,7},{3,0,13},{13,3,0},{12,4,0},{9,1,6},{2,13,1},{4,7,5},{7,5,4},{9,6,1},{7,4,5},{8,6,2},{16,0,0},{10,6,0},{2,7,7}, - {0,0,16},{0,3,13},{1,15,0},{0,2,14},{1,4,11},{15,1,0},{1,12,3},{9,2,5},{14,1,1},{8,2,6},{3,3,10},{4,2,10},{14,0,2},{0,14,2},{1,7,8},{6,6,4}, - {11,5,0},{6,4,6},{11,3,2},{4,3,9},{7,1,8},{10,4,2},{12,1,3},{11,0,5},{9,3,4},{1,0,15},{9,0,7},{2,6,8},{12,2,2},{6,2,8},{6,8,2},{15,0,1}, - {4,8,4},{0,4,12},{8,5,3},{5,9,2},{11,2,3},{12,3,1},{6,3,7},{1,1,14},{2,9,5},{1,8,7},{4,10,2},{7,7,2},{13,1,2},{0,15,1},{3,2,11},{7,0,9}, - {4,4,8},{3,8,5},{0,5,11},{13,2,1},{1,10,5},{4,11,1},{3,10,3},{5,10,1},{10,2,4},{0,6,10},{14,2,0},{11,4,1},{3,12,1},{1,13,2},{1,5,10},{5,11,0}, - {12,0,4},{8,1,7},{6,10,0},{3,13,0},{7,2,7},{0,7,9},{5,8,3},{0,12,4},{11,1,4},{13,0,3},{0,16,0},{5,7,4},{10,3,3},{10,0,6},{0,13,3},{4,6,6}, - {2,8,6},{2,5,9},{7,8,1},{2,1,13},{2,0,14},{7,3,6},{5,1,10},{3,11,2},{5,4,7},{8,3,5},{10,5,1},{6,9,1},{1,3,12},{4,5,7},{2,2,12},{4,1,11}, - {0,8,8},{4,12,0},{6,5,5},{8,7,1},{5,5,6},{3,7,6},{7,9,0},{4,9,3},{0,10,6},{8,0,8},{5,3,8},{10,1,5},{6,1,9},{7,6,3},{9,5,2},{0,1,15}, - {9,7,0},{2,14,0},{3,4,9},{8,4,4},{9,4,3},{0,9,7},{1,9,6},{3,9,4},{5,2,9},{2,3,11},{5,6,5},{1,14,1},{6,7,3},{2,4,10},{2,12,2},{8,8,0}, - {2,10,4},{4,0,12},{0,11,5},{2,11,3},{1,11,4},{3,5,8},{5,0,11},{3,1,12},{1,2,13},{1,6,9} - }; - - // For each total ordering, this table indicates which other total orderings are likely to improve quality using a least squares pass. Each array is sorted by usefulness. 
- static uint16_t g_best_total_orderings4[NUM_UNIQUE_TOTAL_ORDERINGS4][MAX_TOTAL_ORDERINGS4] = - { -#if RGBCX_USE_SMALLER_TABLES - { 202,120,13,318,15,23,403,450,5,51,260,128,77,21,33,494,515,523,4,141,269,1,2,700,137,49,48,102,7,64,753,82 }, - { 13,141,23,217,115,51,77,2,64,21,0,4,5,317,137,269,202,33,318,7,291,352,9,10,3,180,32,6,365,102,341,349 }, - { 29,58,262,1,52,74,6,171,5,287,151,334,27,500,75,26,331,223,53,635,220,19,50,45,46,17,14,396,163,409,324,70 }, - { 40,51,33,453,14,23,62,56,12,196,730,475,153,99,403,775,117,130,585,34,4,17,162,11,139,57,102,38,108,47,123,440 }, - { 33,23,51,13,102,64,202,128,12,40,15,196,153,10,1,2,77,99,141,0,515,5,117,3,120,403,700,165,22,14,269,453 }, - { 13,23,51,4,77,141,202,33,115,64,32,128,0,11,177,40,15,102,2,217,7,137,269,21,90,59,515,1,180,403,22,6 }, - { 26,235,19,47,648,624,78,145,27,112,122,64,444,6,630,453,25,42,65,130,711,85,390,113,416,108,665,29,730,138,644,95 }, - { 64,141,352,751,217,247,237,437,177,269,86,954,947,875,32,318,95,77,304,92,597,180,232,291,128,864,349,588,372,202,312,1 }, - { 642,898,180,638,901,341,82,197,10,951,15,515,165,762,700,253,811,753,752,365,143,479,244,569,8,110,351,873,55,31,499,116 }, - { 221,23,51,125,438,254,13,21,39,49,308,656,0,115,530,159,158,401,30,166,912,386,165,688,518,9,105,627,424,22,421,33 }, - { 143,31,1,44,197,8,180,125,116,55,13,498,23,341,638,242,93,15,2,141,0,901,752,115,36,206,165,479,338,365,515,762 }, - { 12,23,51,13,14,15,37,99,515,38,700,117,2,196,134,153,753,64,54,33,128,120,21,0,328,5,139,82,453,719,457,1 }, - { 13,15,23,515,961,700,457,753,51,115,4,165,197,2,38,569,1,474,0,37,99,719,5,12,629,14,11,3,33,77,64,10 }, - { 15,515,700,753,1,0,2,4,3,23,134,12,961,5,10,197,11,33,82,120,457,51,165,7,6,341,217,21,77,9,40,180 }, - { 13,51,23,457,719,961,730,401,165,453,0,117,386,15,134,1,758,153,12,54,515,99,11,2,700,5,753,4,308,33,6,899 }, - { 134,898,82,117,13,33,77,102,23,260,341,351,120,901,197,153,961,111,196,110,180,457,854,10,450,8,165,40,4,115,0,365 }, - { 
60,18,126,167,35,16,191,71,24,92,121,271,68,107,212,146,118,150,199,7,21,1,9,575,727,5,566,48,0,132,108,273 }, - { 62,136,129,123,128,41,162,17,249,211,214,789,618,710,38,678,248,507,57,64,152,269,119,3,177,183,597,106,4,179,216,90 }, - { 403,523,51,475,494,453,817,899,202,23,450,13,421,120,102,730,33,128,4,1,805,5,7,153,757,260,318,196,77,457,326,65 }, - { 4,59,3,62,12,33,56,193,27,21,102,17,40,77,76,84,32,0,6,123,119,177,128,11,18,611,605,25,13,51,73,210 }, - { 43,20,319,422,414,945,0,7,819,61,5,376,325,173,804,904,470,693,97,707,14,49,22,104,147,107,95,32,426,1,330,577 }, - { 13,23,51,2,0,115,4,141,217,33,10,77,1,15,64,180,3,515,7,6,22,102,11,5,40,9,165,700,202,197,317,341 }, - { 28,49,0,105,1,24,65,159,35,55,95,239,16,2,109,7,9,14,170,320,347,168,424,158,10,301,124,5,67,21,64,36 }, - { 15,515,700,753,0,1,13,2,117,4,12,10,5,165,457,3,9,134,11,7,6,51,77,64,961,82,33,197,14,341,120,141 }, - { 7,71,14,149,97,18,60,16,150,92,398,189,140,124,24,273,35,2,69,302,154,68,0,336,517,43,66,28,118,251,230,1 }, - { 4,102,33,77,40,59,11,624,210,12,128,342,5,503,91,139,64,32,25,494,202,678,416,0,403,275,21,450,196,318,523,177 }, - { 25,19,42,6,122,813,256,235,85,26,436,53,297,573,680,390,445,63,27,416,80,233,65,73,389,283,45,605,194,17,250,343 }, - { 402,102,202,128,33,300,403,23,12,77,40,21,342,117,483,99,25,494,6,4,63,32,84,569,139,757,475,318,19,26,196,134 }, - { 158,9,0,109,39,49,65,22,35,168,55,24,68,124,159,16,185,344,333,154,254,272,175,289,1,577,95,28,105,810,30,169 }, - { 197,180,115,237,498,165,2,5,287,546,400,3,61,34,509,13,297,80,341,52,45,186,58,881,23,873,468,176,64,17,311,250 }, - { 120,968,373,260,704,110,450,202,137,318,77,95,269,326,217,717,661,652,851,349,93,1,518,98,827,291,21,177,82,33,848,719 }, - { 44,116,144,268,434,489,367,384,98,127,918,93,948,31,206,940,855,0,203,137,9,22,617,141,332,105,393,492,959,282,299,131 }, - { 13,77,23,33,51,0,64,141,102,4,2,115,1,6,202,15,10,128,269,7,177,180,3,40,22,11,515,217,117,318,700,137 }, - { 
15,515,700,753,4,11,141,40,165,23,64,180,13,202,32,3,51,125,5,197,21,128,0,93,77,1,120,82,269,117,110,59 }, - { 176,231,585,62,34,14,412,161,56,236,527,57,17,3,51,202,4,23,369,283,128,13,472,440,84,361,136,457,381,130,719,53 }, - { 9,0,180,217,237,101,141,352,88,100,230,64,175,317,115,498,68,39,30,1,702,83,213,36,365,208,752,13,252,321,952,546 }, - { 28,9,22,1,49,0,109,39,83,95,86,30,13,105,128,55,141,168,158,67,31,159,208,12,96,5,185,2,160,64,137,23 }, - { 72,4,38,12,51,89,477,11,57,76,401,308,23,474,99,148,413,179,59,13,431,152,54,569,17,3,205,629,197,421,405,15 }, - { 457,13,23,961,15,51,515,700,165,12,753,629,11,1,719,117,0,3,2,37,569,197,40,328,33,5,153,134,99,64,38,196 }, - { 254,100,310,9,30,1,39,625,166,265,190,0,272,557,131,731,31,98,578,688,404,93,101,88,49,21,127,264,44,36,252,478 }, - { 51,23,12,13,15,128,99,120,10,202,515,153,64,82,700,33,165,2,5,117,403,1,141,0,3,196,37,453,753,197,260,93 }, - { 38,99,542,139,453,117,196,23,457,13,328,111,37,134,961,11,12,51,40,775,587,401,474,54,153,477,41,629,33,475,14,277 }, - { 6,85,25,233,343,91,26,63,138,29,19,65,283,4,81,235,42,122,605,64,648,256,174,370,74,389,718,59,45,194,445,416 }, - { 49,5,97,20,197,21,18,193,0,64,408,729,173,350,43,422,165,7,14,104,61,32,509,713,523,102,120,95,125,397,35,232 }, - { 144,116,268,434,384,489,367,206,93,855,940,44,98,332,617,127,959,911,137,282,203,31,22,219,141,9,131,276,417,0,1,120 }, - { 17,106,64,62,32,255,136,292,476,162,129,241,123,141,41,237,720,214,209,352,519,211,186,148,752,247,507,90,21,77,197,119 }, - { 2,29,52,50,5,58,14,6,27,1,366,357,45,53,17,19,171,151,26,181,133,38,218,764,287,583,61,113,3,487,600,281 }, - { 130,59,196,412,381,730,711,236,77,210,202,402,453,99,401,108,361,803,291,283,153,4,57,51,128,183,14,719,503,117,23,11 }, - { 13,23,51,141,77,4,33,64,115,0,217,10,180,202,2,102,11,9,15,165,40,21,128,352,22,7,197,3,317,515,269,1 }, - { 23,13,202,51,120,15,21,5,141,1,128,269,137,515,64,102,125,48,98,33,260,523,318,93,700,165,450,77,2,12,403,82 }, - { 
1,2,14,46,29,67,38,52,5,171,58,24,103,69,96,70,83,181,54,75,163,223,16,45,112,309,155,0,186,35,18,108 }, - { 15,515,700,753,13,0,1,2,153,5,23,10,117,3,9,7,134,165,12,6,341,33,4,14,77,457,115,21,719,180,217,82 }, - { 197,165,509,13,391,180,308,115,23,546,5,498,2,29,3,401,901,61,34,80,14,457,250,569,237,873,38,297,45,15,468,386 }, - { 19,73,27,250,200,714,444,472,26,53,34,17,813,322,283,390,128,297,78,123,432,14,436,136,106,690,57,122,389,80,503,3 }, - { 3,17,21,45,62,32,38,12,155,14,2,328,5,99,401,536,828,13,227,488,106,51,719,119,540,76,165,221,115,629,209,41 }, - { 115,341,873,197,365,13,901,180,569,752,317,1,10,498,143,634,261,0,509,15,943,237,44,31,116,601,165,127,282,23,141,64 }, - { 453,51,23,403,33,421,475,102,15,153,196,515,13,700,117,523,12,40,753,21,4,134,0,494,670,899,22,801,730,10,11,401 }, - { 23,13,51,33,12,117,153,134,453,196,15,99,515,40,14,700,128,102,11,753,77,64,403,202,0,401,475,37,65,2,3,38 }, - { 2,7,5,14,70,1,29,61,52,45,6,112,66,16,21,32,592,46,38,135,87,58,186,315,290,128,113,0,64,48,227,23 }, - { 33,23,102,51,128,13,64,202,141,1,77,10,153,40,196,117,2,3,0,5,15,269,403,12,137,134,318,165,120,6,453,99 }, - { 16,92,7,20,43,35,126,71,60,14,107,18,68,97,0,121,279,149,24,246,191,48,118,575,55,140,362,783,230,150,375,566 }, - { 13,23,4,33,77,64,51,102,141,128,32,10,0,202,40,115,59,22,90,11,177,21,291,6,7,318,180,117,137,2,95,165 }, - { 507,162,129,41,4,211,62,38,123,59,57,248,183,130,99,11,3,361,202,17,402,556,266,305,803,210,128,184,152,136,313,117 }, - { 643,123,193,650,802,18,25,389,718,256,65,289,84,91,619,511,415,90,235,63,57,510,324,216,862,102,6,183,108,397,217,736 }, - { 13,23,15,1,515,51,0,2,700,5,753,165,141,115,12,3,4,180,21,197,457,7,6,10,120,9,33,202,77,32,8,11 }, - { 23,51,13,453,64,403,12,21,5,202,128,475,165,141,523,95,125,115,3,1,4,730,120,32,2,494,180,719,457,197,450,401 }, - { 204,74,135,66,6,174,192,7,138,172,85,353,348,580,280,97,95,500,29,64,426,32,87,889,65,81,25,2,52,43,568,673 }, - { 
35,0,68,69,24,9,1,16,65,103,149,133,18,114,28,50,83,2,189,7,46,14,101,336,175,124,251,55,71,218,38,238 }, - { 16,101,0,118,9,18,24,68,35,154,71,124,60,212,191,520,55,806,694,167,28,39,364,375,1,346,252,65,604,302,22,21 }, - { 0,9,16,35,1,24,68,18,65,21,103,67,13,149,28,189,71,23,101,238,114,7,335,133,486,141,22,212,48,50,30,118 }, - { 13,202,23,77,33,51,128,5,21,141,115,32,102,64,4,0,318,269,10,15,291,2,494,177,11,217,3,515,22,137,6,700 }, - { 16,92,60,35,7,18,24,68,150,149,14,71,0,375,97,126,118,107,230,191,246,273,140,55,175,653,9,575,2,28,566,517 }, - { 76,90,21,179,316,148,205,32,464,288,184,257,245,1,89,2,460,57,152,45,38,358,645,5,12,449,350,48,37,17,4,14 }, - { 19,27,26,813,80,297,17,495,436,53,73,200,4,378,250,59,106,25,45,128,361,42,113,469,122,390,77,40,736,6,11,136 }, - { 6,26,235,138,19,145,112,70,331,262,25,42,52,624,27,453,122,47,500,78,648,85,29,2,630,632,409,113,50,226,108,75 }, - { 7,16,14,24,92,35,18,2,46,9,60,140,0,87,50,5,54,13,12,38,171,23,126,21,58,64,1,70,128,71,220,163 }, - { 90,205,257,184,32,179,460,5,245,45,2,288,769,524,57,21,152,229,17,1,497,4,292,59,619,452,432,76,476,11,266,14 }, - { 15,515,700,753,4,5,11,141,13,1,33,3,0,128,202,23,180,21,2,64,269,32,117,134,120,40,102,318,153,17,137,352 }, - { 47,130,711,108,453,412,730,196,390,283,78,27,51,183,381,236,128,200,719,14,153,472,503,34,59,250,3,4,57,803,123,432 }, - { 12,277,51,474,111,153,23,99,13,37,961,94,629,542,569,431,79,139,38,134,117,453,33,188,196,40,115,15,11,157,401,515 }, - { 17,495,469,106,26,378,80,27,161,483,19,742,527,436,383,862,73,136,53,814,297,6,119,84,62,56,25,3,209,611,4,128 }, - { 81,681,636,91,0,750,370,104,718,138,18,693,173,784,29,397,348,74,192,673,174,65,6,207,64,280,306,52,671,32,355,319 }, - { 15,515,700,753,33,77,4,102,115,117,40,13,1,153,134,11,5,217,23,196,2,21,3,317,32,365,0,341,291,59,12,51 }, - { 0,9,28,35,68,1,65,67,101,39,69,175,16,238,13,22,96,124,18,24,251,30,55,12,23,2,50,141,114,5,154,103 }, - { 
23,33,77,13,117,40,11,102,64,4,51,403,153,453,10,0,196,134,128,65,12,291,86,99,95,59,15,141,202,180,137,719 }, - { 214,90,289,6,874,64,25,65,235,42,751,249,256,312,194,85,746,875,174,32,525,288,519,835,247,348,233,544,217,524,437,352 }, - { 1,22,2,0,36,67,28,5,49,95,12,50,168,83,105,55,7,9,14,194,103,23,114,21,584,46,10,13,38,69,208,159 }, - { 269,141,13,202,33,180,318,77,291,137,102,352,128,23,349,51,31,217,372,317,125,197,44,21,11,5,901,1,18,0,4,494 }, - { 435,144,274,88,203,418,30,1,190,410,96,778,100,530,521,326,466,795,686,166,960,321,382,264,367,822,131,31,692,9,213,93 }, - { 76,72,90,21,37,179,12,205,32,428,148,38,308,405,4,413,57,184,749,245,316,221,54,645,288,1,152,155,464,257,2,14 }, - { 77,33,64,102,13,141,23,2,40,1,51,10,0,115,6,180,202,128,4,3,177,269,15,7,22,165,291,14,217,318,137,11 }, - { 397,81,4,32,65,788,693,804,681,11,249,21,91,64,690,494,3,0,422,56,348,725,194,123,23,59,523,319,61,510,95,90 }, - { 60,126,16,7,92,121,314,246,35,107,150,132,14,146,24,18,199,298,232,71,359,140,672,97,392,649,5,423,95,21,22,388 }, - { 15,515,141,217,115,700,13,23,120,317,753,180,33,260,110,137,341,51,1,365,4,77,64,202,0,40,36,352,197,269,10,21 }, - { 111,134,117,474,23,13,961,12,569,431,37,15,51,115,515,700,277,99,753,38,197,405,457,4,72,94,629,45,11,89,54,148 }, - { 23,13,51,5,1,15,2,21,12,202,141,0,515,165,120,32,4,64,700,3,115,197,269,125,753,7,9,128,6,180,453,403 }, - { 13,141,4,23,5,2,115,217,202,51,180,137,269,352,77,1,317,3,21,318,0,15,9,64,10,197,11,341,33,515,752,7 }, - { 165,125,197,13,391,21,23,558,48,380,97,120,298,33,14,426,66,115,32,386,900,180,6,98,357,237,326,509,51,278,221,457 }, - { 120,82,15,260,515,1,351,77,450,700,13,21,141,23,753,202,217,93,110,33,51,854,5,128,326,102,137,180,817,48,269,352 }, - { 23,13,15,51,515,700,961,753,0,457,1,2,4,115,10,453,569,5,33,165,11,719,14,40,64,197,3,21,474,629,38,401 }, - { 264,166,39,30,9,100,435,254,93,921,190,363,1,625,411,382,897,656,203,478,404,812,438,110,473,88,18,691,156,141,274,272 }, - { 
9,0,252,100,166,39,101,265,364,68,88,329,520,18,419,676,118,167,404,604,16,1,21,30,212,158,553,49,382,274,48,13 }, - { 15,515,700,753,4,11,141,5,3,13,202,1,180,21,2,165,269,23,40,64,0,318,12,32,128,51,77,117,523,197,120,457 }, - { 24,1,2,69,35,16,67,18,14,50,0,46,68,9,38,7,133,71,83,149,28,108,189,218,65,114,238,29,75,54,5,96 }, - { 90,289,214,64,874,13,77,712,66,751,4,23,51,192,32,0,202,194,312,177,33,65,234,104,875,288,59,5,835,416,102,95 }, - { 0,9,49,127,98,31,301,28,371,159,1,395,512,737,158,761,916,623,16,44,242,39,170,18,293,105,24,272,101,22,23,385 }, - { 17,62,136,214,123,129,32,292,119,209,710,106,141,162,128,64,45,4,77,249,11,618,211,3,207,130,519,183,38,177,21,269 }, - { 5,107,581,356,279,32,441,362,493,660,13,298,0,534,49,147,21,22,132,121,97,423,7,590,259,683,14,786,126,508,60,246 }, - { 51,13,15,730,453,23,515,719,386,457,12,700,403,475,899,1,6,523,753,421,99,401,165,33,2,19,361,5,0,670,120,27 }, - { 49,28,9,159,272,22,254,131,158,327,95,105,0,39,35,168,347,286,374,55,65,627,424,912,68,578,1,24,239,175,688,169 }, - { 15,515,700,33,753,4,77,141,341,317,1,10,13,180,102,22,40,117,115,365,5,901,23,197,134,11,217,351,64,82,21,137 }, - { 134,15,13,515,23,700,12,753,51,474,37,961,197,10,457,569,4,0,99,2,115,38,165,153,94,3,139,11,1,82,33,5 }, - { 7,2,20,58,5,14,128,66,6,29,32,43,21,52,16,38,631,61,74,97,46,135,113,25,202,192,13,0,884,45,112,87 }, - { 77,13,33,202,23,128,102,4,141,342,117,0,269,318,134,22,11,21,32,153,403,291,49,64,137,51,40,15,494,5,196,98 }, - { 2,1,14,6,46,38,29,65,5,36,67,0,103,7,22,86,133,50,108,208,52,83,24,323,283,69,28,18,10,25,23,75 }, - { 15,515,700,753,1,5,4,2,3,13,0,11,180,341,12,33,10,197,134,365,77,23,21,901,6,117,165,7,37,32,17,102 }, - { 203,268,206,93,417,940,31,8,120,137,44,499,959,473,202,692,728,559,0,260,10,326,141,564,817,127,341,1,450,22,110,23 }, - { 15,82,515,120,700,0,10,753,33,8,64,165,110,31,260,93,13,197,23,22,40,4,351,44,77,9,11,153,102,51,1,196 }, - { 
60,0,16,7,14,43,20,71,28,10,2,22,154,18,13,24,92,1,51,576,35,615,805,925,68,126,124,149,97,64,23,55 }, - { 19,6,26,80,5,84,27,17,25,2,504,129,45,240,56,123,4,119,618,1,76,106,64,51,14,3,128,65,32,710,0,42 }, - { 15,515,700,753,13,4,77,23,33,51,0,5,8,10,11,31,44,1,82,22,202,64,110,102,93,21,291,40,141,180,9,49 }, - { 195,98,271,223,132,167,146,407,1,360,121,834,393,591,212,199,293,259,522,107,354,147,156,191,807,590,48,18,125,16,765,541 }, - { 128,202,77,210,402,318,33,102,6,40,403,29,342,269,196,757,99,139,2,111,42,4,494,117,275,300,13,12,678,0,177,122 }, - { 13,33,23,40,51,102,4,117,77,64,134,0,128,153,202,196,453,11,15,12,1,22,403,141,59,14,10,475,515,65,700,95 }, - { 7,16,14,24,18,2,28,0,92,71,1,22,6,35,60,20,168,10,154,118,5,302,124,69,97,109,703,158,420,12,149,66 }, - { 15,1,515,23,0,13,700,2,51,753,180,5,120,165,197,21,115,4,33,9,141,7,12,6,3,457,386,202,260,523,8,31 }, - { 60,107,121,132,146,126,199,279,150,92,16,649,441,35,955,7,21,0,423,5,18,195,598,298,493,356,32,653,22,362,953,10 }, - { 31,44,98,276,284,299,116,935,9,201,0,131,39,127,144,662,1,137,371,492,567,489,93,254,49,268,22,28,30,293,434,737 }, - { 13,15,23,515,700,0,1,51,753,4,2,10,77,202,5,115,3,165,197,457,9,12,11,961,33,120,22,141,180,7,6,40 }, - { 123,162,184,257,17,183,229,130,129,3,84,136,99,152,556,383,57,497,12,205,4,62,56,452,80,266,128,14,40,119,27,106 }, - { 196,33,117,40,153,23,134,13,51,102,453,0,15,475,12,14,515,2,22,700,4,21,753,64,401,670,730,1,9,11,10,99 }, - { 224,219,187,131,258,385,442,871,836,31,98,908,44,574,127,944,137,839,116,36,613,1,254,39,926,160,829,96,93,371,860,827 }, - { 121,195,156,132,146,360,590,407,786,522,883,591,259,929,626,941,150,687,5,55,296,379,467,178,586,465,279,21,1,13,60,354 }, - { 2,1,14,29,6,5,46,52,38,19,114,75,26,65,108,96,25,50,36,70,103,309,17,236,218,74,12,86,0,3,10,112 }, - { 15,515,82,700,120,753,10,0,8,197,260,165,351,64,13,110,117,93,31,1,9,33,22,23,457,44,450,77,102,898,40,49 }, - { 
7,66,97,2,172,74,226,52,29,135,192,232,43,324,92,5,38,20,222,14,6,568,87,107,353,620,580,16,138,174,448,32 }, - { 62,129,123,162,136,249,618,183,507,57,4,152,17,59,11,184,117,77,3,128,211,41,130,205,12,40,33,106,64,229,38,313 }, - { 1,13,15,2,4,515,23,0,3,115,700,5,51,77,341,141,753,180,33,217,197,202,901,6,21,165,11,365,318,317,10,102 }, - { 6,26,235,19,145,47,112,78,64,27,453,95,29,444,25,624,85,108,648,70,32,130,74,42,711,630,632,138,65,122,113,730 }, - { 23,51,12,15,13,99,515,153,117,10,700,37,120,82,165,2,753,64,128,0,403,3,5,1,134,197,453,31,202,457,110,21 }, - { 16,24,18,71,64,35,92,7,246,146,9,108,60,118,199,5,140,2,267,0,230,830,32,133,1,68,50,330,247,563,36,12 }, - { 15,515,700,753,0,1,13,2,23,3,4,217,51,5,115,8,9,180,341,10,7,6,317,77,33,372,901,197,365,11,120,165 }, - { 234,639,178,202,77,142,5,455,450,49,416,0,147,427,198,21,315,329,13,318,325,557,120,344,113,259,22,128,61,105,23,494 }, - { 1,31,36,44,141,180,55,2,64,22,98,116,13,352,0,115,10,127,5,164,253,498,237,165,341,197,4,86,15,170,125,23 }, - { 15,120,13,141,23,260,217,515,1,77,51,110,180,700,317,82,269,137,115,202,21,753,64,5,351,291,0,450,352,93,36,326 }, - { 26,6,112,396,19,145,25,122,648,287,42,74,624,222,416,45,138,66,644,151,113,651,29,573,64,280,445,27,525,85,70,58 }, - { 156,360,5,146,121,21,271,522,354,132,49,13,18,195,16,340,60,591,446,586,727,0,107,407,167,48,1,463,199,566,32,23 }, - { 5,61,49,147,178,612,660,120,21,182,23,427,259,683,33,4,77,70,13,3,376,98,64,0,481,344,48,595,291,263,141,51 }, - { 89,79,468,179,358,205,94,405,115,498,72,180,365,431,37,111,341,734,188,317,482,217,11,4,245,152,413,216,12,474,490,752 }, - { 24,16,35,68,18,71,7,92,0,108,9,14,118,101,336,175,375,302,28,124,154,55,149,60,398,1,65,2,140,273,345,230 }, - { 51,730,421,801,453,386,23,523,13,475,719,401,670,365,899,403,115,457,758,165,33,494,450,6,423,805,629,56,569,514,958,388 }, - { 113,45,6,311,29,2,151,614,145,491,112,80,5,27,61,74,315,66,209,631,19,25,58,17,73,26,1,243,70,64,611,287 }, - { 
4,339,188,471,11,59,79,12,377,94,99,33,77,102,51,111,37,152,13,961,474,542,40,342,3,23,128,403,202,177,184,57 }, - { 15,4,515,11,700,33,82,40,0,120,753,10,8,110,13,93,23,165,77,260,64,31,22,51,44,102,351,1,125,9,197,21 }, - { 16,24,18,0,35,68,28,71,124,118,60,7,9,55,14,92,109,101,419,175,22,252,154,375,149,302,158,346,2,49,1,126 }, - { 17,45,227,21,106,3,2,243,209,5,48,32,221,62,207,50,29,186,290,270,263,52,14,496,400,119,46,255,54,430,38,721 }, - { 340,354,586,658,156,195,698,668,1,296,9,18,883,363,447,379,303,98,411,13,31,163,51,5,371,48,919,846,121,21,360,70 }, - { 277,153,111,12,23,51,474,99,38,37,139,117,41,457,79,453,542,13,11,33,134,157,629,188,961,14,196,401,102,569,15,94 }, - { 0,18,16,159,49,24,9,105,35,68,7,28,22,1,60,344,55,101,109,2,14,158,13,23,71,118,455,286,272,424,5,327 }, - { 0,105,9,49,16,18,158,28,518,24,101,320,1,68,170,301,272,127,7,286,35,890,109,39,159,98,21,344,31,55,371,23 }, - { 141,1,180,15,13,2,365,217,515,352,317,115,341,0,4,5,269,700,23,21,3,752,197,77,753,51,31,901,10,202,8,64 }, - { 4,23,51,33,19,17,102,153,485,880,40,403,196,26,300,453,27,117,78,0,12,200,47,5,11,14,342,99,53,77,475,2 }, - { 62,184,56,440,130,229,183,3,556,152,99,162,12,266,17,548,136,57,305,161,123,14,452,4,383,403,257,34,40,84,33,139 }, - { 13,23,77,141,64,202,33,51,269,115,0,102,21,4,217,128,5,32,318,137,291,9,15,2,180,10,3,317,177,515,7,6 }, - { 1,22,36,105,170,0,86,2,31,28,239,64,55,5,10,98,9,44,127,95,654,67,301,143,13,12,49,23,320,141,83,21 }, - { 15,515,700,753,0,1,13,2,23,901,5,8,51,82,9,180,457,4,7,12,3,6,10,120,341,141,22,898,197,351,115,260 }, - { 1,39,274,98,100,265,190,30,438,310,166,223,88,96,909,31,264,625,530,9,382,812,21,252,593,0,254,539,44,131,23,778 }, - { 18,212,167,118,363,1,447,411,146,60,271,16,781,121,647,9,621,562,21,478,664,68,815,5,354,98,48,101,24,446,777,463 }, - { 24,28,22,0,7,1,2,16,14,65,35,49,158,95,109,159,55,105,10,18,124,9,67,5,239,149,12,289,108,68,21,424 }, - { 
105,22,131,272,286,98,55,239,1,31,320,9,127,327,36,185,28,374,86,219,0,64,187,44,578,164,224,913,535,115,601,13 }, - { 22,31,28,301,127,98,44,0,105,1,512,395,9,293,109,299,95,338,239,125,242,116,36,320,55,841,900,685,599,23,13,763 }, - { 2,1,58,29,5,14,52,46,186,334,45,155,151,50,400,75,38,69,502,61,48,227,223,7,163,17,262,67,549,21,70,113 }, - { 7,107,135,232,97,14,2,92,66,16,172,192,278,387,298,356,38,35,448,52,46,43,60,29,20,126,324,526,357,359,64,5 }, - { 20,43,104,426,173,7,560,414,707,784,319,81,0,861,422,819,38,74,715,52,376,97,879,32,330,22,49,64,66,95,192,526 }, - { 104,74,636,66,204,0,355,81,222,25,29,319,145,784,20,65,90,4,174,194,7,64,6,746,138,173,750,715,91,43,192,32 }, - { 0,9,101,35,68,39,65,28,252,124,67,154,364,336,100,166,30,1,289,55,149,346,16,114,158,88,439,24,429,22,570,194 }, - { 57,14,4,231,236,585,176,59,369,23,361,13,719,51,300,342,12,457,56,3,62,38,202,401,34,46,2,322,11,215,210,507 }, - { 1,2,15,3,141,0,515,5,33,700,13,64,77,180,6,128,753,10,4,269,102,202,11,7,134,197,352,120,117,318,12,291 }, - { 5,1,21,202,13,32,48,23,0,61,259,22,494,120,70,49,51,18,137,128,465,12,178,115,2,453,403,141,58,3,90,450 }, - { 141,205,4,72,59,79,245,11,352,94,152,76,247,216,21,188,452,217,497,12,89,37,111,339,588,77,64,875,864,115,358,464 }, - { 15,515,700,753,0,1,2,13,5,4,23,3,8,341,365,51,115,10,120,457,6,141,77,197,31,7,165,9,202,450,961,260 }, - { 5,2,50,14,58,38,171,46,29,1,45,186,17,52,155,218,48,281,61,487,54,36,67,21,328,334,151,227,760,114,400,133 }, - { 457,120,70,125,318,64,23,48,795,291,202,761,751,415,77,846,269,758,21,237,96,260,391,165,87,1,128,5,221,13,137,763 }, - { 13,23,51,33,4,40,117,102,453,64,153,196,0,77,15,11,12,475,1,65,134,10,515,22,21,14,700,59,403,141,2,753 }, - { 229,152,57,266,452,381,432,12,313,184,99,471,17,4,62,339,157,3,129,59,128,11,369,37,77,38,40,123,5,497,188,257 }, - { 49,28,109,22,159,9,272,95,105,131,55,35,254,168,39,327,169,0,1,286,175,374,347,158,420,67,36,194,312,424,627,346 }, - { 
5,2,61,29,45,58,80,311,1,17,209,227,52,243,106,869,454,151,592,496,48,334,14,155,6,186,46,171,75,21,255,667 }, - { 244,44,110,141,260,30,269,352,839,131,574,228,373,276,1,406,219,717,217,137,253,224,120,93,36,31,567,116,661,187,341,88 }, - { 12,99,79,139,11,453,196,51,277,474,111,23,542,37,94,188,33,13,401,775,40,961,313,102,4,339,153,485,629,134,300,431 }, - { 16,35,9,0,68,24,149,69,67,18,1,114,65,230,71,7,103,133,50,167,212,118,101,191,140,64,399,28,124,283,55,565 }, - { 88,30,274,435,131,613,190,100,93,829,166,1,187,795,530,127,382,957,960,160,31,137,466,264,39,800,406,254,28,473,521,219 }, - { 167,16,18,118,212,24,60,71,101,68,191,9,375,411,363,35,0,1,589,199,302,21,447,55,146,126,92,271,647,121,562,48 }, - { 64,141,86,177,77,128,147,597,304,95,269,102,275,4,352,49,120,5,372,194,465,13,588,237,947,216,202,180,612,751,107,534 }, - { 18,65,90,403,523,289,240,214,194,102,701,475,202,217,283,862,389,51,33,0,494,421,453,817,84,64,847,899,352,13,23,437 }, - { 13,51,23,202,5,12,21,128,15,115,0,1,141,120,64,32,4,2,515,403,165,457,3,10,700,99,453,318,719,450,308,401 }, - { 98,223,393,31,1,271,834,791,167,44,202,64,93,697,5,116,77,125,450,446,212,18,541,293,51,120,195,132,284,13,807,765 }, - { 15,515,700,753,4,11,23,13,40,51,82,165,0,110,93,33,141,64,120,5,10,77,3,102,180,32,202,125,8,197,31,21 }, - { 15,515,700,753,0,1,13,2,901,23,5,341,3,51,82,8,4,180,961,9,115,10,12,6,898,7,351,141,134,22,31,120 }, - { 234,416,77,5,315,639,325,202,147,198,113,49,450,61,455,142,0,21,22,342,329,494,178,58,102,427,318,230,13,120,43,470 }, - { 60,146,16,18,156,126,121,271,199,360,132,24,167,0,640,10,71,522,21,92,5,340,107,354,118,150,22,195,446,35,28,212 }, - { 4,361,11,14,56,368,377,161,27,12,300,77,59,200,17,554,202,33,40,494,495,21,210,80,757,25,128,23,19,38,444,53 }, - { 141,82,217,351,15,352,120,1,180,260,515,64,854,36,700,317,752,372,13,269,77,753,922,21,349,23,202,110,93,137,51,373 }, - { 
15,515,700,753,77,13,0,1,23,33,102,2,51,4,3,5,291,217,10,9,450,120,341,7,317,6,11,117,115,8,260,180 }, - { 15,515,120,13,700,23,77,141,1,260,0,753,180,51,137,202,115,365,110,291,217,5,128,9,21,341,197,269,2,450,317,165 }, - { 174,6,348,85,138,74,280,204,66,233,192,355,289,65,81,580,636,353,25,91,104,343,673,214,64,95,42,712,792,32,194,90 }, - { 152,497,452,59,4,216,11,79,94,77,128,188,269,339,588,33,76,529,318,32,141,471,12,202,111,21,5,51,37,90,72,177 }, - { 417,499,10,141,253,244,110,559,8,564,180,260,728,120,352,638,642,341,951,206,143,752,901,93,137,661,922,373,44,31,811,197 }, - { 13,77,23,33,4,51,0,102,128,59,141,40,64,115,177,10,137,22,202,2,7,11,90,1,117,180,269,14,49,6,134,3 }, - { 1,2,22,0,36,5,67,50,14,28,12,86,38,46,83,168,194,65,103,114,49,7,10,95,21,69,23,24,128,51,55,13 }, - { 17,106,119,207,255,306,742,378,84,62,136,45,3,5,240,80,61,56,209,383,311,790,655,32,2,440,76,151,58,29,179,263 }, - { 3,128,1,141,2,202,33,5,64,15,0,515,102,13,269,10,700,180,134,51,120,6,77,318,23,137,17,117,753,197,82,153 }, - { 514,38,377,328,11,57,41,248,880,266,556,4,152,361,471,757,485,403,305,102,3,211,313,99,457,130,12,14,157,40,23,54 }, - { 68,0,167,101,9,118,264,520,16,18,21,478,562,1,124,212,100,936,664,777,191,88,806,154,48,24,759,604,35,252,265,65 }, - { 230,689,699,213,466,352,217,831,30,443,418,144,854,201,840,855,1,251,203,317,530,957,96,93,822,539,36,752,351,137,83,800 }, - { 33,77,102,117,15,82,13,134,23,64,0,515,120,153,51,4,40,128,700,260,202,141,196,22,753,11,351,10,1,326,95,269 }, - { 11,40,33,51,117,13,542,328,14,134,38,153,23,12,485,231,102,54,775,37,3,377,111,139,211,4,457,403,369,475,99,719 }, - { 33,64,77,128,141,2,1,202,102,13,23,117,0,15,3,153,51,134,10,40,6,5,515,269,137,180,318,165,700,7,196,753 }, - { 15,515,700,753,4,1,5,11,13,21,33,180,93,141,64,2,23,77,82,3,0,102,32,40,352,341,10,197,98,110,117,901 }, - { 1,2,14,67,50,46,38,24,103,83,0,5,36,28,29,133,114,96,65,52,18,75,54,108,22,7,238,58,160,9,361,69 }, - { 
258,201,276,137,160,860,116,261,295,843,567,144,131,44,187,268,943,219,284,31,202,935,141,98,662,203,127,96,36,93,224,1 }, - { 7,2,14,16,46,87,75,52,92,278,29,38,140,70,1,5,35,294,24,262,135,69,171,172,58,409,112,60,50,66,97,12 }, - { 13,23,0,2,51,1,33,4,115,10,15,141,77,3,5,180,217,515,9,7,64,11,700,6,102,40,197,22,317,753,165,202 }, - { 74,145,6,66,25,204,42,29,222,337,138,26,7,525,192,174,746,287,544,135,415,2,609,632,112,64,87,0,85,45,712,396 }, - { 77,33,102,15,217,13,23,141,202,515,51,700,291,4,269,753,317,180,21,64,318,115,128,0,275,2,352,196,3,5,137,11 }, - { 187,219,258,871,44,442,160,574,137,224,908,116,839,131,36,926,276,201,93,228,202,860,31,613,144,531,406,1,902,30,190,318 }, - { 1,372,141,5,21,77,225,744,96,30,23,349,13,291,269,284,69,442,459,144,303,839,217,622,160,330,260,48,120,410,189,352 }, - { 66,222,2,74,29,87,135,6,7,145,52,25,294,337,226,172,138,331,42,70,97,112,26,1,632,192,43,5,415,609,461,353 }, - { 45,17,106,209,5,2,21,29,48,207,3,186,243,155,255,263,454,119,400,496,270,14,290,62,425,1,171,32,659,52,38,56 }, - { 93,88,141,120,30,213,260,373,100,717,459,82,110,1,166,450,180,321,217,372,36,269,131,225,22,352,326,466,473,187,244,410 }, - { 266,57,152,381,313,471,12,229,99,369,339,62,157,3,4,37,77,38,188,17,11,162,40,184,129,59,475,775,128,452,403,453 }, - { 217,352,317,141,752,15,180,515,372,365,700,341,753,349,77,21,291,1,115,244,64,120,13,98,269,82,5,498,864,351,23,144 }, - { 14,514,369,102,403,377,51,719,880,153,23,13,457,11,485,4,401,12,328,453,33,40,117,57,629,38,730,236,134,670,361,961 }, - { 107,7,172,14,92,135,2,359,60,314,46,16,126,278,232,150,279,32,38,392,298,5,35,97,24,192,259,288,330,52,356,312 }, - { 0,4,25,13,59,90,65,23,26,19,18,12,5,216,91,51,389,33,77,11,22,85,27,81,21,177,746,45,42,194,37,123 }, - { 5,49,315,202,416,77,455,639,450,21,197,137,350,13,408,0,329,318,494,344,61,402,64,509,347,120,113,48,95,713,308,401 }, - { 
130,47,381,390,59,90,200,214,289,6,65,472,29,64,874,648,50,751,624,26,52,32,4,194,875,714,85,249,247,33,881,19 }, - { 51,23,453,13,719,12,457,165,37,730,99,4,386,197,401,17,11,2,3,15,5,961,475,6,515,64,54,700,32,115,0,403 }, - { 15,515,1,13,700,2,23,0,753,5,3,180,51,4,165,12,141,21,197,457,7,115,6,9,352,10,120,202,8,341,11,77 }, - { 0,9,1,67,35,28,68,16,24,65,18,69,50,114,103,12,22,13,5,101,2,96,23,83,149,21,39,55,7,175,433,124 }, - { 28,105,22,0,1,320,170,9,49,301,109,95,127,31,98,55,65,35,2,24,168,159,36,713,16,740,13,338,21,44,512,23 }, - { 13,77,4,51,23,33,102,202,128,59,40,0,64,141,117,403,115,11,15,318,153,269,22,515,475,134,10,494,177,1,90,210 }, - { 13,23,0,51,77,33,2,141,4,10,1,64,115,102,3,6,22,15,217,11,180,7,40,515,165,202,177,9,269,128,700,5 }, - { 456,116,492,8,949,268,867,391,203,51,499,13,719,386,31,791,457,918,125,10,23,93,479,685,417,0,22,338,506,551,870,730 }, - { 17,237,45,180,106,62,32,64,115,41,136,498,255,21,197,129,241,13,3,227,23,352,165,752,350,365,449,155,4,546,476,38 }, - { 1,15,180,515,0,2,341,700,901,352,4,141,13,3,752,5,753,217,317,115,365,23,197,21,51,165,31,6,269,202,77,7 }, - { 205,141,216,269,497,4,588,76,59,152,128,452,79,77,875,11,72,94,188,217,352,12,247,37,90,64,32,1,474,23,947,372 }, - { 64,247,217,237,317,180,752,115,349,141,498,13,437,304,23,372,352,164,579,291,33,864,177,197,0,490,72,10,482,77,269,51 }, - { 2,1,0,13,15,141,3,77,5,515,64,33,23,180,6,700,4,117,217,7,10,11,102,165,753,197,115,134,40,352,12,269 }, - { 11,40,38,328,33,542,12,313,41,339,23,157,377,117,369,51,471,99,775,485,13,305,457,57,14,475,37,248,4,54,188,719 }, - { 33,77,102,40,13,23,0,51,4,128,64,202,117,141,22,196,153,10,134,15,59,269,1,137,65,11,403,318,453,86,515,177 }, - { 472,80,34,250,495,161,17,14,469,176,128,4,389,106,283,436,216,527,3,297,483,177,53,56,231,194,119,84,719,57,255,59 }, - { 317,352,180,141,217,752,115,341,365,244,1,269,202,901,253,15,21,498,372,4,137,515,13,2,700,318,5,197,23,143,753,349 }, - { 
9,39,101,18,265,100,333,520,252,16,0,329,593,1,553,364,68,167,310,30,121,254,118,158,363,166,60,604,272,24,286,404 }, - { 15,515,1,180,700,901,0,2,753,341,752,4,3,13,115,365,317,5,23,197,141,217,165,352,6,22,36,9,137,51,7,10 }, - { 131,39,9,829,166,613,578,827,1,30,716,254,100,98,31,224,0,406,228,310,616,219,44,846,127,190,938,96,265,371,856,438 }, - { 17,64,62,106,141,751,136,292,32,129,352,41,38,476,86,128,214,237,5,177,123,209,217,45,269,954,162,710,180,3,90,4 }, - { 25,42,235,65,650,736,605,6,630,85,123,343,233,256,26,122,63,389,141,249,416,444,368,194,19,108,138,174,90,0,544,511 }, - { 184,229,152,57,266,432,497,452,17,381,619,257,313,12,4,205,59,3,99,471,157,128,5,129,339,369,77,11,32,45,202,2 }, - { 137,202,160,860,141,30,93,567,36,276,295,261,131,39,9,964,201,843,1,98,800,318,116,22,943,187,10,219,206,44,269,535 }, - { 0,493,125,64,49,9,279,10,35,18,93,55,293,31,14,13,194,165,325,48,22,132,21,107,98,389,44,581,342,259,174,137 }, - { 15,515,700,753,4,33,13,77,23,5,51,32,102,40,93,11,349,141,21,8,82,202,64,31,110,10,117,0,1,44,3,318 }, - { 110,253,854,811,352,141,244,951,180,642,661,384,498,143,752,317,911,10,269,206,559,351,261,120,902,533,922,959,365,160,332,217 }, - { 2,29,70,1,75,52,6,220,26,112,145,331,74,163,19,69,38,324,46,58,14,5,25,21,278,223,50,307,66,7,67,409 }, - { 13,23,77,33,51,4,64,141,115,102,0,2,128,177,40,11,202,10,6,180,7,15,269,1,32,217,59,22,291,3,137,515 }, - { 340,897,691,478,658,264,914,382,100,812,363,1,724,156,166,698,88,521,39,404,682,447,296,96,303,411,30,909,9,274,656,772 }, - { 9,18,310,101,265,159,326,120,105,158,33,363,77,195,51,55,13,39,354,132,23,7,28,639,16,137,98,1,252,272,709,49 }, - { 57,313,471,12,99,369,157,339,266,152,38,37,475,453,328,775,11,40,59,188,77,514,401,403,342,4,139,33,377,51,229,14 }, - { 16,7,24,14,35,140,60,92,18,69,71,2,189,1,46,230,108,388,150,38,21,172,278,67,246,267,50,309,236,135,451,0 }, - { 
206,417,93,940,959,473,499,203,8,137,559,728,31,202,44,120,450,141,10,260,116,564,22,326,269,318,268,244,0,1,253,638 }, - { 15,515,700,753,1,0,13,2,23,4,3,51,5,217,7,77,341,115,8,9,10,33,6,180,317,349,291,120,11,165,457,901 }, - { 1,2,5,14,48,21,290,32,50,45,38,46,263,207,155,72,76,29,17,408,425,171,89,52,7,0,292,449,3,227,513,428 }, - { 121,132,354,167,271,223,146,98,18,463,1,668,446,195,407,60,212,447,781,48,360,363,411,522,156,393,807,9,21,16,293,13 }, - { 131,578,105,371,219,224,716,616,187,49,9,254,737,159,385,98,258,127,272,761,0,916,623,910,28,286,39,31,22,518,924,242 }, - { 302,467,97,6,273,1,24,484,124,51,36,18,2,398,453,421,523,69,7,23,13,403,386,150,66,0,298,65,426,165,22,158 }, - { 30,190,530,88,1,100,778,539,625,274,382,410,96,731,960,39,795,321,9,131,264,144,840,748,44,166,669,957,36,31,435,228 }, - { 141,1,2,128,64,33,15,202,3,0,180,5,13,77,515,134,269,102,197,700,10,137,318,6,120,165,753,352,4,82,23,117 }, - { 44,201,567,116,131,224,295,662,489,268,219,31,434,144,187,276,110,384,93,261,699,137,36,442,120,1,613,30,228,64,141,244 }, - { 12,15,51,23,515,37,99,13,700,0,10,117,753,38,165,82,134,120,11,453,197,64,115,569,1,629,401,22,457,474,110,153 }, - { 7,135,2,92,172,14,66,140,38,52,97,46,29,74,16,324,278,226,6,87,1,571,262,5,357,232,35,380,69,314,24,330 }, - { 125,386,23,963,949,60,51,391,165,221,13,197,118,21,719,193,541,421,517,150,393,7,401,453,308,5,791,551,326,558,48,173 }, - { 6,85,42,25,138,222,174,235,280,256,525,289,26,214,64,746,90,32,544,65,204,19,66,337,355,95,348,415,74,29,5,312 }, - { 1,14,5,50,2,67,24,0,46,69,48,21,58,103,16,12,18,38,54,96,83,7,502,45,36,181,35,9,430,28,10,155 }, - { 811,351,642,180,951,752,110,638,253,10,82,352,197,341,365,564,499,854,873,55,9,417,282,901,244,22,559,143,206,141,28,898 }, - { 23,13,51,15,12,453,403,165,4,515,115,719,475,457,700,523,2,21,0,99,202,197,14,5,386,753,128,401,37,308,33,117 }, - { 120,13,23,77,141,1,15,93,217,82,260,51,137,202,110,515,21,180,165,5,128,102,64,351,291,700,269,352,326,203,177,0 
}, - { 1,5,0,22,12,2,36,21,10,23,86,13,28,51,9,128,48,14,32,50,7,3,96,137,54,4,202,49,37,65,208,323 }, - { 219,98,23,127,301,51,258,308,170,910,13,165,22,105,293,616,125,242,276,401,201,395,964,115,55,284,31,374,327,206,512,900 }, - { 64,180,80,165,5,237,2,250,34,58,297,61,197,17,22,29,186,498,231,445,247,3,752,311,95,32,483,153,27,45,115,469 }, - { 13,77,23,33,0,2,1,64,141,51,102,10,15,3,115,40,180,6,515,128,7,22,269,202,4,217,700,5,177,117,14,165 }, - { 15,120,51,515,13,450,23,700,202,153,196,753,260,64,128,141,730,4,326,386,21,523,33,318,5,457,95,32,403,1,77,269 }, - { 2,1,5,29,32,45,207,263,14,425,58,72,76,21,7,408,48,46,52,186,17,292,38,6,61,89,476,50,155,720,119,3 }, - { 15,515,700,753,4,13,11,5,1,23,33,21,3,141,32,2,40,180,117,64,269,202,102,197,0,165,120,51,341,352,153,12 }, - { 76,5,214,129,2,123,45,710,17,249,618,460,179,32,1,257,205,519,90,207,245,184,162,61,769,209,292,106,6,29,14,128 }, - { 1,15,23,13,120,141,51,515,202,21,700,165,0,180,137,2,5,77,128,93,753,260,269,197,326,33,110,352,82,102,318,48 }, - { 7,2,135,14,29,87,66,52,97,172,70,112,5,58,46,337,92,16,20,43,1,38,232,155,74,294,6,461,409,151,262,32 }, - { 574,187,384,926,860,110,258,434,269,531,141,244,160,261,253,116,699,959,940,717,533,36,219,31,902,661,871,295,201,352,10,260 }, - { 156,354,296,1,182,586,64,379,340,937,850,698,31,48,98,44,120,18,163,23,30,658,195,125,77,284,223,291,774,481,96,39 }, - { 250,80,34,472,17,495,176,469,33,194,64,483,4,297,141,14,161,27,53,667,56,833,73,527,585,231,106,51,84,814,2,59 }, - { 97,7,81,140,66,92,172,192,24,298,43,6,74,69,314,426,462,14,501,16,21,508,60,189,267,232,230,104,48,20,135,330 }, - { 31,44,116,144,268,393,492,434,367,489,127,98,918,0,384,9,22,206,948,105,93,203,1,456,332,940,299,28,137,49,293,125 }, - { 15,128,33,3,13,51,141,1,202,64,23,2,515,120,102,0,5,82,10,700,165,197,269,153,403,110,753,137,196,318,117,12 }, - { 31,98,127,9,0,105,22,28,44,512,293,395,299,1,242,49,685,763,320,599,125,116,109,276,284,95,870,159,23,456,36,900 }, - { 
7,24,124,1,6,97,2,69,14,18,23,92,21,67,66,16,5,484,43,20,118,65,36,22,28,0,51,140,13,71,29,150 }, - { 1,64,442,303,284,349,202,141,622,67,154,447,260,44,652,429,9,335,237,919,197,98,167,33,682,269,547,77,863,411,340,201 }, - { 1,15,2,141,515,0,700,13,3,180,10,753,5,64,77,33,4,6,7,197,102,269,165,23,134,11,352,341,291,349,22,120 }, - { 99,139,12,453,196,277,775,40,475,33,23,401,215,51,11,14,77,111,313,130,38,211,37,266,129,15,339,153,719,3,369,515 }, - { 33,77,102,4,23,128,13,141,202,64,51,0,40,59,269,115,117,137,153,1,318,11,10,177,15,134,22,90,196,2,403,32 }, - { 7,2,14,58,70,112,16,5,87,38,46,52,6,128,135,1,32,21,155,29,66,64,0,97,92,186,172,294,13,23,20,37 }, - { 15,13,515,1,700,2,23,0,753,5,3,4,51,10,341,115,365,180,11,33,317,77,6,7,217,12,197,165,117,9,64,102 }, - { 2,1,14,29,75,69,67,6,52,46,38,24,103,220,83,25,70,87,262,74,96,267,50,366,26,16,226,394,357,66,108,19 }, - { 9,105,18,39,1,0,16,557,101,272,252,890,326,49,265,21,137,100,23,938,13,310,159,5,31,24,254,51,30,128,202,132 }, - { 80,209,45,61,667,17,6,106,5,2,151,29,483,255,454,833,27,311,112,19,738,378,1,58,113,26,25,469,119,887,32,64 }, - { 13,23,51,15,5,1,515,0,21,2,12,141,700,165,202,115,753,32,180,4,3,197,10,120,457,9,269,128,64,341,7,33 }, - { 99,12,453,277,139,157,369,474,339,51,38,23,37,196,188,401,775,111,11,313,328,475,153,266,4,471,79,40,33,629,102,14 }, - { 7,92,16,232,97,140,126,14,60,107,66,35,298,387,314,104,246,462,441,150,0,38,24,2,172,357,230,330,5,633,22,289 }, - { 13,77,23,202,318,141,33,4,51,269,102,177,115,403,137,2,40,494,90,11,342,128,31,117,21,32,7,12,64,134,14,10 }, - { 13,2,0,23,141,1,77,3,180,33,6,64,15,10,115,51,4,5,217,197,7,165,515,102,22,11,700,269,40,352,177,14 }, - { 15,515,700,753,4,11,1,93,13,5,180,110,82,21,120,23,2,33,10,141,3,165,197,102,901,0,32,341,117,40,153,12 }, - { 15,515,700,753,1,13,0,2,23,4,77,51,3,5,341,291,7,33,6,115,10,9,8,217,11,177,120,180,102,165,197,365 }, - { 
20,43,198,325,173,904,104,234,66,147,77,319,416,422,97,426,5,0,7,450,861,202,712,725,2,32,639,376,38,324,945,315 }, - { 105,0,9,28,49,301,170,1,127,159,22,16,31,98,512,623,24,109,158,395,35,68,371,65,713,55,2,242,293,21,44,18 }, - { 213,88,689,466,230,30,321,435,699,352,217,201,795,831,144,854,1,443,96,539,530,840,418,251,855,190,93,100,669,31,957,662 }, - { 130,453,47,196,4,57,14,59,236,711,51,153,730,77,412,381,23,202,108,128,361,13,283,117,11,719,200,46,34,78,210,2 }, - { 1,2,5,14,0,50,36,22,38,46,65,67,12,86,114,28,103,29,208,7,10,128,21,83,218,23,96,54,194,6,133,51 }, - { 6,26,74,19,165,453,14,730,1,125,197,50,29,51,138,357,13,2,108,391,70,719,46,457,47,500,386,262,112,23,235,52 }, - { 9,10,376,20,43,0,49,18,30,120,2,33,325,104,501,470,77,788,725,102,523,39,858,5,904,414,174,55,137,37,342,13 }, - { 15,515,700,753,0,1,13,23,51,77,120,202,341,82,5,4,9,260,2,137,141,128,115,351,901,8,180,10,197,21,450,33 }, - { 105,131,272,578,9,49,371,219,159,616,286,320,224,187,716,98,28,22,0,623,127,258,910,737,385,31,239,347,254,109,424,95 }, - { 457,51,13,23,961,12,719,99,453,15,4,515,165,401,629,3,700,11,17,14,2,37,753,41,57,569,38,45,0,33,5,32 }, - { 202,120,5,33,318,77,450,102,1,260,403,128,494,21,165,13,269,12,326,23,342,523,402,2,817,64,15,141,125,82,457,475 }, - { 141,269,352,217,180,64,349,137,202,160,317,15,372,515,700,752,318,753,244,13,437,291,165,864,22,237,5,82,954,21,77,418 }, - { 70,29,2,145,74,112,26,6,75,52,19,66,632,1,87,220,5,135,163,287,307,25,226,7,58,396,294,278,113,409,69,151 }, - { 82,351,317,15,752,180,898,352,141,901,515,341,10,700,365,1,753,498,0,217,253,115,55,854,33,5,143,32,21,160,36,197 }, - { 39,9,310,254,0,30,101,49,252,272,100,265,105,455,159,557,190,333,286,688,18,166,1,158,709,16,625,627,31,131,327,329 }, - { 2,58,29,5,1,151,186,52,70,45,7,549,14,75,112,400,113,155,61,46,227,163,311,315,66,6,307,27,17,220,287,74 }, - { 141,217,13,21,352,23,269,77,180,115,317,64,202,15,349,137,5,51,165,291,318,752,372,4,0,102,33,365,197,32,341,125 }, - { 
68,35,0,9,65,101,149,124,24,154,175,16,28,7,67,1,18,189,114,398,55,14,345,39,118,133,69,2,230,429,71,283 }, - { 66,7,29,2,112,52,20,43,97,151,74,192,135,5,173,525,337,45,145,58,415,25,14,32,644,70,544,226,222,21,6,580 }, - { 31,125,44,22,116,299,242,55,1,170,64,36,479,870,456,685,10,599,558,0,268,506,28,740,23,903,492,164,393,206,2,86 }, - { 188,11,79,12,99,377,94,33,542,339,40,474,111,37,4,51,102,453,139,775,13,475,23,961,277,471,134,57,431,266,115,117 }, - { 658,698,340,98,296,303,1,31,850,363,156,919,44,774,586,385,120,77,82,10,223,30,354,291,23,914,478,87,260,163,48,13 }, - { 15,515,700,753,82,4,1,13,901,33,197,11,5,10,23,165,2,0,180,3,21,77,51,120,365,115,217,40,117,102,32,401 }, - { 15,515,700,753,4,11,5,13,1,141,3,180,23,202,21,2,269,64,165,33,40,32,0,318,120,128,12,197,117,352,51,17 }, - { 91,6,233,85,370,718,81,65,25,256,63,343,42,74,235,123,138,511,397,249,26,194,650,355,64,87,544,18,90,643,66,214 }, - { 23,13,202,51,21,120,1,5,141,128,450,64,318,403,15,137,260,33,12,48,32,31,125,494,269,102,165,515,77,2,197,14 }, - { 180,317,365,341,752,217,115,352,901,482,372,498,1,141,15,253,515,244,2,700,0,21,13,82,23,4,579,351,753,291,269,77 }, - { 13,115,197,341,9,352,468,237,64,498,23,165,22,509,901,546,482,180,28,569,317,51,365,873,391,95,86,217,49,837,752,706 }, - { 13,23,51,1,141,5,165,202,21,120,64,125,180,15,2,33,197,115,128,32,260,269,12,82,4,515,137,7,318,93,0,700 }, - { 214,289,90,174,874,6,138,280,65,81,64,85,355,751,194,233,312,348,835,91,0,32,343,636,249,29,875,288,519,104,247,74 }, - { 15,515,700,753,4,5,11,13,1,33,23,21,2,3,102,32,141,77,180,117,31,64,0,40,134,196,120,352,12,44,197,6 }, - { 33,15,13,515,117,23,700,217,134,753,0,51,153,77,141,2,4,64,196,1,3,180,10,115,5,102,6,11,22,202,165,7 }, - { 15,515,700,753,33,4,77,102,1,40,13,117,11,115,134,5,21,153,23,217,3,32,2,317,120,196,180,141,51,12,59,260 }, - { 15,515,700,753,13,0,1,23,2,217,51,3,4,5,8,317,115,9,341,10,202,180,6,365,7,82,457,22,120,901,33,291 }, - { 
7,2,135,20,97,14,66,52,337,673,192,29,43,355,353,5,16,294,107,376,147,226,331,560,64,470,222,104,415,32,4,324 }, - { 195,132,142,167,146,77,363,271,121,354,202,120,647,178,786,212,687,0,101,878,16,522,60,5,450,411,35,55,98,639,259,318 }, - { 202,77,20,0,318,66,104,128,102,269,177,43,33,7,216,291,494,5,2,342,74,173,97,112,450,22,337,10,234,52,64,678 }, - { 107,362,612,356,359,97,414,43,259,20,392,7,298,147,819,683,465,173,729,660,319,14,5,779,581,595,246,35,501,92,0,230 }, - { 6,165,14,453,13,51,19,23,386,457,74,391,308,2,26,401,47,758,603,108,719,366,1,29,309,730,324,197,133,70,115,867 }, - { 179,72,205,180,247,245,4,490,352,59,317,152,79,498,94,217,148,76,752,864,11,216,141,405,89,452,197,111,497,188,37,21 }, - { 107,7,298,314,14,359,32,392,232,279,172,97,60,581,387,126,121,0,534,493,356,92,441,95,13,21,35,147,22,5,16,362 }, - { 156,271,354,586,360,132,591,195,121,18,340,1,5,13,21,48,668,446,23,463,296,658,60,55,407,698,146,70,626,51,163,24 }, - { 13,23,51,4,0,12,457,15,11,453,2,515,5,1,99,10,115,165,700,475,401,403,3,961,40,14,37,753,719,32,64,569 }, - { 48,125,21,165,13,221,23,763,423,508,197,5,98,92,193,16,441,386,64,314,293,457,391,140,49,60,102,693,683,51,35,867 }, - { 202,77,120,450,5,318,1,494,0,195,18,132,523,403,326,604,354,260,121,576,203,167,234,817,682,49,35,615,21,20,13,102 }, - { 39,9,166,30,0,101,158,68,404,190,333,274,252,310,88,100,49,28,344,35,21,22,419,131,438,1,16,65,530,694,124,10 }, - { 15,515,700,753,110,4,1,11,165,180,93,13,82,5,2,197,33,120,0,3,10,23,21,115,901,217,341,77,317,51,32,117 }, - { 2,29,1,14,6,52,5,46,50,26,70,19,103,58,38,67,96,262,516,309,218,133,108,27,75,17,112,114,24,487,331,83 }, - { 120,77,15,13,1,141,260,23,515,217,110,51,137,700,317,202,165,291,180,21,753,128,0,177,326,93,450,82,64,269,197,5 }, - { 255,59,554,297,183,56,33,444,108,358,123,196,269,122,77,153,57,177,117,730,19,467,605,130,128,50,275,4,291,475,134,133 }, - { 
13,23,51,12,153,14,117,120,165,134,99,401,38,453,15,128,197,719,64,515,475,403,37,33,196,700,40,125,5,0,54,2 }, - { 64,33,174,348,95,108,467,554,56,0,25,306,233,6,63,511,343,120,13,85,29,561,543,707,319,180,899,355,77,49,256,18 }, - { 120,260,51,23,77,15,202,1,93,82,141,450,13,326,515,137,21,5,64,33,110,700,128,165,318,203,269,102,351,753,197,125 }, - { 15,515,700,753,4,13,11,1,5,21,23,2,33,64,3,180,32,141,22,102,77,0,10,93,82,352,117,40,341,31,165,6 }, - { 15,515,700,753,341,13,23,141,33,1,0,217,4,77,180,10,82,351,51,137,5,64,9,317,21,11,102,40,260,202,854,115 }, - { 105,272,131,22,327,286,28,239,320,9,109,578,219,49,98,224,95,159,538,371,616,127,187,64,713,55,0,170,168,258,716,623 }, - { 16,18,68,35,24,60,71,118,92,126,0,9,101,191,7,55,154,175,212,14,167,150,302,28,375,1,107,124,346,273,21,108 }, - { 20,147,43,470,376,142,904,178,427,798,0,595,198,325,858,319,61,202,173,97,5,422,14,22,107,259,32,49,887,77,414,392 }, - { 13,23,51,12,33,15,99,64,128,515,453,202,117,153,37,102,700,40,134,196,120,0,2,753,141,14,38,3,82,403,77,21 }, - { 383,17,62,136,84,119,56,440,3,504,240,80,378,129,123,548,106,128,4,11,14,555,162,32,184,361,59,64,205,5,469,57 }, - { 70,1,48,652,5,638,846,888,21,349,269,260,340,562,767,761,163,883,774,141,125,518,591,0,23,9,87,13,371,303,622,31 }, - { 66,135,6,97,74,278,69,7,14,324,267,172,2,140,462,1,357,38,808,550,92,841,189,29,16,25,298,87,75,204,24,335 }, - { 51,23,33,13,102,40,12,128,64,77,10,202,0,196,117,4,14,99,134,453,65,153,11,475,139,403,22,141,86,2,21,15 }, - { 88,100,264,166,274,435,772,1,382,921,96,478,30,438,639,909,897,521,190,466,960,410,9,144,530,418,31,329,265,691,778,93 }, - { 62,440,136,56,84,3,504,548,555,383,4,17,129,128,507,361,123,59,119,162,14,57,152,328,161,11,202,495,184,27,80,215 }, - { 911,617,332,959,206,141,253,244,282,384,110,120,10,260,352,143,951,811,269,373,160,417,93,531,728,203,434,940,137,55,36,717 }, - { 
120,15,260,141,77,1,515,82,700,351,33,23,450,13,110,326,64,217,269,753,203,137,102,5,165,21,51,291,93,177,373,128 }, - { 15,515,700,753,0,1,2,23,13,51,5,9,82,901,180,8,3,4,120,6,7,141,93,12,197,341,10,33,115,730,64,125 }, - { 7,104,97,107,356,232,66,560,298,289,14,707,38,568,359,64,20,0,65,324,22,214,92,32,192,5,387,43,712,90,172,95 }, - { 6,1,2,66,67,14,74,24,108,29,69,83,458,7,25,38,135,103,36,150,451,114,52,594,75,65,380,18,267,602,19,278 }, - { 13,23,51,12,115,21,202,5,457,15,4,1,64,719,0,403,2,3,453,165,99,141,401,128,32,515,10,37,523,197,120,700 }, - { 57,59,4,11,412,381,77,53,421,291,250,368,99,14,27,369,803,283,23,108,403,19,339,210,0,401,12,444,236,40,361,736 }, - { 15,515,700,1,0,753,2,13,23,5,51,180,3,115,6,7,457,4,9,8,12,82,197,165,141,901,120,719,33,64,21,22 }, - { 64,95,180,247,929,146,90,126,197,32,237,60,288,165,316,92,5,13,77,7,217,955,522,22,16,314,132,4,317,10,312,86 }, - { 15,1,120,13,23,515,0,51,700,180,141,2,5,202,21,260,753,165,137,33,77,110,197,128,326,7,450,4,102,9,269,12 }, - { 14,2,16,46,1,7,24,69,75,35,38,50,29,220,52,140,267,67,18,54,70,309,5,60,92,189,171,87,71,163,58,0 }, - { 31,98,127,44,9,299,0,276,293,284,116,49,935,599,105,22,456,201,28,1,39,125,242,137,371,144,131,492,159,272,51,395 }, - { 6,27,151,53,573,445,297,113,26,73,436,19,491,250,396,315,45,112,145,58,614,881,25,34,611,200,17,80,70,5,138,631 }, - { 32,693,81,788,90,804,403,56,494,21,84,397,202,65,18,77,64,681,214,725,523,784,526,33,102,825,240,0,115,241,817,91 }, - { 24,7,14,2,18,16,65,0,108,149,28,69,1,71,154,36,124,35,67,140,189,429,92,68,66,22,55,118,302,150,9,6 }, - { 0,68,9,35,65,101,189,212,114,67,124,69,1,154,149,39,230,64,252,16,88,702,103,100,18,336,28,329,520,83,30,755 }, - { 5,2,186,29,61,45,17,1,52,48,58,171,155,227,80,209,311,21,14,46,50,106,243,513,334,502,496,38,3,6,32,592 }, - { 15,515,700,753,13,1,2,0,3,4,5,23,341,11,10,33,6,51,165,117,153,7,180,12,365,901,77,569,197,115,64,9 }, - { 
13,15,23,515,0,51,1,700,4,2,753,10,3,5,12,77,33,961,165,457,197,11,115,9,22,102,40,403,202,21,14,59 }, - { 15,515,700,753,13,0,1,23,2,33,102,5,4,10,9,3,51,115,77,7,6,341,12,11,217,40,457,196,180,165,8,523 }, - { 166,39,30,274,190,100,333,438,530,310,88,252,0,9,539,265,1,656,404,101,625,131,778,254,31,455,676,329,724,158,21,23 }, - { 734,148,94,308,431,115,37,89,111,413,79,468,197,629,341,474,569,12,13,873,179,401,11,4,180,23,205,72,59,365,134,51 }, - { 539,228,224,219,816,190,30,258,871,840,669,93,406,530,957,187,160,531,748,137,131,88,863,36,728,839,44,213,352,116,202,466 }, - { 393,791,125,801,730,551,386,23,31,175,93,98,51,13,144,788,126,203,21,345,116,22,949,110,575,165,326,44,0,4,60,221 }, - { 13,23,77,141,0,4,51,2,33,115,64,1,10,3,6,15,11,102,7,217,180,40,515,22,128,177,202,9,700,269,165,5 }, - { 2,29,7,70,52,14,1,58,112,46,75,5,171,163,87,220,307,151,186,334,38,66,155,16,69,135,278,45,262,97,6,21 }, - { 88,321,213,100,230,435,689,466,1,382,30,352,217,699,410,96,795,36,921,752,190,141,144,180,44,831,317,83,443,31,840,251 }, - { 363,411,101,520,354,9,195,668,132,156,447,1,905,364,18,23,765,664,146,5,360,13,121,96,98,31,252,39,100,759,264,551 }, - { 13,23,51,730,12,719,453,457,401,475,5,21,403,2,0,1,15,4,3,899,99,32,165,11,515,308,197,115,6,961,700,523 }, - { 72,76,89,12,37,4,308,179,38,528,90,431,54,205,148,184,401,57,152,474,23,59,51,245,428,11,32,99,405,316,257,21 }, - { 376,20,43,147,470,173,97,595,107,319,414,142,819,5,729,178,858,7,427,32,426,104,14,0,392,362,259,61,230,77,560,246 }, - { 202,141,269,494,318,137,51,128,403,4,217,96,77,5,64,177,291,180,15,352,102,10,33,349,2,317,0,341,120,515,21,453 }, - { 77,202,33,128,102,318,494,269,13,0,117,23,342,291,403,15,134,51,153,141,177,515,82,137,196,700,203,64,22,351,753,4 }, - { 253,110,951,352,499,811,10,854,180,638,244,559,642,752,564,8,141,143,417,341,901,260,206,197,922,661,93,15,498,373,165,911 }, - { 141,13,23,180,4,217,5,1,269,317,21,0,2,202,115,51,352,77,3,197,64,341,318,15,291,9,137,93,32,165,515,33 
}, - { 9,0,18,252,16,101,68,39,24,118,35,109,158,329,28,167,60,364,333,265,49,100,22,419,553,55,1,677,71,7,212,159 }, - { 28,109,9,39,0,158,49,22,168,35,55,175,1,65,67,185,194,159,289,95,272,114,30,105,86,584,36,169,254,2,83,24 }, - { 15,515,13,700,1,753,2,23,0,3,4,5,33,341,11,51,6,10,197,115,901,180,77,40,102,12,365,165,141,217,7,317 }, - { 173,693,104,422,5,18,61,32,102,0,20,13,784,560,33,66,397,526,49,207,29,25,510,707,65,6,11,344,21,263,81,77 }, - { 23,13,386,51,308,801,719,221,401,949,21,730,165,421,102,115,125,33,341,670,468,117,770,1,120,6,197,14,403,97,67,958 }, - { 0,49,105,16,28,24,159,9,158,320,1,68,35,239,170,18,109,7,55,65,2,95,301,124,347,14,21,154,22,127,286,31 }, - { 2,5,1,207,45,29,32,58,76,61,6,263,292,655,72,14,17,476,7,119,52,306,70,64,21,90,186,214,106,38,3,790 }, - { 21,6,125,49,13,64,715,66,115,95,197,33,22,32,204,165,56,278,0,408,241,120,4,808,681,350,263,85,81,571,135,509 }, - { 612,427,325,107,202,5,376,49,64,392,403,470,21,147,31,788,494,14,362,465,858,98,20,804,518,43,845,318,125,97,725,534 }, - { 32,21,76,72,2,1,14,5,241,449,89,38,350,221,155,48,50,292,37,46,45,90,270,54,17,179,214,12,148,430,476,413 }, - { 24,0,28,16,7,124,35,154,14,149,65,18,9,68,55,108,175,71,2,1,22,109,92,67,484,336,118,69,302,398,570,420 }, - { 1,5,14,2,48,50,38,67,46,21,0,54,45,270,281,12,24,32,155,96,513,103,290,83,61,58,36,17,37,72,69,181 }, - { 13,961,569,197,37,15,23,474,515,94,148,111,12,165,629,341,700,79,901,401,51,405,753,10,134,4,115,734,873,11,89,117 }, - { 33,23,102,51,13,40,77,128,64,202,141,15,4,12,0,1,2,117,22,11,10,403,153,515,99,318,137,269,139,196,700,134 }, - { 0,1,24,67,9,16,18,35,28,69,103,50,5,2,65,12,83,68,7,96,14,22,21,149,75,114,13,133,23,71,218,54 }, - { 384,617,940,332,855,911,206,959,434,282,141,10,93,253,244,110,144,268,120,36,352,137,417,203,116,31,44,269,160,201,143,951 }, - { 30,93,473,137,31,704,450,652,190,203,800,254,166,274,326,144,269,160,127,303,120,625,88,848,110,435,77,521,349,131,340,744 }, - { 
53,27,73,26,19,250,297,200,25,630,17,6,611,122,34,42,714,235,472,65,436,14,80,684,690,106,45,113,680,108,64,4 }, - { 15,515,1,2,700,0,753,3,5,141,180,4,13,77,33,10,217,6,7,134,11,352,197,64,165,341,317,23,12,115,102,40 }, - { 254,530,39,613,688,221,30,31,438,190,228,960,1,44,141,21,180,406,23,166,9,202,13,96,137,48,131,829,317,269,393,51 }, - { 9,39,28,35,30,166,158,36,0,175,101,346,364,67,49,68,168,420,88,1,194,131,100,352,55,83,190,64,137,570,86,65 }, - { 62,56,3,548,555,507,440,161,34,4,215,136,162,514,361,527,17,14,211,130,328,11,383,123,84,183,38,57,184,152,205,494 }, - { 92,126,107,7,356,493,97,279,359,298,16,246,35,60,14,441,362,121,43,423,5,132,392,20,508,230,199,146,232,173,150,414 }, - { 15,82,141,515,291,922,349,700,217,260,372,120,351,93,77,753,318,352,373,854,1,326,269,21,13,102,144,202,64,23,203,137 }, - { 141,217,352,115,180,13,269,317,752,77,23,21,341,197,5,372,244,291,9,64,51,102,4,1,365,2,165,33,3,48,237,351 }, - { 78,47,390,19,130,453,108,27,711,813,730,444,412,283,196,690,123,14,128,26,250,389,650,236,200,65,51,4,34,183,297,73 }, - { 34,250,297,80,472,64,495,17,311,3,148,45,667,61,176,53,243,27,90,161,469,141,483,151,62,128,29,4,58,56,5,231 }, - { 51,23,33,13,551,77,102,326,421,21,523,120,5,899,453,692,202,153,308,615,115,958,450,401,791,68,221,93,475,18,403,4 }, - { 98,223,393,363,411,1,478,834,664,156,284,691,447,791,914,293,354,724,697,9,807,541,759,51,18,421,48,264,948,586,195,848 }, - { 7,14,107,232,16,92,2,60,46,5,359,121,24,526,220,620,135,1,172,21,126,314,132,77,18,75,32,278,12,23,52,38 }, - { 32,76,2,1,21,72,241,14,5,48,292,89,476,45,720,270,179,90,17,214,148,38,50,29,129,155,350,46,290,227,123,464 }, - { 15,515,700,753,13,23,33,77,51,4,102,0,32,202,1,11,128,82,117,141,40,5,110,8,3,90,137,21,10,318,403,165 }, - { 66,6,69,2,1,74,14,135,278,267,380,24,29,97,67,38,103,75,7,388,324,25,52,150,87,83,189,357,335,108,204,172 }, - { 152,4,339,59,79,471,188,11,77,94,128,33,529,377,12,111,102,202,452,402,216,99,13,542,51,40,474,37,64,291,23,961 }, 
- { 15,515,700,753,1,0,196,13,33,2,77,5,23,102,3,10,9,7,217,4,6,153,117,177,14,457,115,12,40,730,11,134 }, - { 17,209,45,106,207,5,255,119,62,2,61,3,263,742,306,655,425,378,32,56,29,136,84,80,311,58,186,240,243,383,14,21 }, - { 120,260,450,15,1,23,817,13,515,523,326,5,700,51,82,31,202,64,21,753,318,93,32,269,98,33,351,77,102,125,457,165 }, - { 116,492,268,93,23,206,203,0,551,918,13,51,8,22,417,940,120,10,499,31,949,791,125,523,165,473,341,730,421,959,401,391 }, - { 15,515,700,753,165,13,0,1,197,23,4,82,120,2,180,12,260,719,8,3,386,117,5,523,901,11,341,51,10,9,141,351 }, - { 14,24,69,7,2,66,108,1,67,6,36,398,18,267,150,97,29,38,83,149,65,74,28,0,189,71,388,16,273,124,46,22 }, - { 330,96,523,335,367,662,141,839,1,922,372,615,244,717,269,443,418,352,403,692,217,854,752,180,36,64,498,576,349,201,98,284 }, - { 184,90,257,205,245,229,57,152,769,17,524,5,32,497,45,432,619,2,452,266,4,106,1,21,179,59,76,3,460,292,381,128 }, - { 7,14,16,2,46,5,70,107,87,13,58,307,92,32,38,23,202,0,172,24,18,21,60,128,77,35,20,10,9,4,171,112 }, - { 7,66,140,16,14,92,97,69,267,172,189,24,380,2,35,60,298,451,230,135,314,74,150,71,38,357,6,330,67,423,21,443 }, - { 121,167,354,132,18,446,147,101,212,146,407,16,55,35,647,191,20,271,199,68,60,259,463,107,9,126,363,7,195,43,14,411 }, - { 76,90,179,32,205,21,184,460,257,288,45,245,316,5,57,152,241,2,358,1,229,72,524,148,48,769,17,4,12,38,14,720 }, - { 147,259,178,878,427,465,581,198,786,798,142,534,325,929,20,362,35,132,107,376,43,5,279,77,49,146,70,202,590,771,33,14 }, - { 473,93,450,778,141,30,855,466,144,203,330,530,88,523,459,372,201,617,839,704,254,321,934,326,39,36,82,717,332,213,559,403 }, - { 523,475,51,899,730,453,23,719,403,33,457,13,421,386,4,120,117,196,102,153,15,801,450,817,515,260,202,11,700,99,165,125 }, - { 15,1,13,515,0,2,700,5,23,753,4,3,341,317,10,115,180,11,33,64,217,77,117,165,197,7,6,365,9,141,102,134 }, - { 19,4,119,40,33,202,27,84,102,56,77,73,504,485,26,494,757,63,862,59,23,300,25,12,128,11,5,13,342,880,469,6 }, - { 
32,20,2,13,5,21,23,6,12,38,43,29,64,7,95,51,61,207,48,147,90,178,17,182,49,0,115,202,52,362,37,22 }, - { 339,188,11,79,4,94,377,12,99,111,542,102,37,33,474,51,471,40,453,152,77,13,59,403,342,23,117,57,475,134,128,38 }, - { 34,128,283,176,495,231,318,432,503,275,529,527,161,53,3,202,56,291,585,469,73,17,14,412,57,27,80,245,250,381,402,51 }, - { 15,515,13,700,1,217,141,120,23,180,753,115,365,51,317,341,77,260,0,291,110,137,202,5,21,269,64,36,349,2,4,10 }, - { 13,15,961,515,700,753,4,12,2,457,3,11,197,51,37,569,115,23,5,0,99,10,1,134,6,111,165,33,72,40,38,79 }, - { 15,515,700,753,13,1,0,2,23,33,5,3,10,4,9,115,7,102,6,51,12,217,77,11,40,457,569,341,117,317,14,719 }, - { 5,76,2,32,292,214,45,1,129,519,123,179,90,710,17,29,460,72,14,207,21,249,58,205,464,263,618,48,6,245,3,257 }, - { 72,76,32,4,21,12,38,23,99,54,89,3,14,17,51,57,11,90,13,488,179,2,59,148,45,37,5,115,401,1,10,421 }, - { 98,223,393,1,834,264,284,791,724,293,478,772,697,909,363,682,905,447,541,821,411,51,421,9,807,48,765,31,730,96,386,410 }, - { 341,13,509,8,23,638,165,901,762,10,569,242,391,197,873,642,506,499,629,961,15,180,116,456,206,546,417,1,338,457,515,867 }, - { 1,2,5,50,14,38,46,114,0,36,29,22,218,65,86,96,137,21,133,285,12,10,323,181,17,58,51,23,67,7,28,6 }, - { 481,878,202,13,5,23,182,32,269,21,1,318,77,142,557,494,141,33,640,137,70,291,2,51,260,415,929,403,120,58,4,259 }, - { 15,515,700,753,1,4,13,0,2,5,341,3,11,180,134,12,10,317,197,365,33,21,23,165,117,6,77,7,217,37,32,498 }, - { 25,119,19,6,26,42,27,17,4,790,45,814,2,469,483,84,122,1,0,33,32,128,76,80,611,113,73,56,5,240,202,77 }, - { 14,2,7,1,24,0,65,6,16,69,67,22,124,28,108,5,18,36,86,10,38,46,66,398,289,168,12,83,21,23,610,13 }, - { 51,23,128,13,15,202,12,120,33,64,141,82,10,515,0,403,700,3,1,99,117,269,153,165,753,5,318,197,102,260,2,137 }, - { 16,35,24,0,9,18,7,1,68,69,50,71,103,65,67,189,133,23,28,13,60,537,149,335,75,21,64,5,114,2,12,14 }, - { 
754,803,133,576,880,543,2,1,657,50,14,38,46,5,29,67,218,36,58,171,52,96,24,103,775,0,114,83,181,54,65,45 }, - { 21,32,5,3,2,17,14,72,76,1,12,23,38,51,4,54,10,0,89,13,99,137,45,36,421,115,543,11,22,128,221,48 }, - { 434,384,268,144,855,940,617,206,332,116,93,911,959,282,203,137,141,489,44,120,10,110,244,36,98,31,269,253,367,417,160,9 }, - { 15,2,1,0,13,515,5,700,3,23,180,217,141,10,753,4,117,6,77,33,64,7,11,197,352,317,341,134,165,115,12,9 }, - { 2,113,6,25,1,0,29,4,7,833,5,45,32,61,128,19,77,151,74,145,64,42,14,210,655,106,59,177,27,17,21,738 }, - { 116,268,918,203,551,31,8,692,206,791,403,499,417,93,940,421,0,23,22,120,13,523,44,51,299,473,959,1,10,475,202,125 }, - { 107,126,132,612,362,279,20,146,259,493,199,121,590,43,660,147,35,376,939,60,941,534,683,5,0,953,16,7,49,649,595,470 }, - { 15,515,700,753,13,1,0,23,2,33,77,4,3,51,5,102,115,10,9,341,6,7,11,342,217,12,120,180,40,317,141,8 }, - { 53,27,17,161,469,378,73,527,19,136,383,250,495,56,862,26,62,84,80,106,200,4,34,14,440,297,3,128,585,5,129,123 }, - { 17,45,209,106,5,207,243,454,119,255,2,263,186,290,29,3,21,62,425,61,84,32,58,56,48,408,655,136,306,14,742,227 }, - { 4,152,59,452,128,79,216,11,339,471,529,188,94,77,202,12,291,33,318,377,99,51,23,5,402,349,32,474,102,13,205,111 }, - { 15,515,700,753,1,0,2,13,3,5,23,4,180,51,115,9,6,12,7,8,197,33,10,961,901,77,141,752,110,22,120,341 }, - { 951,752,638,811,351,642,180,253,10,341,197,901,110,873,8,244,15,352,165,898,143,515,564,762,499,55,365,700,82,753,141,854 }, - { 6,262,197,350,74,26,115,509,841,583,165,38,21,13,47,50,235,19,33,324,453,4,308,196,138,99,64,903,675,1,223,130 }, - { 125,165,391,23,386,221,21,13,558,457,51,867,197,115,401,758,77,97,308,791,7,180,48,120,963,451,743,89,603,134,403,450 }, - { 1,14,2,5,16,46,7,38,58,24,50,0,69,48,35,67,54,18,12,75,21,45,513,155,430,37,270,9,61,163,223,32 }, - { 23,13,51,0,12,15,4,1,115,2,515,453,10,457,5,3,202,21,165,700,403,11,37,64,77,401,9,197,753,59,475,99 }, - { 
129,84,17,56,27,495,19,548,80,123,162,378,3,504,161,469,618,73,40,53,4,26,205,184,106,183,62,6,257,128,862,12 }, - { 28,9,22,49,109,1,67,0,39,55,168,158,83,36,35,86,420,194,185,159,95,105,69,208,272,103,50,114,2,254,169,30 }, - { 242,391,8,456,116,13,23,492,341,165,867,51,499,457,479,638,338,509,719,10,1,642,417,762,401,93,206,268,901,569,22,197 }, - { 211,162,248,130,57,4,41,556,507,266,183,152,305,361,11,129,62,229,38,471,514,313,157,300,377,3,440,128,123,328,339,59 }, - { 7,92,97,16,298,140,60,126,14,35,279,314,232,246,43,230,508,173,71,107,423,24,150,779,20,189,66,18,607,21,0,653 }, - { 15,515,700,753,1,0,2,13,23,5,3,180,51,901,6,4,7,12,9,115,8,457,165,82,120,197,10,64,141,341,22,117 }, - { 0,18,403,25,523,74,6,24,42,91,22,102,13,51,49,193,475,681,95,85,730,64,899,397,273,750,247,673,32,805,757,288 }, - { 56,0,18,65,33,554,84,343,64,6,90,561,22,19,899,108,27,63,289,475,240,467,370,32,233,214,24,123,95,287,28,194 }, - { 31,98,127,9,0,44,293,105,395,299,49,242,28,22,599,116,1,284,276,125,456,685,763,159,272,623,23,935,393,144,201,137 }, - { 1,5,2,14,38,46,50,48,21,7,58,45,270,61,155,171,0,290,69,32,29,54,67,16,24,666,663,17,37,75,502,52 }, - { 23,51,13,453,457,12,719,4,15,99,401,2,961,3,11,730,475,515,0,1,165,115,629,700,14,17,403,40,5,33,37,64 }, - { 968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937 }, - { 2,1,14,29,67,103,6,46,52,75,24,133,38,218,83,309,36,108,70,114,96,5,238,74,25,26,220,236,65,50,69,87 }, - { 7,71,16,92,24,60,14,97,150,140,35,189,149,298,18,230,43,508,2,423,69,0,38,314,66,279,399,517,251,20,232,273 }, - { 23,1,120,51,13,202,77,141,260,21,15,5,128,82,2,450,269,165,102,318,48,32,137,515,125,64,12,115,351,180,33,7 }, - { 77,13,33,23,64,51,4,102,141,128,40,1,2,202,0,6,177,115,137,15,59,10,11,7,269,22,515,180,318,3,700,95 }, - { 101,9,18,363,264,520,411,604,676,682,905,271,16,821,167,0,621,364,39,100,121,118,166,781,647,252,1,848,447,265,404,60 }, - { 
144,203,326,382,166,418,93,88,96,822,1,141,859,77,744,438,110,269,921,367,521,274,100,39,494,120,403,473,217,576,13,291 }, - { 13,21,180,125,5,23,191,32,18,16,146,199,115,24,165,118,0,225,22,1,60,197,64,901,375,241,48,12,408,71,522,818 }, - { 15,515,700,753,13,0,23,8,1,51,82,102,2,33,4,9,180,165,5,77,10,110,12,197,120,260,18,326,351,403,22,457 }, - { 33,77,102,64,13,23,128,51,141,202,1,40,0,2,117,10,15,4,6,318,269,134,22,515,180,115,177,153,137,196,3,700 }, - { 174,544,104,525,74,0,151,25,6,624,29,66,2,636,81,45,204,177,64,416,7,644,5,138,222,319,355,77,22,122,789,216 }, - { 141,304,372,352,291,947,177,269,128,954,77,349,217,202,64,318,498,437,102,864,86,13,115,180,137,5,210,197,32,950,678,7 }, - { 161,200,53,17,714,27,34,73,472,62,585,56,440,383,136,78,527,19,4,3,106,361,14,250,80,514,377,84,322,390,862,548 }, - { 32,76,72,21,38,14,89,54,12,37,2,241,5,428,17,1,181,221,350,45,3,4,449,90,148,179,99,292,794,770,477,46 }, - { 33,23,128,64,141,13,77,51,102,202,2,15,1,3,40,10,5,153,269,515,165,0,117,196,180,318,6,700,137,134,120,22 }, - { 96,137,30,0,9,39,840,202,669,406,141,530,613,1,180,88,22,160,679,576,28,403,31,219,49,228,829,100,36,15,10,856 }, - { 180,141,352,1,15,752,115,0,217,365,2,515,13,901,341,317,23,4,197,700,269,5,3,31,753,244,21,165,253,202,51,44 }, - { 1,2,67,0,28,50,83,65,14,46,103,114,24,38,36,9,69,5,18,7,22,133,55,218,16,124,29,54,96,160,12,480 }, - { 180,115,352,317,365,217,752,901,141,15,341,1,515,253,700,0,753,873,2,197,31,137,165,244,4,120,160,44,98,5,202,3 }, - { 5,32,347,49,13,21,95,713,23,1,77,33,60,64,107,4,126,928,296,850,0,241,197,102,652,195,180,534,165,153,379,10 }, - { 341,180,365,901,317,115,15,752,515,700,217,873,753,82,0,110,197,141,951,165,1,564,13,351,253,12,10,3,2,4,308,244 }, - { 17,45,21,3,106,5,155,38,227,32,2,209,62,54,12,243,14,181,552,587,46,540,207,794,37,48,430,119,255,221,770,29 }, - { 16,24,35,18,7,0,50,1,9,14,75,69,2,5,12,21,60,13,67,71,23,48,10,108,223,181,189,103,46,64,92,51 }, - { 
127,13,98,165,308,23,286,293,258,51,219,395,197,115,301,401,31,391,22,105,457,170,239,276,55,338,629,116,180,479,509,569 }, - { 539,213,748,840,957,669,30,466,88,217,144,251,863,190,137,93,230,228,679,352,317,203,617,321,258,530,160,219,96,831,816,689 }, - { 5,48,1,21,2,14,0,36,12,38,32,54,430,181,50,270,72,99,281,45,17,10,46,22,37,218,67,3,290,76,23,51 }, - { 13,23,0,4,33,51,2,115,141,1,77,217,180,10,9,317,3,102,11,5,15,197,7,202,22,165,40,64,515,6,341,31 }, - { 13,15,117,515,23,12,37,134,165,700,38,54,457,753,51,64,153,197,14,10,33,82,961,0,99,89,115,719,141,3,4,1 }, - { 5,21,2,3,1,32,14,12,48,17,0,10,51,23,38,22,4,72,13,54,36,45,137,76,99,114,86,37,11,64,540,430 }, - { 202,128,77,318,291,33,269,102,275,141,494,342,40,678,0,177,20,210,402,7,4,5,137,6,13,450,403,32,49,120,23,22 }, - { 1,2,24,14,67,46,69,50,38,103,16,18,75,35,83,29,52,96,5,108,0,7,54,71,149,394,236,309,70,133,220,58 }, - { 15,515,1,700,0,2,753,13,23,5,180,3,51,4,165,457,12,197,115,6,7,21,9,141,8,901,33,82,120,77,10,110 }, - { 0,28,65,14,67,2,124,24,1,9,7,69,55,154,36,16,46,114,175,35,83,22,429,18,109,149,68,189,108,336,251,133 }, - { 56,162,403,3,129,775,99,161,17,40,527,33,880,4,14,128,475,12,548,23,102,202,361,117,34,184,383,200,183,196,64,53 }, - { 151,2,29,58,112,45,186,113,5,70,52,1,311,6,315,66,61,7,74,27,631,17,80,87,287,243,209,227,14,491,19,869 }, - { 6,1,74,2,75,29,25,66,26,70,52,138,67,324,357,42,19,220,14,85,87,108,38,451,309,103,24,69,380,135,114,65 }, - { 15,515,700,13,23,0,1,120,753,51,180,2,260,202,5,141,77,102,9,450,115,21,197,165,7,137,110,33,12,269,901,4 }, - { 5,45,17,2,14,46,48,38,181,50,155,3,186,54,61,29,21,227,281,80,540,106,12,400,52,1,58,32,328,171,209,487 }, - { 16,18,265,121,158,35,60,9,39,7,329,105,252,68,24,1,132,167,159,22,0,49,286,101,21,146,23,327,120,709,5,14 }, - { 108,467,283,56,389,650,123,412,33,177,899,475,216,453,269,349,619,65,51,730,403,670,23,196,523,128,84,13,401,789,503,543 }, - { 
514,3,11,377,328,4,361,507,57,403,14,880,130,485,176,215,236,38,152,102,211,56,62,757,54,585,300,556,34,555,40,229 }, - { 3,555,62,266,130,99,507,139,514,12,152,229,215,305,57,40,440,33,403,471,38,56,475,14,361,313,775,328,196,548,123,23 }, - { 120,202,318,15,77,13,1,450,33,269,515,260,5,128,494,51,23,700,102,141,40,753,326,403,817,137,523,21,177,922,342,7 }, - { 15,1,515,23,0,13,700,2,51,753,180,5,165,21,197,12,3,120,115,4,141,6,9,7,457,33,386,202,82,8,31,341 }, - { 15,180,515,82,351,700,10,317,753,115,217,365,141,898,33,901,13,23,110,854,752,77,1,197,4,341,143,36,64,352,102,9 }, - { 104,289,66,707,214,90,712,64,97,173,20,0,414,194,874,43,32,7,568,560,65,38,426,312,715,192,376,74,835,5,324,147 }, - { 84,56,0,554,63,65,453,249,123,643,18,26,847,475,511,403,416,561,524,289,370,73,9,19,45,42,719,194,27,467,33,730 }, - { 21,346,13,350,308,826,197,101,352,68,570,0,165,23,9,841,115,100,509,694,221,230,35,217,569,88,124,749,1,777,212,154 }, - { 16,92,7,24,60,18,35,140,126,14,50,71,46,330,2,75,246,5,121,267,571,1,230,309,220,0,9,64,146,236,54,108 }, - { 82,15,515,898,365,700,180,33,341,753,77,901,10,115,55,351,21,5,1,4,13,102,36,217,2,165,752,120,197,117,11,317 }, - { 16,24,35,18,69,71,140,1,103,7,189,68,0,50,9,108,2,133,60,267,230,46,149,67,167,118,92,14,75,21,191,38 }, - { 60,71,16,18,7,20,43,118,35,68,375,28,608,0,175,566,154,92,14,149,628,33,22,13,2,10,279,23,107,356,55,117 }, - { 187,258,871,295,201,434,219,224,489,384,268,110,261,839,44,699,93,116,36,131,141,228,144,160,940,567,244,406,137,574,98,253 }, - { 66,7,97,172,192,712,232,324,204,74,43,448,387,426,568,20,526,107,104,135,356,729,173,0,22,5,32,95,2,64,500,560 }, - { 15,515,700,753,1,4,0,341,13,3,134,2,5,33,11,77,12,10,23,197,365,901,7,40,217,32,21,6,51,180,961,37 }, - { 0,28,24,9,35,65,16,124,68,55,109,154,7,39,22,149,158,14,175,1,49,252,18,71,2,168,289,419,108,420,67,101 }, - { 7,16,14,92,2,46,140,24,220,35,38,60,75,1,50,18,87,54,5,126,29,52,278,262,314,107,71,21,172,135,330,394 }, - { 
7,92,16,14,172,126,2,60,140,35,135,314,278,46,24,38,232,107,330,66,5,18,150,246,230,97,52,1,121,563,279,21 }, - { 6,26,235,53,297,436,27,19,25,73,113,445,90,214,65,42,64,289,250,611,624,32,45,648,614,17,85,491,34,122,200,416 }, - { 352,141,1,217,854,752,351,180,244,36,110,661,82,258,816,160,295,219,567,224,230,269,922,144,260,268,93,201,137,116,489,202 }, - { 16,60,35,18,126,107,68,191,92,121,7,14,598,20,493,279,167,446,118,0,28,43,463,55,24,212,375,566,9,150,575,21 }, - { 15,1,515,2,4,13,0,700,3,5,23,753,341,77,51,115,33,11,180,10,197,141,6,165,7,901,102,40,9,202,217,12 }, - { 23,51,13,202,21,5,1,120,15,137,128,125,32,2,12,141,33,165,64,515,403,318,700,48,180,7,6,450,115,523,475,260 }, - { 131,716,224,371,219,187,737,616,385,254,9,98,105,924,31,258,836,39,127,578,49,916,44,761,272,137,944,159,0,242,442,22 }, - { 15,515,700,1,753,2,5,0,4,13,3,180,11,141,197,10,341,217,33,134,165,6,77,7,317,12,352,64,365,32,102,40 }, - { 66,74,7,173,174,29,192,2,222,20,226,43,353,52,712,6,0,138,500,204,97,145,64,104,426,673,355,90,25,5,65,87 }, - { 5,259,786,534,590,493,279,49,13,581,465,21,929,35,941,132,147,32,23,612,362,626,107,121,178,0,146,61,48,939,10,18 }, - { 2,14,16,7,278,69,135,140,46,24,267,35,92,38,1,189,29,52,309,60,66,75,71,172,74,357,18,87,67,6,230,5 }, - { 165,13,308,197,391,23,401,15,51,457,180,509,115,569,3,629,961,719,34,758,317,734,14,29,46,2,17,901,38,453,5,217 }, - { 1,22,2,14,0,28,7,168,67,49,65,24,36,95,5,105,55,35,12,46,69,16,114,159,194,50,10,9,158,83,164,109 }, - { 34,453,3,196,130,14,322,11,47,51,377,236,361,4,730,153,514,711,57,440,62,17,161,108,176,59,485,56,162,412,202,117 }, - { 18,16,21,23,48,13,24,35,121,5,156,60,51,1,7,132,141,221,163,115,0,271,447,340,363,202,125,71,2,781,22,698 }, - { 165,13,457,23,197,961,629,569,341,41,12,38,401,901,54,51,115,17,15,509,421,37,62,45,719,57,32,328,117,758,157,99 }, - { 2,1,77,141,33,64,3,102,0,23,13,5,128,10,6,15,180,202,269,40,51,515,7,165,137,117,318,4,700,153,197,352 }, - { 
68,212,0,124,101,9,154,16,562,191,21,149,65,24,35,1,118,167,818,350,520,100,722,841,264,71,13,302,478,23,375,346 }, - { 98,23,48,598,13,293,541,21,125,121,51,807,0,31,35,259,126,7,386,1,223,783,10,107,199,20,221,144,342,963,49,64 }, - { 21,13,5,586,1,23,167,48,33,781,647,49,165,18,51,271,77,32,761,118,0,82,391,22,146,141,459,31,197,156,115,4 }, - { 2,1,5,61,29,7,58,45,14,6,425,32,70,52,290,738,207,21,72,112,66,76,655,17,186,46,64,263,38,0,128,87 }, - { 39,265,9,100,1,333,363,101,18,411,447,254,166,310,31,98,264,30,639,404,156,286,16,93,593,203,272,682,0,905,44,821 }, - { 6,2,1,19,29,51,26,108,25,74,5,23,14,114,13,386,133,103,42,66,453,70,309,138,719,324,65,38,64,96,52,75 }, - { 20,43,356,107,49,858,595,7,414,359,0,5,392,319,97,612,422,819,14,376,173,246,22,470,147,427,230,92,197,33,683,95 }, - { 0,9,68,35,65,67,114,101,28,1,124,175,336,69,154,103,83,24,189,133,39,16,50,7,2,149,55,251,18,345,230,36 }, - { 23,13,51,15,0,1,515,115,165,2,5,12,700,202,4,21,141,457,753,197,10,3,180,120,32,9,318,11,453,64,6,269 }, - { 121,195,60,16,126,107,98,271,146,407,132,35,1,167,199,223,493,191,279,20,18,5,43,7,21,92,48,393,0,362,212,467 }, - { 31,44,299,116,393,144,492,456,268,22,105,0,367,918,384,434,127,489,98,9,963,125,242,948,1,28,206,49,36,51,93,293 }, - { 23,13,457,51,165,401,719,758,197,453,961,629,308,14,15,12,730,3,386,569,391,29,739,515,34,828,832,901,115,514,670,341 }, - { 105,36,131,22,180,115,341,127,169,1,9,31,64,98,44,365,317,141,272,143,160,55,219,86,197,776,239,187,0,535,13,752 }, - { 1,6,2,14,66,25,29,5,108,67,65,114,19,38,26,52,74,7,24,18,69,86,36,388,64,51,17,83,23,46,42,75 }, - { 51,386,23,453,719,13,730,6,457,670,758,19,401,165,2,475,47,26,899,14,108,17,1,5,197,29,894,754,236,74,27,285 }, - { 252,18,9,101,121,16,132,0,419,167,364,60,604,35,265,363,146,271,39,158,68,109,28,329,848,24,647,907,682,159,212,55 }, - { 283,503,128,432,26,193,63,269,789,529,102,122,389,275,678,6,25,318,445,4,342,27,573,605,177,862,643,291,216,57,235,59 }, - { 
2,1,29,75,69,52,14,6,46,74,87,7,220,226,278,38,135,66,267,70,16,262,25,24,380,324,357,140,67,394,97,222 }, - { 97,298,69,7,66,140,189,24,16,267,172,423,60,150,14,314,92,71,81,501,43,35,74,6,517,232,149,607,83,330,18,2 }, - { 475,421,403,899,51,805,523,958,453,817,23,615,401,801,120,326,202,670,494,730,450,386,115,629,260,576,77,365,569,0,165,13 }, - { 7,20,14,128,77,97,112,202,2,177,16,415,269,318,275,66,107,43,141,414,135,38,307,10,58,0,6,291,32,5,4,40 }, - { 24,14,7,0,2,1,22,28,16,65,168,124,35,67,108,109,18,49,10,149,69,158,5,95,289,12,55,6,36,71,46,21 }, - { 26,80,27,73,122,25,19,17,6,42,684,209,445,573,667,106,45,690,4,611,255,680,297,495,65,59,128,119,483,113,64,53 }, - { 107,259,362,376,465,20,470,147,595,534,612,683,660,43,5,49,581,0,858,35,427,246,97,786,178,356,14,21,142,878,7,279 }, - { 131,30,228,190,856,406,224,88,219,530,863,613,778,274,944,816,187,39,100,160,258,31,44,93,1,321,539,36,871,137,435,531 }, - { 113,6,311,25,45,491,80,611,27,26,209,667,17,73,122,42,684,396,19,85,106,5,614,4,2,255,151,29,1,64,648,61 }, - { 15,515,700,753,0,1,23,51,120,2,13,82,5,260,9,4,341,77,180,115,141,10,7,12,450,8,202,901,197,351,165,93 }, - { 219,127,98,258,395,421,924,293,242,201,697,105,276,51,308,23,453,272,401,944,512,137,13,31,284,567,386,365,116,131,964,125 }, - { 15,180,352,141,515,752,217,82,1,317,854,700,351,753,115,341,110,13,260,120,21,36,33,898,23,10,5,365,4,160,901,137 }, - { 129,123,17,257,162,184,205,249,183,769,5,80,3,4,229,130,119,45,90,99,618,106,57,497,12,128,2,84,59,152,27,40 }, - { 33,102,23,77,64,128,51,13,0,202,10,141,40,15,1,22,117,137,2,86,4,403,269,153,515,196,65,11,700,115,99,5 }, - { 7,14,2,16,172,107,46,92,5,135,35,202,294,87,38,232,29,97,20,21,24,1,60,220,66,43,12,0,126,52,54,70 }, - { 403,576,615,523,475,326,805,817,494,421,51,202,120,450,137,453,23,859,260,401,402,77,33,670,0,958,15,197,386,515,165,480 }, - { 141,352,217,137,0,180,202,349,9,269,23,51,115,291,77,372,13,317,120,752,365,351,93,22,2,341,64,10,82,854,28,18 }, - { 
1,23,13,51,202,141,5,165,21,15,120,180,64,2,197,125,33,102,12,7,137,515,48,128,269,318,93,700,0,403,9,4 }, - { 25,151,6,145,122,29,174,45,113,74,4,665,42,138,2,614,416,287,19,348,746,0,66,26,1,7,64,243,311,396,81,624 }, - { 30,190,254,166,100,382,731,829,88,131,264,795,9,93,625,274,438,1,578,613,716,31,44,39,530,36,616,921,265,203,160,77 }, - { 132,5,21,13,1,23,32,195,379,687,156,121,626,296,48,70,850,146,51,82,883,771,35,49,652,407,60,4,260,0,845,33 }, - { 9,254,0,49,272,131,39,159,688,101,105,578,518,158,286,28,327,333,68,224,252,219,344,16,22,1,716,31,30,228,24,890 }, - { 16,7,35,60,18,20,14,68,9,0,28,118,43,92,126,55,107,2,101,154,24,71,5,202,121,109,22,252,21,97,1,621 }, - { 15,515,700,753,13,1,341,2,0,4,3,5,11,23,10,33,117,12,901,197,6,134,77,8,165,317,21,365,217,7,17,40 }, - { 78,19,444,47,26,390,27,453,130,813,108,730,711,65,412,122,51,680,113,235,690,196,630,283,128,236,14,64,73,53,200,445 }, - { 2,7,29,5,61,6,45,1,66,113,112,14,52,315,738,128,32,151,74,16,20,64,70,21,592,0,25,4,425,43,491,222 }, - { 145,112,74,66,6,29,26,70,19,396,25,87,2,287,135,151,138,222,5,226,42,122,7,307,1,644,45,58,113,651,635,632 }, - { 92,16,7,60,126,24,140,35,14,232,18,121,246,71,46,267,172,150,107,314,132,146,230,2,278,108,330,199,236,5,38,572 }, - { 13,115,197,538,569,341,98,55,165,127,365,762,219,286,844,23,170,206,734,638,535,901,169,253,629,0,873,509,180,10,332,258 }, - { 58,151,74,53,287,27,29,396,6,70,2,73,5,52,112,26,651,1,297,113,17,75,19,45,334,445,145,34,315,549,436,331 }, - { 214,289,90,874,104,751,64,65,312,835,204,249,750,194,74,81,875,32,519,288,348,0,174,247,636,715,138,192,784,6,524,280 }, - { 9,39,28,35,30,0,166,49,1,175,439,158,64,346,36,101,67,364,86,88,274,100,168,55,23,10,420,22,190,141,505,180 }, - { 341,901,15,515,700,753,1,365,10,0,569,180,2,197,115,31,165,3,5,4,44,22,317,13,9,951,23,253,116,143,762,93 }, - { 120,202,77,450,260,15,128,318,102,515,494,13,817,700,269,5,403,51,1,33,23,753,82,326,141,342,291,137,21,523,351,32 }, - { 
13,115,241,64,180,32,125,197,165,4,118,22,21,23,16,247,237,28,225,191,95,141,167,5,0,341,288,35,459,18,177,24 }, - { 16,24,35,14,1,2,7,69,18,46,60,50,267,140,71,189,108,38,75,92,0,5,9,230,67,21,309,335,54,236,394,220 }, - { 15,515,700,753,898,180,901,341,197,638,10,165,33,1,115,4,77,365,317,13,102,217,117,0,5,2,253,3,82,569,21,752 }, - { 193,523,18,84,56,730,233,65,4,817,90,33,643,403,91,511,453,240,59,11,214,51,719,196,153,475,32,123,64,847,102,561 }, - { 112,29,151,2,74,6,66,7,222,145,287,45,5,624,52,25,113,416,58,122,19,70,186,204,4,87,644,549,337,884,32,0 }, - { 13,0,23,2,1,15,33,3,77,515,141,5,4,217,10,51,64,180,700,115,6,117,11,7,753,40,102,165,197,22,317,153 }, - { 28,0,1,67,65,9,2,114,83,69,103,50,36,22,55,24,46,14,124,109,35,7,16,38,133,160,389,323,18,12,154,5 }, - { 121,132,18,167,271,146,101,363,621,9,411,647,16,354,520,60,212,932,1,806,55,0,195,446,68,35,31,364,777,252,407,118 }, - { 26,6,85,396,122,624,25,19,42,445,64,648,573,416,174,680,665,214,45,348,90,65,194,145,113,881,138,289,112,436,297,544 }, - { 16,146,18,92,24,199,60,71,121,126,35,108,156,953,271,674,132,7,32,640,360,246,649,118,21,95,5,517,14,9,1,314 }, - { 51,13,23,453,475,730,719,15,457,403,64,115,33,95,4,523,3,12,21,6,899,102,5,128,401,202,11,141,308,515,22,125 }, - { 151,396,6,53,27,113,58,26,73,112,74,287,45,29,297,19,145,70,138,445,315,436,34,2,17,573,5,61,549,491,1,80 }, - { 223,1,888,774,260,98,269,385,349,202,96,141,421,622,730,863,318,697,87,453,393,418,922,834,751,5,163,335,120,291,352,30 }, - { 16,60,92,35,126,121,7,150,246,18,107,1,598,24,167,195,14,97,71,279,98,441,191,199,517,146,356,223,298,271,230,0 }, - { 22,1,105,28,239,170,0,55,95,31,36,301,2,320,98,127,9,49,44,64,35,67,10,86,5,12,109,23,168,13,21,312 }, - { 2,6,5,207,292,76,1,119,45,32,17,29,61,306,790,58,240,106,14,64,214,151,476,710,7,72,84,128,4,179,70,25 }, - { 51,23,221,254,115,13,438,530,125,48,21,39,541,960,386,49,1,613,15,840,228,308,627,131,688,401,5,326,421,158,165,83 }, - { 
1,5,2,0,12,22,21,36,10,14,48,86,23,13,32,54,3,4,28,65,51,50,137,37,208,114,9,38,17,7,281,202 }, - { 363,23,447,182,296,340,1,93,698,478,379,156,284,144,18,269,21,98,141,70,668,411,664,658,110,914,67,937,180,691,335,291 }, - { 17,32,45,498,41,115,180,197,106,62,54,38,546,165,13,155,468,509,341,243,241,217,542,15,57,536,428,51,117,721,292,129 }, - { 32,95,64,246,22,92,180,13,5,652,125,241,638,237,7,49,4,126,21,115,197,296,888,316,0,165,774,23,16,392,1,534 }, - { 15,515,700,753,33,341,13,217,4,141,77,23,180,317,1,10,102,351,82,115,40,5,854,21,137,11,352,901,365,117,197,0 }, - { 15,120,1,82,93,217,515,260,77,141,13,110,700,351,352,23,180,753,21,854,202,317,64,349,269,51,165,137,5,128,291,36 }, - { 13,23,51,141,77,0,33,4,115,64,2,10,102,202,217,128,1,177,269,11,7,22,6,21,32,9,180,40,15,3,165,318 }, - { 478,264,1,520,98,724,9,682,223,664,21,759,13,772,604,100,23,363,411,48,821,5,0,905,909,447,31,265,88,101,166,39 }, - { 20,29,7,2,77,416,6,128,33,5,0,113,104,32,43,13,491,66,23,21,102,51,74,210,202,525,64,318,10,81,174,14 }, - { 2,1,5,14,7,58,61,29,45,290,46,38,52,21,32,270,6,592,425,0,75,155,16,48,17,50,72,70,207,24,263,663 }, - { 80,6,17,209,106,26,483,113,19,469,255,25,378,27,495,833,45,64,161,2,61,667,76,742,32,90,445,5,814,65,887,119 }, - { 98,223,393,1,354,834,195,791,447,697,284,293,360,541,781,156,51,807,18,664,421,411,163,668,48,31,591,765,883,386,948,23 }, - { 679,141,816,36,93,406,876,144,228,137,1,180,669,21,332,251,5,269,116,187,96,351,202,752,317,64,203,831,574,466,855,345 }, - { 15,515,700,13,1,753,2,0,23,341,3,5,4,10,51,11,33,165,6,7,115,197,12,64,180,153,217,77,9,569,901,317 }, - { 13,23,202,51,5,21,403,15,120,64,1,450,128,141,12,523,33,165,494,125,2,515,269,7,48,102,318,95,260,180,453,197 }, - { 16,18,24,60,71,92,146,246,199,35,140,7,9,118,121,108,167,230,126,132,0,640,156,14,68,133,267,360,649,271,64,55 }, - { 269,141,678,177,202,77,128,318,33,947,40,120,291,349,102,137,64,352,210,864,461,498,13,342,196,23,275,450,954,0,205,111 }, - { 
16,24,92,18,71,60,35,7,108,191,167,246,140,14,126,21,1,68,150,118,149,388,399,9,273,0,121,796,230,48,212,517 }, - { 2,14,1,29,46,75,52,70,69,171,38,7,58,163,16,5,24,220,67,112,223,54,50,409,155,35,267,186,151,334,394,140 }, - { 9,252,100,265,166,39,88,404,329,0,1,520,382,812,101,593,264,274,604,676,30,118,68,553,18,664,363,23,639,865,21,411 }, - { 16,18,35,24,0,60,158,7,22,68,14,49,109,159,55,9,28,71,2,10,5,105,1,118,329,13,344,23,92,20,21,126 }, - { 15,13,515,700,23,0,753,1,51,2,4,10,77,5,3,197,115,165,961,202,9,457,180,12,141,22,33,120,6,11,318,31 }, - { 160,93,251,137,317,1,180,36,120,217,345,752,617,352,332,10,96,531,498,318,365,202,141,269,816,341,901,679,143,35,83,968 }, - { 6,25,42,128,19,59,122,4,85,26,611,27,269,233,45,0,343,91,318,80,11,177,283,73,33,614,2,77,64,138,445,216 }, - { 95,64,74,7,32,81,51,204,0,20,237,65,56,38,91,23,207,180,347,343,29,6,511,52,49,10,25,18,554,370,14,312 }, - { 202,120,326,260,450,817,494,318,137,403,128,77,523,553,859,5,704,1,15,23,13,576,7,16,615,51,682,291,515,0,21,234 }, - { 20,43,107,356,362,126,595,92,359,7,422,319,493,16,858,5,392,246,414,683,60,0,35,945,441,21,259,819,49,97,279,173 }, - { 25,42,6,77,33,102,0,122,4,690,29,483,210,27,21,19,2,300,18,648,680,119,117,59,1,10,342,12,26,153,91,684 }, - { 31,44,299,116,125,242,456,599,22,393,0,144,492,28,268,1,9,963,301,105,367,36,127,170,384,434,206,98,918,10,13,93 }, - { 410,521,686,367,662,88,335,321,201,96,98,772,144,1,934,921,443,435,284,274,264,551,120,897,44,100,33,225,744,418,909,960 }, - { 142,178,878,234,132,786,195,202,77,416,147,929,146,522,167,259,687,639,450,271,626,481,590,5,198,212,771,49,0,465,315,427 }, - { 254,39,131,9,272,0,578,716,310,224,30,49,105,827,518,829,166,333,616,228,613,846,101,219,1,31,890,98,159,938,252,100 }, - { 230,699,854,473,450,351,831,137,855,217,352,704,800,202,251,498,160,144,206,203,317,201,253,752,418,141,1,332,82,180,443,36 }, - { 
403,202,475,453,494,23,51,77,318,402,13,33,128,102,137,141,120,342,269,0,450,4,899,576,40,421,275,117,217,177,196,64 }, - { 23,44,98,182,291,144,116,39,110,141,96,82,905,70,367,264,125,93,77,411,120,1,658,202,100,415,107,363,197,30,447,105 }, - { 15,515,1,13,700,23,77,120,0,753,51,180,202,141,260,5,21,115,2,137,128,9,450,197,365,269,12,326,110,102,318,7 }, - { 0,32,18,95,207,577,193,29,61,104,64,784,715,102,693,887,81,91,583,671,403,5,52,474,397,180,138,49,37,344,38,263 }, - { 0,101,9,68,252,16,100,39,166,364,124,24,154,265,212,88,18,35,329,419,28,118,71,30,65,158,191,55,1,694,21,676 }, - { 16,24,191,18,35,71,167,118,149,68,212,9,0,1,21,108,101,92,60,375,302,7,589,755,124,674,350,48,562,246,13,363 }, - { 2,14,69,24,1,67,46,16,38,103,29,267,7,35,189,135,278,71,108,18,83,309,52,6,149,388,75,236,60,0,150,66 }, - { 13,1,23,0,4,2,51,15,180,33,3,115,5,515,141,10,77,700,11,9,197,341,202,165,217,102,22,7,753,317,365,6 }, - { 447,1,698,411,31,363,98,5,919,4,156,125,759,691,13,64,459,354,44,21,48,293,30,914,478,225,82,120,2,922,848,839 }, - { 854,82,351,217,141,180,352,15,515,752,1,700,317,898,753,244,10,21,922,115,77,36,4,260,64,110,372,13,5,365,120,11 }, - { 0,1,4,13,5,2,82,33,3,120,10,23,9,11,77,260,21,102,8,31,40,6,351,51,64,450,22,117,93,110,7,457 }, - { 51,13,403,23,12,475,1,2,21,5,453,523,115,202,817,7,0,99,3,6,450,120,494,64,22,95,49,899,10,37,32,141 }, - { 180,752,352,141,498,864,317,217,9,0,115,237,230,39,30,197,83,1,930,64,35,365,372,13,579,88,702,36,101,901,482,21 }, - { 16,24,0,7,22,18,28,35,14,158,71,2,109,60,1,168,49,154,124,68,10,55,92,118,159,9,5,747,95,105,65,6 }, - { 15,515,700,753,0,23,1,13,341,51,120,77,141,4,137,33,260,82,202,9,180,5,351,2,291,10,11,901,21,115,40,128 }, - { 24,68,35,149,18,16,0,7,9,14,189,108,69,65,67,1,71,2,118,28,140,101,114,336,230,124,175,133,46,55,251,154 }, - { 13,308,197,115,125,9,165,237,391,23,509,569,546,28,49,629,22,338,317,254,749,180,468,159,903,386,217,352,558,39,36,734 }, - { 
64,90,32,217,77,4,141,216,172,312,128,13,86,33,597,147,352,95,5,115,875,22,59,11,102,14,182,437,97,177,874,707 }, - { 1,22,36,0,105,28,2,67,95,49,55,5,239,12,86,9,83,170,312,64,31,21,23,10,164,50,114,159,208,13,7,320 }, - { 9,18,16,0,159,105,101,252,49,310,24,68,35,39,265,272,7,1,60,28,455,890,329,557,118,286,55,137,327,167,5,13 }, - { 129,123,214,249,618,17,5,257,205,184,460,76,2,162,769,245,90,106,128,45,119,1,183,4,3,12,179,64,6,229,99,209 }, - { 51,453,719,457,23,13,730,899,475,386,4,15,11,12,670,196,515,523,961,401,153,3,700,99,753,117,403,32,120,165,57,0 }, - { 173,66,192,204,20,74,104,636,7,43,289,426,825,712,560,214,81,750,65,97,707,0,90,414,64,348,32,500,22,861,95,6 }, - { 13,23,1,51,5,21,141,120,202,15,165,2,515,180,12,125,0,64,82,700,197,269,32,48,260,128,115,93,9,137,33,753 }, - { 200,34,322,78,472,390,27,714,19,14,136,161,453,176,236,444,59,3,62,128,108,57,283,862,73,53,47,17,412,813,4,56 }, - { 33,347,66,204,426,498,56,172,97,95,5,681,546,22,10,0,135,180,4,241,19,174,6,353,263,21,7,370,42,197,27,808 }, - { 131,224,219,187,385,371,258,442,254,737,31,98,836,127,924,944,44,871,908,716,39,827,201,574,116,137,36,1,276,242,578,616 }, - { 1,15,2,180,4,141,13,515,0,5,3,115,700,901,341,23,217,352,753,51,197,77,317,33,365,752,165,21,6,7,269,93 }, - { 370,91,718,74,81,510,397,66,636,240,355,84,138,511,18,278,6,681,701,289,90,5,214,582,64,104,0,643,192,65,750,32 }, - { 15,515,700,753,13,120,1,0,165,2,197,23,260,180,4,82,51,386,8,5,12,10,3,141,351,341,326,9,450,7,64,6 }, - { 32,2,76,5,1,292,72,45,476,214,21,241,29,14,17,48,129,90,179,460,464,123,290,148,519,205,3,263,249,38,710,89 }, - { 13,165,115,17,197,569,23,509,457,45,32,41,106,180,62,38,659,734,155,536,341,629,961,873,587,54,431,37,391,99,405,428 }, - { 68,24,35,16,0,101,9,124,154,71,149,65,18,175,28,118,7,55,302,108,92,14,22,346,1,39,429,252,375,364,10,67 }, - { 0,9,1,68,230,65,35,69,83,23,101,13,141,67,217,352,21,39,16,88,28,124,212,100,115,154,51,64,30,36,10,317 }, - { 
100,265,88,909,410,382,812,593,1,213,321,30,252,230,352,264,9,166,689,39,676,98,21,466,724,639,478,217,13,48,553,101 }, - { 113,61,198,904,43,0,5,37,899,325,20,59,33,523,204,725,817,389,470,329,222,40,174,58,22,453,690,848,122,104,788,105 }, - { 16,24,141,18,7,0,71,140,35,269,75,352,12,9,108,217,5,330,60,64,199,70,22,13,486,246,318,133,65,50,23,498 }, - { 271,167,121,60,18,191,146,199,16,1,446,132,575,212,463,354,126,35,598,566,727,98,107,21,608,955,640,407,5,24,223,68 }, - { 15,515,115,217,700,13,317,753,141,180,33,23,110,120,4,341,82,10,1,260,365,36,64,854,351,21,51,352,137,77,40,0 }, - { 173,7,97,356,43,107,20,387,729,104,426,232,560,595,359,392,414,707,885,81,5,0,66,858,612,49,861,14,22,32,819,230 }, - { 15,515,700,753,13,23,51,82,0,33,165,120,196,4,1,2,197,453,260,351,180,12,40,8,386,110,5,326,9,141,217,457 }, - { 9,100,120,30,77,795,137,82,202,39,264,827,578,127,0,166,373,318,18,326,141,260,1,450,731,31,33,395,217,291,341,254 }, - { 14,2,67,1,24,69,0,28,65,7,46,18,114,108,36,83,38,398,9,16,124,133,103,154,50,55,22,267,29,160,35,547 }, - { 14,7,69,24,66,16,2,267,189,67,71,150,140,97,18,60,172,35,6,1,38,149,388,92,83,135,108,74,462,380,29,36 }, - { 15,515,700,753,1,13,2,0,4,341,5,3,23,365,11,117,180,10,12,33,134,115,77,197,217,165,6,7,317,102,21,9 }, - { 1,22,0,12,5,2,36,28,21,10,86,13,23,49,128,9,95,51,55,96,208,141,48,202,4,137,37,64,105,3,50,7 }, - { 605,630,63,123,736,650,65,108,444,368,561,389,19,25,42,619,122,194,183,27,53,33,84,26,297,813,114,73,256,235,249,216 }, - { 127,39,9,0,31,371,98,254,1,385,395,44,30,836,187,131,100,116,284,578,299,166,28,21,737,16,276,272,23,49,137,935 }, - { 2,61,6,29,45,151,1,655,7,207,32,5,112,425,17,76,833,4,14,64,58,106,119,25,113,128,72,52,70,21,292,790 }, - { 2,5,17,14,3,29,23,27,13,401,46,6,51,58,1,453,45,53,34,52,133,19,236,26,181,114,99,366,151,108,218,38 }, - { 31,22,170,1,301,44,127,98,36,28,55,105,239,0,338,116,512,299,293,125,86,10,242,395,2,13,9,64,841,23,95,685 }, - { 
1,21,2,14,5,32,48,50,38,270,46,76,290,72,45,54,17,0,155,221,263,207,37,281,430,3,89,12,181,408,36,67 }, - { 17,106,119,378,84,240,62,80,383,136,306,3,56,790,742,5,207,504,64,440,32,128,45,2,123,209,14,4,61,57,297,667 }, - { 13,15,1,515,23,2,0,700,4,115,3,51,5,10,753,180,33,341,217,11,165,317,365,197,6,77,40,64,22,9,7,117 }, - { 772,335,96,744,1,367,662,686,652,897,303,264,521,31,225,410,141,520,260,116,64,44,321,98,144,88,919,966,340,269,349,284 }, - { 1,0,5,28,36,2,12,22,83,67,65,50,24,14,9,96,21,218,18,114,48,281,54,10,7,160,181,103,37,23,133,99 }, - { 25,6,145,42,138,81,174,348,525,544,26,74,85,280,287,648,746,91,66,0,29,396,204,64,636,90,122,194,355,104,65,233 }, - { 16,24,0,18,28,158,7,35,49,22,68,159,55,1,14,109,105,2,9,71,65,154,124,95,424,344,60,239,118,577,21,10 }, - { 352,854,699,230,93,689,137,144,217,160,251,36,669,202,351,120,617,855,752,203,332,82,450,180,141,748,831,30,258,201,1,816 }, - { 15,13,515,700,1,2,0,753,23,5,4,3,51,33,10,115,11,317,217,77,180,341,117,165,6,134,197,153,64,9,102,7 }, - { 104,20,43,173,66,319,0,77,202,7,198,5,97,580,355,74,2,204,174,52,712,234,426,155,102,192,32,4,500,337,226,904 }, - { 13,23,15,51,1,515,0,2,5,700,141,4,753,165,115,12,3,21,457,10,180,269,32,6,197,202,9,7,120,11,77,33 }, - { 15,515,700,753,0,1,13,2,23,115,4,317,8,3,5,51,9,341,10,217,22,365,33,457,6,180,77,901,197,120,18,7 }, - { 15,515,700,753,1,0,2,3,4,13,5,141,23,16,82,217,457,10,365,180,9,317,51,21,269,898,64,202,11,12,318,341 }, - { 2,5,1,14,50,38,29,17,114,46,133,3,45,21,58,171,181,36,218,12,6,52,0,48,137,65,361,23,155,4,285,51 }, - { 0,68,9,65,101,124,35,212,16,149,154,100,24,1,114,336,67,589,252,39,71,189,69,562,18,13,30,398,118,88,265,264 }, - { 1,2,14,22,0,7,67,65,28,36,24,46,168,5,86,69,38,16,49,12,289,10,194,50,83,114,95,6,18,23,55,158 }, - { 13,23,15,51,515,0,700,4,1,753,2,10,115,961,457,12,33,11,3,5,197,9,165,77,102,403,453,40,64,22,37,59 }, - { 15,515,700,753,0,1,2,13,23,5,51,901,8,9,180,3,7,82,4,120,12,10,719,341,6,31,141,457,197,22,115,93 }, - { 
100,252,88,101,0,265,9,724,48,1,21,352,213,676,410,382,321,230,30,329,593,909,39,812,553,217,23,689,520,264,166,419 }, - { 2,5,1,58,171,14,46,50,29,52,45,38,186,155,67,54,151,281,334,61,48,96,17,181,103,400,502,227,21,223,12,69 }, - { 23,120,13,1,202,141,51,21,165,128,260,15,5,269,137,64,33,180,82,318,93,197,77,326,515,125,110,700,450,2,32,48 }, - { 341,197,10,901,13,15,8,638,569,515,479,23,180,873,700,165,143,642,0,961,753,951,1,115,509,499,116,12,498,242,82,206 }, - { 15,1,23,13,515,21,120,51,2,141,202,700,5,180,165,0,753,197,12,7,33,260,352,137,269,4,82,128,48,9,110,6 }, - { 2,29,50,58,1,6,5,52,14,262,17,46,27,53,151,34,171,74,324,26,38,309,45,113,19,96,287,396,223,67,73,583 }, - { 13,23,141,51,4,202,0,115,77,2,33,217,5,317,180,64,10,269,3,9,15,21,1,128,102,137,318,11,352,515,22,31 }, - { 1,67,0,24,50,5,14,18,16,69,2,9,103,35,83,12,96,28,54,7,58,223,21,46,281,48,65,181,22,38,36,108 }, - { 13,23,141,51,77,64,202,115,33,102,128,4,0,269,10,21,217,32,180,318,9,137,2,11,22,291,7,177,16,31,165,197 }, - { 317,115,180,365,873,498,217,341,13,752,482,197,569,352,1,901,36,23,457,468,165,346,546,143,509,134,579,876,868,2,332,21 }, - { 184,257,205,229,152,17,57,497,266,432,452,524,5,619,381,32,4,90,2,12,313,128,45,59,245,106,3,471,129,769,339,214 }, - { 13,23,1,0,15,2,4,515,51,3,10,33,5,700,115,180,753,77,11,365,341,217,9,6,197,7,102,165,317,40,22,64 }, - { 626,70,771,687,379,846,767,761,518,878,82,481,31,786,49,591,178,163,407,44,87,13,845,125,590,371,195,120,98,557,937,351 }, - { 264,1,410,909,772,897,686,521,335,478,98,96,691,639,100,44,284,382,31,321,744,88,914,724,662,765,223,9,682,363,0,367 }, - { 13,23,1,2,0,15,51,515,5,10,4,33,115,77,180,700,3,141,217,40,6,753,317,197,64,165,7,11,102,9,341,22 }, - { 141,77,13,64,269,23,115,21,318,217,5,202,102,33,137,2,15,291,177,51,48,180,32,4,515,352,128,7,0,10,96,11 }, - { 13,15,23,515,51,0,700,753,1,2,4,10,33,11,961,453,115,40,457,14,12,3,9,5,165,401,197,77,22,21,64,102 }, - { 
1,22,0,36,2,31,5,12,13,105,28,9,49,86,141,21,23,95,128,55,44,115,170,10,164,98,180,4,137,239,83,51 }, - { 100,101,88,0,252,9,265,30,21,39,759,724,213,329,321,13,419,68,562,382,676,352,694,35,553,410,1,166,909,593,230,23 }, - { 539,88,30,190,321,530,840,144,669,435,957,748,778,100,96,418,203,213,1,131,410,228,466,274,36,382,219,863,613,83,822,352 }, - { 7,97,92,173,298,107,43,314,232,140,16,356,20,387,729,362,126,359,246,14,230,501,426,441,0,5,560,66,104,779,35,60 }, - { 2,14,7,1,58,5,46,16,38,70,75,45,24,155,29,0,21,52,61,163,220,50,69,270,35,48,32,171,18,6,64,54 }, - { 447,411,363,664,647,98,621,1,354,271,223,478,18,777,781,936,360,759,167,132,121,48,21,156,9,195,118,293,23,691,13,264 }, - { 2,1,14,65,36,67,0,7,46,22,69,5,38,24,28,6,83,29,86,114,168,50,124,208,12,18,108,10,194,484,103,16 }, - { 421,386,51,791,730,958,165,801,23,453,697,403,615,13,221,523,24,899,401,326,551,670,576,102,18,33,125,77,566,115,203,197 }, - { 104,319,422,945,0,81,20,43,715,32,784,693,879,7,397,74,306,207,52,681,671,2,61,173,6,636,904,95,887,5,18,192 }, - { 2,29,1,46,14,52,70,262,6,26,50,67,75,96,309,38,103,112,58,19,5,163,145,83,74,220,223,357,24,69,331,25 }, - { 786,929,590,771,687,626,941,178,465,259,70,5,13,21,35,534,107,518,132,49,878,48,146,121,379,279,31,767,147,195,108,125 }, - { 5,2,186,45,17,29,48,50,14,61,46,155,400,1,227,171,52,58,38,54,430,209,80,281,3,106,536,311,181,243,21,502 }, - { 0,32,64,95,817,494,342,403,207,202,194,389,453,365,312,180,316,5,690,237,848,577,450,61,102,523,475,289,49,241,65,482 }, - { 23,13,51,15,202,515,1,0,2,12,115,4,700,5,165,753,77,457,21,141,128,10,3,64,403,32,197,318,9,11,33,117 }, - { 119,2,6,76,5,17,45,292,306,240,32,1,19,84,64,61,4,209,710,80,26,0,106,27,214,25,128,129,29,179,3,113 }, - { 15,515,700,1,753,0,13,23,180,120,51,2,5,33,165,197,9,450,7,260,115,523,4,12,202,141,82,77,21,102,8,6 }, - { 6,138,74,280,222,85,66,226,25,42,87,204,64,337,29,135,95,174,235,26,145,65,19,32,792,294,112,52,256,2,5,22 }, - { 
39,9,0,101,333,158,49,252,310,254,272,68,16,18,159,286,344,455,30,109,627,327,24,105,419,100,364,22,35,1,329,709 }, - { 21,5,32,14,2,1,38,72,76,54,17,3,48,221,270,0,45,46,12,181,37,89,36,50,540,290,430,10,4,741,99,23 }, - { 120,137,202,269,141,260,318,450,922,494,77,291,82,15,5,351,128,1,515,326,64,854,700,352,342,21,753,678,349,32,523,90 }, - { 16,24,68,35,71,18,149,118,191,167,9,0,212,124,65,246,7,67,140,189,399,101,133,60,1,108,267,114,69,92,695,154 }, - { 28,0,9,67,1,22,109,36,55,65,194,114,39,83,49,69,2,35,103,50,158,208,86,420,168,289,505,24,7,185,5,323 }, - { 147,325,198,427,142,178,202,798,5,376,20,318,259,43,120,450,77,234,534,904,470,465,878,725,329,14,315,0,260,858,70,61 }, - { 28,0,65,9,109,1,55,67,35,22,24,39,289,7,175,14,114,2,158,124,420,194,68,16,336,36,49,69,168,570,154,505 }, - { 141,180,13,115,1,23,4,269,2,202,0,317,217,51,15,5,21,352,77,318,3,752,197,10,165,365,137,341,9,515,33,64 }, - { 15,515,700,0,753,1,2,13,23,9,51,5,4,901,33,7,8,3,12,93,180,120,197,6,82,341,10,141,22,260,457,115 }, - { 6,74,66,1,25,75,324,380,278,26,138,85,135,500,87,42,220,841,97,350,29,19,70,226,38,21,52,606,235,889,2,14 }, - { 13,23,51,12,4,15,453,0,457,1,403,165,115,3,11,2,64,5,401,10,515,37,202,33,40,32,99,475,197,700,308,17 }, - { 788,180,5,83,693,319,314,4,32,21,17,11,817,3,510,498,33,12,24,104,814,120,64,117,306,804,523,450,288,160,102,43 }, - { 15,1,515,13,2,700,23,0,4,753,3,341,5,51,33,11,10,6,77,7,115,102,180,165,141,9,197,217,901,40,12,64 }, - { 101,18,9,167,520,16,0,118,60,212,604,364,694,24,55,252,68,917,264,35,1,121,146,363,39,100,806,5,21,166,191,28 }, - { 13,23,0,33,51,141,77,4,64,2,115,217,9,102,7,202,21,10,180,3,15,128,5,269,6,32,11,16,165,352,22,317 }, - { 66,135,97,74,172,6,278,7,204,324,138,174,29,85,2,87,25,140,92,192,52,38,802,69,448,500,808,620,22,1,280,232 }, - { 2,24,69,6,97,7,1,0,14,298,423,66,67,29,150,25,189,267,124,74,607,18,36,81,172,33,83,38,52,273,71,809 }, - { 
0,9,158,39,68,49,109,16,24,333,35,344,101,22,159,254,272,30,124,65,28,18,793,154,310,252,327,105,627,419,286,55 }, - { 15,515,700,10,753,33,77,180,4,341,1,197,13,115,365,23,901,317,5,102,11,217,165,117,141,40,2,3,253,21,134,55 }, - { 100,166,382,478,265,264,88,39,98,1,404,274,9,593,724,921,639,438,363,682,411,31,30,812,96,447,821,905,252,0,223,435 }, - { 19,283,436,53,297,26,813,432,27,128,42,25,390,503,122,736,73,123,605,63,389,529,630,250,690,65,381,444,6,269,108,216 }, - { 33,202,13,128,494,0,51,141,269,1,4,2,102,180,15,137,65,95,6,450,77,40,117,59,457,36,196,817,134,86,49,515 }, - { 18,16,60,68,101,167,191,118,35,121,9,212,55,0,126,1,24,647,199,146,520,107,628,621,363,71,21,28,346,92,806,727 }, - { 352,230,217,531,160,93,36,669,748,854,689,258,137,871,728,699,752,251,574,202,373,351,228,120,717,260,144,219,268,82,816,1 }, - { 15,515,700,753,13,23,0,51,8,1,4,82,165,77,110,33,10,180,5,202,11,22,120,12,9,197,115,93,403,141,40,351 }, - { 0,9,101,217,35,88,352,100,39,175,30,68,562,752,13,317,252,115,180,197,64,1,83,141,65,213,165,230,194,36,28,265 }, - { 32,64,5,470,288,90,21,147,0,95,356,22,20,519,835,312,819,18,247,182,11,97,13,4,387,49,43,298,316,48,107,7 }, - { 16,14,24,1,7,2,35,0,5,50,18,69,46,12,58,75,9,67,70,163,21,54,38,48,223,502,281,37,140,60,28,10 }, - { 18,265,9,252,39,195,354,411,1,16,132,101,121,682,167,203,5,363,146,593,35,333,21,271,60,13,100,0,156,327,7,520 }, - { 4,13,1,115,141,23,2,180,5,0,51,3,217,202,77,15,33,269,341,318,317,21,165,515,11,10,197,365,9,137,64,352 }, - { 7,24,16,14,71,35,18,92,140,189,108,149,68,60,69,150,2,230,97,66,0,458,67,1,65,251,38,314,388,267,36,46 }, - { 1,22,31,36,0,2,44,5,141,105,180,170,12,64,13,98,86,55,23,21,28,164,115,127,10,125,128,4,9,239,352,197 }, - { 66,7,97,2,192,20,52,43,135,74,560,107,104,0,750,147,414,29,580,173,324,376,226,194,77,174,204,38,356,64,16,470 }, - { 0,101,9,68,35,124,24,65,39,16,252,100,154,166,28,364,149,694,30,88,55,346,1,419,71,439,265,289,22,21,175,158 }, - { 
39,9,100,30,127,0,737,856,31,836,827,254,98,931,166,88,93,1,44,190,131,228,120,395,625,385,863,264,219,373,110,28 }, - { 5,17,2,3,21,45,14,155,48,32,38,1,328,181,186,46,23,51,12,61,227,29,106,54,99,133,62,832,13,37,514,543 }, - { 131,613,30,224,228,716,274,100,827,406,219,856,39,190,31,88,1,166,9,44,829,863,931,93,0,187,625,924,127,98,137,254 }, - { 352,217,64,141,752,269,180,864,437,372,954,115,498,177,77,349,317,318,579,291,947,197,247,0,23,717,237,304,128,457,776,678 }, - { 141,217,180,317,352,115,15,341,1,23,13,365,515,752,2,64,0,5,498,700,372,165,51,237,753,77,244,197,137,4,21,253 }, - { 5,2,17,14,1,45,3,38,21,29,181,58,46,48,50,133,114,171,61,155,32,6,186,281,361,12,36,54,4,13,52,514 }, - { 6,2,119,25,790,4,45,483,655,113,1,29,76,26,32,19,887,17,128,0,292,833,59,61,106,64,77,814,14,151,84,42 }, - { 13,23,2,0,51,4,1,115,141,3,5,180,33,217,77,9,202,11,7,15,10,6,317,64,21,197,515,165,102,128,22,269 }, - { 13,23,51,202,21,5,1,15,141,165,120,2,115,12,32,0,515,128,318,64,125,700,4,403,197,453,180,457,3,7,10,6 }, - { 9,39,31,30,0,127,1,44,100,131,98,187,385,276,88,442,219,908,254,116,49,166,935,28,201,36,141,827,137,299,284,21 }, - { 22,49,28,109,9,185,105,95,1,131,159,272,36,67,86,254,39,55,35,0,505,31,83,169,208,327,286,98,168,535,312,708 }, - { 5,1,2,21,0,12,48,22,10,14,36,3,32,17,23,54,86,38,4,51,13,37,137,50,65,281,114,45,28,99,58,202 }, - { 141,269,352,82,217,351,180,854,372,922,752,1,15,260,317,318,515,202,64,700,120,349,954,753,77,35,67,717,898,137,365,115 }, - { 144,203,613,418,326,406,96,669,137,679,1,228,494,822,840,317,36,83,855,160,817,859,856,816,217,831,345,93,876,77,44,251 }, - { 15,13,515,23,700,753,51,1,33,0,202,21,2,5,180,141,120,165,217,82,12,117,4,352,269,197,115,32,3,9,134,260 }, - { 13,23,115,1,0,51,4,77,2,33,15,141,10,5,341,180,515,3,217,202,9,365,317,64,700,102,11,165,197,22,753,7 }, - { 531,943,373,160,728,93,206,260,261,559,964,269,717,535,332,384,365,295,110,533,141,10,180,352,244,137,120,55,959,564,36,253 }, - { 
1,31,36,170,22,55,44,10,86,64,127,0,2,98,301,164,740,338,237,143,5,125,116,13,242,141,299,180,23,169,105,12 }, - { 13,23,141,77,51,4,64,32,33,202,115,269,102,128,21,0,177,180,318,90,40,10,7,5,137,15,217,352,9,291,59,22 }, - { 202,120,260,318,77,15,450,269,1,82,33,23,141,13,51,515,351,128,700,5,64,326,137,21,102,110,753,494,93,523,817,165 }, - { 523,899,102,33,730,15,23,403,719,117,153,13,515,51,475,4,700,5,453,817,196,753,494,40,202,120,1,2,450,457,17,421 }, - { 202,403,494,450,120,817,523,475,318,453,33,402,128,77,13,51,260,576,342,102,15,23,515,4,700,5,82,753,326,210,137,615 }, - { 2,29,112,66,7,52,70,151,58,87,135,5,74,226,307,6,14,186,1,45,549,172,644,25,113,287,46,155,334,64,294,97 }, - { 1,77,349,291,260,120,652,102,5,39,64,269,9,33,340,342,13,98,888,698,23,296,100,318,51,202,87,137,638,128,50,850 }, - { 1,13,15,2,0,4,23,515,5,141,180,3,700,341,115,51,753,269,77,901,197,352,217,33,21,11,365,6,165,202,7,317 }, - { 7,14,16,2,46,5,70,58,1,38,24,35,92,163,0,75,21,18,50,54,140,12,87,220,155,69,171,23,60,9,13,307 }, - { 2,29,66,226,135,7,87,74,52,278,6,75,222,220,294,70,97,1,145,25,172,262,324,38,69,112,331,92,5,14,140,26 }, - { 15,515,700,753,33,77,117,4,1,102,134,40,153,11,13,196,217,21,5,51,23,115,32,3,2,202,141,137,128,291,48,177 }, - { 15,217,82,515,351,141,317,1,13,700,260,77,110,120,115,854,23,753,180,51,21,36,137,922,5,64,365,352,291,202,93,341 }, - { 31,190,30,373,120,110,863,88,44,127,908,856,260,318,82,98,93,187,836,717,935,39,442,131,141,254,228,219,1,968,77,116 }, - { 23,13,308,9,165,115,51,21,401,125,49,39,197,391,159,254,217,743,28,438,773,629,558,386,341,95,32,317,876,679,109,166 }, - { 49,5,43,165,7,0,21,104,125,22,173,422,64,13,623,102,20,18,314,95,91,141,23,31,193,51,391,900,779,558,92,232 }, - { 1,4,13,2,15,0,23,515,77,3,341,33,5,700,115,51,202,753,141,180,11,10,102,217,6,901,40,7,197,318,317,365 }, - { 93,843,295,120,36,160,206,261,10,137,567,110,384,141,943,268,201,332,258,55,1,180,64,116,44,144,699,203,282,31,260,373 }, - { 
81,7,192,426,43,173,172,104,879,91,5,712,715,526,6,97,568,95,448,66,33,861,560,32,49,20,0,636,232,825,2,22 }, - { 475,403,51,453,33,102,13,23,494,202,0,196,15,77,153,18,4,117,515,450,318,22,730,128,700,421,65,753,269,402,134,817 }, - { 141,269,260,318,202,120,352,349,82,351,1,5,854,137,64,291,15,922,180,851,32,77,515,372,21,700,7,217,13,947,33,753 }, - { 15,515,700,753,1,13,0,2,4,23,3,5,180,115,197,12,51,165,217,10,961,9,6,141,352,21,8,7,33,77,457,120 }, - { 39,166,9,30,0,101,274,404,252,333,190,100,158,438,310,88,68,265,656,21,1,530,329,344,49,539,625,254,13,131,48,419 }, - { 0,1,28,9,22,12,65,83,67,36,5,2,50,55,96,109,16,13,24,23,21,238,49,18,285,160,128,39,69,114,7,323 }, - { 15,515,700,1,0,753,23,2,13,51,5,180,115,6,3,9,197,12,457,120,7,165,901,82,4,21,8,141,31,33,719,341 }, - { 9,39,30,0,28,166,22,49,180,1,352,35,317,158,88,141,498,131,115,345,752,128,228,217,100,83,219,930,13,251,365,36 }, - { 5,61,45,2,80,29,311,209,6,17,58,1,151,106,454,667,243,70,52,496,287,592,255,738,64,74,483,14,27,32,112,19 }, - { 31,125,22,44,299,456,685,242,599,116,170,28,0,1,492,393,506,144,558,10,268,301,239,23,13,36,963,367,55,206,105,95 }, - { 187,258,926,574,839,93,228,860,406,219,871,160,137,531,224,116,120,902,669,201,36,131,44,144,843,533,318,384,442,1,434,268 }, - { 7,107,75,16,87,9,64,177,24,18,291,77,349,141,60,232,23,0,51,269,132,14,5,21,70,32,678,112,126,121,71,947 }, - { 15,515,700,753,1,0,2,13,5,3,23,180,4,115,901,51,6,8,961,9,7,10,12,82,197,22,141,341,33,120,365,457 }, - { 13,23,51,1,5,202,2,12,15,21,165,141,0,115,3,4,32,515,197,10,180,318,128,120,64,700,6,7,403,269,457,137 }, - { 1,2,0,77,64,3,141,13,33,15,23,10,6,102,5,515,180,4,117,7,700,165,11,217,269,40,753,115,128,17,197,134 }, - { 345,531,332,269,260,317,717,752,373,351,180,352,728,82,10,365,160,533,217,143,498,251,244,93,341,901,36,1,141,898,55,864 }, - { 16,7,33,189,92,77,388,60,140,35,102,24,14,1,230,21,150,117,733,314,18,915,71,13,108,134,5,64,69,2,98,22 }, - { 
142,202,234,178,5,786,77,49,70,0,416,450,639,878,1,48,21,929,147,259,315,455,198,120,12,481,163,113,846,329,318,22 }, - { 81,715,192,0,173,712,681,104,636,91,74,20,750,370,7,718,95,879,22,43,825,560,422,64,207,49,172,18,397,10,426,319 }, - { 13,23,15,51,515,0,700,753,4,1,961,2,10,115,457,11,33,453,3,5,9,40,12,197,165,77,401,475,64,102,22,569 }, - { 64,297,5,445,95,61,250,311,80,34,17,312,45,2,86,472,58,14,180,53,22,151,869,738,247,237,29,1,128,165,21,288 }, - { 16,24,18,71,7,35,118,92,14,154,60,68,0,149,28,302,124,150,55,175,2,9,97,1,429,20,108,273,22,65,43,126 }, - { 195,360,156,771,132,163,626,687,591,371,883,146,121,846,70,586,379,13,293,98,407,48,761,296,354,18,31,1,55,49,21,105 }, - { 202,13,77,23,318,33,51,0,4,141,5,21,217,32,291,102,64,128,15,10,9,494,269,137,515,403,1,31,117,700,120,317 }, - { 7,192,97,81,172,66,426,173,43,715,712,232,861,879,104,330,568,298,74,893,885,526,387,825,92,140,91,14,636,6,5,448 }, - { 2,1,14,6,67,7,65,69,24,36,66,124,108,83,38,29,22,86,0,18,484,5,28,46,12,10,25,302,150,16,650,74 }, - { 33,77,13,202,102,4,0,23,128,51,141,64,318,22,403,269,137,10,15,40,494,117,32,59,11,153,1,21,177,196,515,115 }, - { 269,141,318,77,349,291,217,202,33,15,372,304,515,22,102,177,351,700,352,120,5,137,10,317,260,753,64,851,854,403,49,21 }, - { 6,74,66,85,138,25,87,42,135,26,226,222,280,29,75,500,220,278,792,70,19,2,1,294,204,64,32,145,853,112,52,174 }, - { 9,0,105,39,16,18,1,101,272,31,127,98,24,518,333,252,310,28,68,737,846,371,158,916,938,49,30,7,286,35,301,455 }, - { 2,1,14,67,24,46,83,108,69,29,38,103,114,36,6,133,18,0,28,7,65,52,236,75,50,398,5,309,135,16,278,160 }, - { 22,1,28,105,49,95,0,2,67,55,36,239,168,159,65,35,14,170,320,164,9,7,10,5,114,12,83,64,194,109,24,301 }, - { 7,66,172,97,92,140,232,568,298,14,192,314,16,380,135,324,2,330,74,38,357,448,126,69,35,5,107,6,387,60,204,572 }, - { 2,29,1,14,5,6,46,133,114,50,52,26,218,108,19,13,366,236,27,45,70,17,58,23,86,51,137,65,112,38,25,12 }, - { 
6,1,74,25,2,26,29,66,42,19,75,14,388,67,108,70,52,85,103,65,38,138,357,133,114,594,324,516,603,96,309,69 }, - { 22,9,28,1,36,49,109,105,86,95,131,31,169,39,0,141,272,159,44,55,98,180,13,30,185,115,83,128,352,137,64,208 }, - { 203,822,326,23,77,859,403,494,576,39,473,182,33,1,691,100,18,217,13,817,411,447,363,102,93,966,96,478,291,704,310,120 }, - { 15,515,700,753,33,77,117,4,102,134,115,153,13,1,40,217,11,196,341,2,5,3,23,317,365,0,21,291,32,51,12,569 }, - { 15,515,700,753,13,0,23,1,8,82,51,165,197,120,180,2,9,33,4,110,5,12,10,260,351,386,141,7,457,475,93,901 }, - { 1,23,13,15,51,0,21,2,515,5,141,180,120,165,700,202,197,4,753,12,33,9,7,82,115,93,3,352,260,6,110,48 }, - { 15,515,700,753,0,1,2,5,3,4,8,13,180,341,10,23,7,6,9,51,77,197,961,115,165,82,120,31,22,202,457,217 }, - { 559,661,922,564,141,533,10,317,373,110,143,269,244,260,332,261,93,642,752,295,351,876,531,843,180,206,728,384,352,1,434,120 }, - { 15,515,1,0,700,2,13,23,753,5,51,180,3,165,12,6,197,115,4,9,7,21,719,8,457,82,141,120,33,22,901,10 }, - { 198,234,0,325,5,77,202,416,20,147,32,43,639,315,49,61,450,455,142,21,113,230,22,318,725,342,207,13,95,904,494,10 }, - { 447,264,363,9,411,676,682,1,156,664,821,478,166,354,812,39,100,905,382,897,98,18,759,404,31,101,724,5,265,223,88,13 }, - { 24,14,69,16,35,18,2,7,108,189,71,67,267,149,1,46,68,83,38,140,0,236,251,9,388,60,133,103,65,28,29,50 }, - { 16,7,35,20,14,18,109,2,43,120,107,60,1,121,326,907,553,77,13,147,23,82,68,260,0,403,5,24,202,126,265,199 }, - { 30,131,187,276,31,44,613,442,39,9,190,228,1,839,116,935,908,219,127,88,244,224,110,137,93,201,98,141,36,567,0,856 }, - { 98,223,1,393,812,265,100,421,593,834,697,48,51,410,791,382,21,88,31,284,9,125,96,293,230,23,213,217,656,689,541,5 }, - { 98,51,127,219,616,258,105,293,395,421,924,512,31,308,23,201,116,44,301,272,763,276,125,13,453,170,401,295,261,944,115,567 }, - { 253,110,951,352,811,206,332,180,141,244,282,10,854,417,642,638,559,752,143,911,260,55,93,533,499,498,661,120,351,959,564,341 }, - { 
49,9,159,254,272,158,0,131,28,39,627,105,327,286,22,518,688,578,68,347,374,101,224,424,95,35,219,24,16,364,65,344 }, - { 105,22,131,272,98,286,327,109,374,239,28,95,320,219,9,224,55,127,187,36,578,169,64,185,538,1,159,10,371,634,49,616 }, - { 691,478,340,1,658,914,724,363,744,698,156,772,411,296,682,447,9,284,335,98,264,303,909,21,354,410,225,13,664,686,88,919 }, - { 16,24,35,18,71,7,140,108,189,267,92,60,14,230,68,69,9,1,149,46,246,191,388,167,2,0,118,236,133,21,674,5 }, - { 9,0,127,31,98,371,395,39,737,49,1,44,385,272,512,28,293,242,836,761,254,299,101,16,187,22,116,158,159,131,18,21 }, - { 16,18,68,0,60,35,9,101,252,28,118,24,419,55,7,109,604,71,39,121,22,364,14,158,191,167,925,126,329,21,92,49 }, - { 116,268,203,93,206,692,551,31,417,940,499,8,473,44,202,523,959,0,120,137,559,22,450,403,576,10,728,299,13,326,51,1 }, - { 225,459,744,1,919,914,691,330,622,21,141,223,5,284,934,335,88,538,340,82,385,839,363,120,478,98,48,30,64,32,686,166 }, - { 5,1,2,0,14,36,21,281,12,48,50,67,22,28,54,83,24,218,38,10,181,9,32,18,65,58,45,114,430,17,99,37 }, - { 137,450,202,704,120,260,326,318,968,269,851,403,291,77,23,141,182,310,494,373,351,457,82,890,349,110,60,128,817,678,105,96 }, - { 15,515,700,753,4,33,13,23,77,5,40,11,102,93,1,21,110,51,82,117,141,2,10,8,32,64,120,31,202,3,217,115 }, - { 15,13,1,23,515,0,51,2,700,5,753,21,180,141,165,3,12,115,197,4,7,6,457,9,352,202,33,8,719,120,77,341 }, - { 219,127,258,98,276,201,131,395,944,293,116,284,567,31,242,105,137,935,295,44,403,860,51,224,576,456,9,371,578,475,202,512 }, - { 16,7,18,35,60,0,14,20,118,28,68,22,2,24,1,92,158,107,5,49,154,126,109,12,43,10,55,6,677,71,21,168 }, - { 93,728,531,160,559,373,574,120,295,860,533,269,717,260,926,902,258,318,36,201,261,434,851,137,617,141,187,352,843,384,332,251 }, - { 28,0,1,9,22,109,83,39,49,12,36,67,55,5,96,2,128,30,158,69,21,23,160,208,35,13,65,323,50,141,194,238 }, - { 23,13,51,1,5,15,141,21,0,2,165,515,202,700,12,197,180,120,32,115,4,753,64,9,7,269,6,3,125,386,48,453 }, - { 
5,21,13,49,14,20,7,23,43,32,1,0,652,48,713,22,38,2,16,132,955,107,12,279,24,888,197,640,70,303,18,638 }, - { 9,0,28,39,1,30,35,101,22,67,83,141,49,175,36,68,55,88,13,251,10,69,23,158,180,115,64,100,217,65,345,166 }, - { 260,120,82,269,5,450,351,1,202,141,854,13,77,922,32,33,137,4,23,125,291,21,15,515,165,349,177,700,318,326,180,753 }, - { 121,16,18,35,363,101,60,20,107,14,68,259,621,55,604,43,7,252,9,364,126,0,167,191,5,407,132,28,199,419,146,10 }, - { 13,23,51,1,15,0,2,141,5,515,12,21,700,115,165,180,4,753,3,197,202,32,9,120,7,8,6,11,37,10,457,269 }, - { 201,144,206,443,418,203,435,96,335,459,187,1,88,332,330,321,269,934,30,372,822,521,268,326,44,523,382,141,410,264,494,473 }, - { 31,44,276,201,116,131,284,662,567,144,9,489,98,295,268,434,0,30,137,39,93,1,187,22,219,918,110,299,141,36,224,384 }, - { 520,478,664,1,264,604,9,167,777,759,411,0,806,724,48,21,101,68,647,936,363,223,118,682,410,18,100,16,252,98,265,13 }, - { 23,70,21,87,60,75,120,182,163,379,92,18,7,937,71,121,446,132,24,98,931,126,107,77,795,195,115,44,411,146,51,850 }, - { 39,9,0,737,127,31,846,98,1,827,105,310,371,30,254,100,44,18,395,242,272,101,385,916,836,16,265,131,938,93,166,557 }, - { 279,20,43,126,107,7,92,16,356,362,60,595,246,359,598,35,0,683,939,653,121,97,125,441,399,392,150,199,48,230,14,649 }, - { 206,417,93,959,499,728,8,559,120,473,137,141,10,564,31,260,44,450,203,341,253,244,373,116,143,638,268,180,352,110,318,940 }, - { 259,465,147,132,590,687,534,199,581,146,941,427,107,640,279,178,121,5,195,150,522,955,198,35,786,929,798,142,1,21,325,626 }, - { 523,15,120,450,202,515,403,51,817,700,13,753,23,457,33,899,128,64,730,102,494,342,115,719,453,196,49,99,318,421,308,5 }, - { 141,559,10,244,365,564,661,180,253,143,752,110,55,317,533,341,901,93,373,206,535,160,82,922,260,36,531,964,352,332,261,197 }, - { 219,258,98,127,276,964,943,137,843,535,201,935,131,860,261,295,284,567,206,44,116,31,253,492,203,332,160,615,36,93,55,692 }, - { 
0,319,422,207,945,693,577,887,32,804,95,344,104,904,61,20,5,43,7,725,113,510,306,102,49,263,153,426,33,83,22,9 }, - { 1,22,0,5,12,2,36,21,28,86,49,105,9,10,23,13,141,95,31,55,128,37,51,4,83,202,3,64,96,7,32,44 }, - { 15,515,13,700,1,0,753,2,23,3,4,5,51,10,115,197,6,33,12,9,165,7,8,77,11,961,180,269,141,22,120,457 }, - { 15,1,23,120,77,13,515,51,141,202,700,180,110,137,260,753,326,5,128,102,0,21,2,165,269,33,197,450,318,217,93,115 }, - { 15,515,1,700,0,753,13,2,23,180,51,5,120,4,9,115,197,12,7,165,21,33,6,82,3,8,523,901,31,141,457,260 }, - { 16,18,24,7,92,35,60,75,9,13,71,14,0,108,50,21,126,121,1,140,23,5,132,146,2,12,128,10,64,141,70,87 }, - { 180,341,901,15,515,1,365,700,0,2,197,753,115,4,10,13,752,5,3,8,165,317,141,23,143,873,44,31,569,55,93,6 }, - { 9,0,175,35,101,28,39,67,68,1,65,83,30,69,364,336,22,114,55,124,194,158,100,289,252,166,64,345,103,36,50,88 }, - { 64,165,180,197,115,247,217,237,21,13,32,316,22,141,352,72,288,304,95,225,76,391,386,16,468,90,49,35,365,640,372,23 }, - { 15,515,700,13,753,1,0,2,23,4,5,3,115,51,141,197,12,10,180,961,7,9,21,33,217,6,8,165,457,11,77,341 }, - { 132,121,199,146,60,279,493,640,407,598,126,195,534,581,955,590,107,5,150,35,522,49,259,16,18,360,156,0,147,362,21,167 }, - { 0,1,28,9,22,5,36,12,65,24,67,96,2,83,18,50,114,55,21,16,7,10,23,14,13,160,137,51,48,218,103,69 }, - { 93,120,957,77,30,968,459,110,137,160,613,102,202,352,373,141,31,372,217,330,190,318,269,260,203,44,28,473,228,177,863,704 }, - { 15,515,700,0,753,1,13,23,2,51,5,9,120,82,4,7,901,197,10,8,260,180,341,12,33,6,3,523,165,102,115,141 }, - { 206,417,8,141,499,44,244,93,31,10,137,253,559,116,728,144,120,564,269,638,203,352,143,260,341,752,268,717,951,180,160,110 }, - { 530,254,228,1,96,21,406,39,827,31,669,840,613,829,137,679,166,98,23,51,960,438,131,93,48,224,219,317,310,36,876,190 }, - { 15,515,700,753,13,457,0,197,719,1,165,82,23,8,120,730,2,10,12,180,134,5,9,141,260,4,351,51,115,3,341,899 }, - { 
0,16,68,9,24,28,18,35,252,109,39,419,124,158,154,55,101,71,22,118,60,7,49,65,333,14,1,10,329,364,677,346 }, - { 1,15,13,23,515,51,120,0,700,180,2,165,5,753,141,197,21,33,202,102,260,4,9,12,7,326,137,450,115,6,82,110 }, - { 535,253,352,564,110,365,82,180,341,10,854,533,55,898,244,901,873,141,752,143,642,559,498,317,36,951,115,964,638,282,661,197 }, - { 31,44,125,338,116,64,242,36,1,10,55,22,456,237,180,13,299,164,506,86,23,165,558,143,0,762,492,479,844,546,93,8 }, - { 13,23,4,1,202,2,0,51,115,77,141,180,5,15,217,3,33,11,515,317,9,10,102,21,700,341,365,318,269,64,32,128 }, - { 9,39,0,166,68,101,28,364,30,158,562,35,175,65,333,154,49,404,706,124,21,252,274,168,190,289,100,570,16,1,310,346 }, - { 15,515,700,753,341,13,0,23,1,33,141,4,260,82,77,51,351,180,9,5,115,137,10,217,11,120,102,40,349,269,202,854 } -#else - #include "rgbcx_table4.h" -#endif - }; - - static uint8_t g_best_total_orderings3[NUM_UNIQUE_TOTAL_ORDERINGS3][32] = - { - { 12,1,3,5,27,2,4,38,8,7,16,18,6,10,41,79,40,23,46,9,20,88,22,37,14,19,24,126,99,119,35,11 }, - { 7,64,116,14,94,30,8,42,1,108,47,55,137,10,134,95,96,115,69,32,63,29,90,113,11,148,16,103,19,9,34,25 }, - { 12,1,0,5,3,7,4,27,8,6,38,40,41,16,18,46,9,10,20,23,79,62,14,22,88,99,37,126,92,19,120,11 }, - { 16,88,27,18,46,48,126,107,79,19,59,38,37,65,23,66,0,2,3,43,12,151,28,25,5,87,72,40,1,20,52,92 }, - { 79,48,88,16,27,65,18,38,46,19,37,4,72,33,126,41,52,0,12,92,5,1,2,107,3,77,23,91,43,51,22,74 }, - { 1,8,41,122,10,22,2,0,87,24,37,120,38,7,39,4,5,3,9,92,62,59,23,16,104,11,27,79,19,26,25,32 }, - { 2,76,99,28,40,86,93,21,138,60,6,0,17,128,145,119,98,144,141,82,147,54,67,75,5,12,27,132,146,1,38,14 }, - { 47,7,64,90,1,118,116,85,57,14,30,94,50,45,137,134,8,42,69,139,55,68,58,108,95,29,10,115,0,32,2,11 }, - { 49,8,10,30,124,11,32,113,130,58,125,9,100,53,104,115,131,103,24,7,1,39,45,36,139,0,137,22,90,44,114,105 }, - { 9,38,72,125,49,41,84,11,13,5,27,0,16,92,8,2,65,105,10,18,48,29,127,131,36,14,1,46,111,79,130,12 }, - { 
130,8,10,100,104,131,49,32,53,39,30,36,113,24,11,22,124,44,83,58,7,103,1,4,9,125,5,0,91,33,115,74 }, - { 114,11,58,8,120,49,9,124,142,111,41,30,10,0,97,130,62,84,38,5,72,125,92,127,100,27,139,113,13,132,32,1 }, - { 60,46,28,27,40,20,0,17,18,2,126,16,6,38,86,23,79,54,1,93,5,88,41,14,21,111,7,48,3,84,72,62 }, - { 72,92,38,65,84,48,41,79,27,16,29,111,88,5,18,46,1,0,152,14,37,19,77,42,132,7,22,13,119,56,12,2 }, - { 7,55,1,95,29,56,64,116,143,8,14,30,47,94,152,90,65,67,10,133,42,72,146,84,16,48,6,0,25,108,77,21 }, - { 27,23,20,5,0,79,38,2,3,1,59,46,4,41,33,86,37,87,88,92,7,126,43,8,22,152,151,150,149,148,147,146 }, - { 12,0,1,2,7,6,3,5,28,4,8,14,60,40,17,19,21,86,126,93,10,18,9,29,48,99,65,25,84,119,72,41 }, - { 60,40,99,2,54,12,0,1,19,28,98,93,6,138,21,5,27,17,151,14,76,46,16,18,38,29,86,144,107,7,25,41 }, - { 12,0,1,2,3,5,6,7,4,28,8,60,14,40,16,17,21,10,19,9,86,38,126,41,93,27,29,48,62,84,79,99 }, - { 0,1,2,10,5,8,3,25,4,29,32,34,63,7,77,26,16,48,65,56,14,22,129,103,72,24,18,152,140,53,96,42 }, - { 46,126,18,54,12,16,1,0,5,2,27,98,20,23,6,3,88,48,28,7,19,8,4,60,151,38,37,21,79,14,65,40 }, - { 76,6,141,86,119,2,138,67,28,145,0,93,17,1,40,60,146,99,147,14,21,144,132,7,5,29,55,27,16,75,19,12 }, - { 71,5,51,39,22,80,0,43,10,122,8,62,41,24,104,87,35,37,2,91,33,120,36,38,1,131,9,100,130,66,3,4 }, - { 126,18,46,27,20,16,88,23,12,79,54,59,48,0,73,1,37,151,5,19,28,38,2,66,60,3,65,98,14,26,6,43 }, - { 22,10,8,5,0,71,35,80,104,39,24,51,100,1,62,32,2,130,11,41,7,9,53,43,49,83,122,120,30,44,37,38 }, - { 1,34,14,129,53,63,42,26,121,148,7,44,96,10,0,24,100,32,64,116,140,22,5,19,29,103,135,108,8,61,39,83 }, - { 1,7,34,63,44,25,135,14,24,108,22,0,83,94,5,129,35,101,47,121,2,19,42,53,6,110,103,8,148,10,16,123 }, - { 12,28,16,60,18,1,6,21,14,0,86,19,2,48,93,17,38,29,7,5,65,126,46,72,41,79,84,119,40,56,54,88 }, - { 0,2,12,27,5,46,38,40,41,79,88,99,3,23,1,62,20,4,22,37,92,35,18,8,16,24,10,60,7,120,98,54 }, - { 
1,7,14,56,8,0,84,67,10,2,133,72,42,111,5,30,21,4,9,3,25,94,16,116,47,11,65,18,132,90,55,64 }, - { 30,8,124,139,45,11,58,90,113,137,7,115,10,32,1,49,94,85,9,47,108,103,0,97,63,14,50,114,53,106,100,25 }, - { 65,38,48,27,16,79,72,18,88,19,46,77,84,92,37,41,0,29,1,14,12,111,2,5,31,36,87,74,105,40,28,51 }, - { 10,8,30,113,130,100,53,32,115,103,104,7,1,121,39,49,131,44,24,36,63,137,34,45,22,90,108,83,26,11,94,139 }, - { 51,52,43,33,5,74,16,37,71,91,38,3,36,87,48,22,4,0,122,41,39,18,66,27,79,24,65,88,59,23,62,92 }, - { 1,7,63,53,108,121,94,44,103,100,14,10,129,47,32,26,24,25,148,42,135,22,0,61,83,8,39,104,5,64,115,34 }, - { 1,8,10,7,5,0,80,32,62,2,24,44,53,83,9,41,30,22,100,11,14,25,120,4,26,6,3,16,122,34,19,35 }, - { 74,4,36,48,33,91,39,79,22,16,65,5,131,38,24,71,27,52,0,105,51,18,88,104,3,31,10,37,72,19,41,130 }, - { 59,43,38,79,23,27,92,51,0,16,46,5,18,88,41,37,66,3,87,20,48,2,122,4,22,12,1,126,19,65,33,24 }, - { 12,28,1,27,0,16,2,46,65,60,21,3,5,18,6,19,48,14,4,7,79,88,86,29,22,72,93,40,23,8,17,41 }, - { 22,91,39,33,24,71,5,131,36,10,51,0,130,8,104,2,35,125,9,43,52,49,83,80,100,41,122,3,37,38,4,16 }, - { 12,0,1,2,5,3,4,8,7,27,18,38,10,6,16,46,9,20,41,23,126,79,22,14,19,99,88,54,37,48,62,35 }, - { 12,27,1,2,3,0,46,4,38,16,8,28,7,79,18,5,84,6,88,10,14,21,23,20,40,22,60,19,9,29,72,65 }, - { 1,14,7,55,95,29,8,94,30,56,10,108,77,116,152,64,32,48,63,42,143,148,16,25,137,65,11,0,115,9,19,72 }, - { 37,79,66,38,16,52,48,59,43,27,87,33,41,4,23,51,3,5,88,18,92,46,73,122,22,71,20,0,65,19,2,120 }, - { 24,32,83,22,53,1,8,10,7,30,35,5,103,0,100,101,121,113,34,123,63,2,44,25,71,115,80,14,26,108,51,39 }, - { 97,45,111,58,85,139,0,90,47,7,120,106,142,30,50,132,41,62,84,1,119,114,14,56,117,8,38,29,2,64,116,5 }, - { 12,28,16,18,1,60,6,14,2,21,0,86,126,19,48,93,7,27,17,29,5,65,54,38,72,79,84,88,119,145,8,111 }, - { 118,47,64,116,57,85,7,14,50,1,42,0,45,68,86,69,2,111,134,28,90,55,16,29,56,48,84,144,60,30,112,41 }, - { 
12,1,2,0,7,6,28,5,3,4,8,14,60,21,18,40,17,86,10,9,16,29,19,93,126,79,38,84,72,27,111,119 }, - { 11,8,49,130,10,125,9,124,100,114,131,30,58,104,32,39,24,113,36,105,0,41,22,120,5,53,111,38,142,44,83,35 }, - { 50,70,47,118,85,57,106,0,45,7,64,90,81,14,2,134,28,62,86,55,69,1,78,119,68,56,18,67,16,60,29,21 }, - { 43,37,33,87,51,41,66,5,122,38,22,59,92,0,23,91,27,16,71,79,18,52,120,4,3,24,46,20,73,39,62,36 }, - { 79,48,4,16,27,88,43,33,18,38,65,37,46,3,19,51,52,22,66,87,74,5,41,91,23,59,0,71,122,72,20,92 }, - { 32,100,10,8,30,104,24,44,39,113,83,103,1,7,22,53,115,63,135,121,26,35,34,5,0,108,137,90,91,45,2,130 }, - { 0,1,2,5,16,12,6,7,14,3,19,18,29,20,4,21,40,8,17,35,23,48,126,22,25,56,26,10,98,27,38,65 }, - { 143,67,56,146,1,7,133,55,64,141,134,69,6,47,14,29,84,21,111,147,57,16,95,72,118,132,50,0,2,18,119,42 }, - { 1,7,67,14,133,111,8,84,0,21,2,47,64,132,55,10,95,147,119,42,16,5,72,56,4,3,6,29,9,25,18,30 }, - { 68,57,69,112,144,86,102,2,134,55,0,70,118,64,75,47,14,28,93,143,67,7,50,149,1,21,29,56,119,95,60,78 }, - { 58,97,114,30,124,45,11,139,8,90,0,142,7,10,41,113,84,62,49,111,85,1,9,5,137,120,32,14,2,117,47,38 }, - { 23,66,18,79,38,20,43,27,16,88,46,59,126,37,87,12,73,92,3,5,48,0,19,54,2,51,28,1,41,65,122,22 }, - { 0,12,2,27,5,40,46,38,1,41,3,79,88,23,99,4,20,62,22,54,92,18,8,37,16,35,10,7,19,120,144,24 }, - { 1,14,25,26,0,7,44,34,129,42,24,5,135,22,19,148,6,96,83,2,29,16,63,35,101,64,140,136,116,110,3,10 }, - { 12,1,2,27,3,4,38,5,7,8,18,16,46,6,0,40,41,10,79,23,88,9,20,22,14,19,37,92,48,126,28,21 }, - { 7,1,10,32,108,103,94,47,8,53,25,14,34,115,100,129,121,130,148,42,64,116,63,26,44,0,24,30,113,4,104,22 }, - { 47,134,7,14,55,69,64,95,1,29,85,118,56,116,45,57,102,143,50,90,42,30,16,94,0,8,67,75,133,2,18,48 }, - { 12,1,2,0,7,6,28,8,14,5,3,4,40,21,17,18,60,86,16,93,126,10,9,29,99,38,119,25,19,54,27,84 }, - { 59,16,27,18,23,88,79,37,46,66,38,20,73,126,3,43,48,87,92,51,41,12,19,5,52,107,65,0,151,122,54,2 }, - { 
1,21,147,7,119,14,76,132,55,0,86,145,2,6,69,67,16,143,111,138,17,28,29,60,18,93,8,19,40,56,84,5 }, - { 144,86,112,2,68,102,69,0,149,93,75,28,57,55,145,60,21,67,99,134,143,40,146,119,82,110,62,6,29,26,78,14 }, - { 102,57,55,69,143,75,146,67,56,68,134,2,29,141,0,21,6,14,133,118,64,1,7,95,47,84,111,28,147,82,72,119 }, - { 0,70,57,119,50,145,2,86,28,118,69,78,149,47,60,68,67,55,93,81,134,21,14,62,64,7,5,1,132,85,41,16 }, - { 51,5,43,71,122,87,41,37,91,39,0,22,33,36,38,24,66,120,62,2,80,16,92,10,59,4,27,23,35,79,8,3 }, - { 12,1,2,0,7,6,28,5,8,14,3,21,40,4,60,17,86,18,16,93,10,9,126,119,99,29,19,41,38,27,25,92 }, - { 27,18,46,126,23,16,88,79,20,151,59,73,48,38,0,54,12,2,37,1,19,5,28,60,66,41,3,109,86,65,40,6 }, - { 48,79,4,33,16,74,65,38,88,27,91,52,18,36,22,19,46,0,37,3,51,5,71,39,72,43,24,41,92,87,2,10 }, - { 86,2,144,93,28,112,141,6,102,21,99,60,75,0,68,82,69,146,67,149,55,40,145,76,111,147,56,119,110,143,26,132 }, - { 6,138,2,99,86,17,40,93,28,21,145,141,0,60,119,147,128,76,67,54,1,12,5,27,144,14,38,98,146,41,29,19 }, - { 1,8,0,10,2,29,7,5,3,56,4,25,14,152,63,32,65,72,96,42,34,108,48,9,26,16,84,103,67,148,22,129 }, - { 149,145,0,86,2,28,93,144,62,60,119,101,21,41,5,35,78,99,26,40,12,68,57,67,110,120,69,18,55,76,132,70 }, - { 12,28,16,1,48,19,6,60,2,14,18,21,0,27,46,65,86,29,5,7,72,93,40,3,17,84,56,88,126,4,38,8 }, - { 1,8,5,10,7,24,2,62,0,41,22,122,120,9,4,3,32,87,11,37,38,83,100,44,25,104,16,26,39,80,14,6 }, - { 0,119,62,86,145,149,28,132,93,2,120,67,60,41,35,5,144,21,123,38,111,81,84,56,12,44,24,50,92,55,40,22 }, - { 2,93,99,28,40,144,60,0,86,150,76,21,149,98,6,25,1,61,82,26,12,5,54,141,7,18,145,16,27,138,110,38 }, - { 24,8,10,22,32,35,100,5,1,53,0,7,71,80,30,123,83,104,51,11,2,39,44,113,9,62,25,103,34,101,43,41 }, - { 12,1,2,0,7,6,28,5,40,60,8,16,3,18,14,4,86,21,17,93,41,10,9,99,27,119,38,19,126,22,48,145 }, - { 45,47,50,7,85,90,97,1,64,139,116,118,30,58,14,106,70,111,0,57,94,42,137,142,29,120,8,56,18,134,84,41 }, - { 
12,0,2,5,27,38,1,46,41,40,79,144,3,22,88,23,28,60,99,62,6,24,26,7,4,16,10,35,37,18,14,20 }, - { 37,38,59,92,0,5,23,51,79,41,27,22,2,3,87,16,46,4,1,43,20,33,18,88,24,71,8,10,48,19,126,122 }, - { 12,28,16,60,1,18,6,21,19,14,48,0,2,86,93,5,46,29,17,27,65,7,3,72,38,126,119,40,84,37,56,4 }, - { 0,2,5,1,16,6,27,28,18,38,60,7,14,21,46,40,86,41,19,48,93,8,3,79,22,4,10,37,62,23,24,111 }, - { 85,7,90,30,47,139,45,50,94,58,137,1,8,64,14,116,118,115,113,11,124,108,0,10,97,57,32,70,42,106,29,114 }, - { 33,36,22,71,51,5,91,39,0,52,43,24,131,74,16,37,38,122,41,3,87,48,4,104,35,80,10,2,105,62,27,18 }, - { 12,1,27,2,0,16,3,28,46,18,4,6,5,72,21,79,38,7,14,60,88,8,65,19,48,29,23,40,22,20,86,126 }, - { 0,12,2,27,5,38,46,41,1,40,79,3,88,23,22,99,20,37,62,4,18,6,16,35,60,28,24,7,92,8,14,10 }, - { 7,47,1,30,137,8,116,94,90,64,14,115,108,118,57,10,148,113,42,85,32,11,63,50,103,45,124,134,55,9,69,34 }, - { 55,7,1,29,56,143,64,47,67,133,14,146,95,72,84,8,116,111,6,134,141,21,65,0,69,30,16,45,85,42,50,10 }, - { 14,1,42,8,10,29,108,63,55,148,95,32,7,19,25,115,103,34,56,129,77,0,16,152,94,30,113,26,2,5,48,4 }, - { 111,120,142,97,58,0,41,45,62,132,114,84,139,30,5,8,38,2,7,85,119,90,117,1,124,11,56,47,28,27,35,72 }, - { 1,0,14,2,6,5,16,19,7,29,42,18,3,25,12,35,21,8,26,17,40,4,20,48,109,99,22,96,55,101,10,61 }, - { 12,0,1,5,3,2,4,7,27,8,38,6,40,18,16,10,20,46,9,41,23,22,79,14,62,19,37,126,88,11,92,48 }, - { 10,8,104,39,24,32,22,83,44,100,30,130,53,91,113,5,11,1,35,33,7,49,0,2,103,71,36,124,9,80,131,34 }, - { 1,7,0,14,8,34,5,25,35,26,6,63,10,123,2,16,103,19,44,32,135,121,108,80,62,30,115,94,149,144,53,18 }, - { 75,68,146,141,102,67,2,21,6,57,69,143,0,55,82,86,28,144,147,29,93,112,56,119,133,14,76,60,84,134,111,145 }, - { 10,32,115,7,8,53,1,108,30,113,94,137,100,63,90,34,130,103,121,47,44,25,104,39,24,26,85,14,49,36,22,131 }, - { 39,24,10,22,8,130,91,104,83,49,5,33,100,11,0,35,32,131,71,36,9,44,53,2,80,51,30,1,41,7,43,62 }, - { 
38,36,65,105,27,72,31,79,41,131,5,48,125,39,0,16,92,46,22,13,18,84,24,37,88,2,33,74,91,71,130,49 }, - { 0,106,62,50,45,119,85,81,132,28,2,86,41,47,38,60,35,117,5,29,7,30,145,90,55,70,14,111,18,67,93,56 }, - { 0,2,5,1,3,25,19,26,4,34,29,10,22,16,8,7,24,14,48,65,53,18,6,77,44,56,72,61,121,21,136,40 }, - { 7,1,94,8,47,115,10,32,113,103,30,108,137,63,14,64,116,148,129,42,90,25,34,118,53,57,11,49,85,9,96,50 }, - { 14,0,1,26,19,5,42,2,25,24,29,22,6,44,61,16,7,96,136,3,140,34,35,55,135,18,48,77,83,4,8,10 }, - { 1,7,14,0,25,6,34,5,26,16,63,2,19,8,35,101,108,29,94,10,18,42,123,144,129,47,61,21,3,62,149,4 }, - { 12,0,2,1,28,5,6,120,7,60,40,16,18,86,27,14,21,93,8,62,41,38,3,17,4,119,99,48,19,126,10,9 }, - { 86,144,93,2,28,149,0,60,99,112,110,145,40,21,102,26,75,62,69,1,12,101,119,25,76,67,7,68,55,5,6,14 }, - { 8,30,10,32,113,49,115,137,124,103,45,90,7,139,11,1,58,53,130,94,108,100,9,63,85,125,34,47,0,24,44,104 }, - { 120,142,111,41,58,114,97,0,11,62,84,124,5,30,8,38,132,127,27,139,92,10,72,45,49,9,28,2,29,56,16,1 }, - { 8,113,30,137,7,32,10,90,94,115,1,103,108,63,47,85,49,53,11,45,34,50,14,25,9,124,100,130,139,121,42,26 }, - { 64,7,14,47,134,55,1,42,95,69,116,90,94,30,8,29,56,137,45,108,85,10,57,16,102,143,118,19,63,32,11,50 }, - { 62,132,0,119,120,41,111,86,35,28,5,84,56,38,2,93,145,60,67,12,92,27,29,72,55,117,21,24,133,149,22,45 }, - { 57,68,69,118,134,64,50,47,55,14,7,2,102,144,0,112,70,86,85,1,95,29,116,143,42,75,16,56,28,45,21,48 }, - { 0,12,2,1,5,28,6,40,60,27,7,38,16,14,86,18,93,41,62,46,99,35,8,23,3,17,22,21,10,19,79,20 }, - { 12,1,2,27,16,3,38,111,4,0,18,5,7,46,40,8,79,6,14,28,88,10,48,41,19,84,21,9,22,23,20,72 }, - { 53,103,32,7,1,100,22,63,71,44,10,115,108,24,92,104,26,30,122,94,8,39,83,34,137,135,90,91,121,5,87,47 }, - { 87,37,41,0,22,38,2,92,1,24,4,8,3,59,10,5,39,23,71,79,122,27,16,46,33,7,91,20,18,51,9,120 }, - { 1,7,8,10,0,5,35,32,53,44,14,30,2,80,25,34,6,62,26,103,16,19,63,9,149,24,121,41,22,11,113,83 }, - { 
11,58,8,30,124,49,10,113,9,114,139,45,97,32,7,137,90,1,0,130,115,125,100,24,5,94,53,41,14,13,35,38 }, - { 125,105,9,36,131,49,8,130,39,11,10,5,22,38,41,104,0,31,13,24,27,16,2,72,65,91,48,32,84,18,100,74 }, - { 12,1,0,2,6,3,7,5,4,8,14,28,16,60,18,10,21,17,19,9,40,27,86,93,29,38,54,11,25,48,46,41 }, - { 84,41,38,72,92,29,111,5,65,120,79,0,27,56,48,14,132,16,119,22,86,88,46,28,62,12,1,2,93,18,24,127 }, - { 99,28,40,60,2,93,138,0,98,17,86,54,76,12,27,1,21,144,128,38,5,14,46,18,25,16,109,6,41,145,7,29 }, - { 1,63,10,32,148,14,103,34,42,7,8,108,116,53,64,96,25,121,26,94,140,0,29,19,55,24,100,136,5,4,44,115 }, - { 131,100,130,49,10,8,36,104,39,0,48,41,11,38,4,24,27,22,16,44,79,5,33,2,53,9,125,74,91,120,32,83 }, - { 36,39,131,74,4,91,22,33,125,104,130,48,10,24,16,5,49,8,100,105,79,0,9,65,71,2,18,83,31,11,19,44 }, - { 0,12,2,1,6,5,7,28,40,60,16,14,18,62,86,27,93,8,17,38,21,41,35,99,3,19,10,23,22,4,9,48 }, - { 1,7,67,14,21,147,111,55,132,119,0,8,2,76,64,16,47,84,6,18,86,95,145,10,42,29,133,5,56,134,17,72 }, - { 69,55,47,134,102,143,7,57,118,95,14,64,29,56,1,50,75,67,146,2,0,133,68,16,21,6,141,85,116,18,72,65 }, - { 1,44,7,24,83,63,34,103,22,121,53,32,25,35,0,115,108,5,14,8,10,101,94,30,2,123,110,26,137,47,90,19 }, - { 14,1,25,42,34,0,26,96,19,29,140,5,53,10,2,121,3,24,44,22,55,77,129,7,63,16,8,4,6,61,100,48 }, - { 30,90,7,8,137,94,85,1,47,113,115,108,45,139,124,11,10,32,50,58,103,14,63,64,9,116,49,42,25,148,0,53 }, - { 40,99,2,60,28,17,0,54,93,98,86,138,6,12,21,76,1,5,27,144,128,38,19,46,14,41,145,7,16,67,3,109 }, - { 45,58,30,139,90,7,85,137,97,8,124,47,1,11,106,114,50,94,0,113,10,115,14,32,9,64,108,41,49,29,62,116 }, - { 14,42,10,1,63,96,32,25,34,8,129,29,0,103,55,19,26,53,77,5,95,2,4,7,3,16,148,56,18,24,121,108 }, - { 21,2,75,86,6,76,144,28,119,99,93,147,141,67,102,145,60,132,146,128,0,82,40,138,55,111,143,17,133,112,69,14 }, - { 111,120,41,62,84,132,0,5,38,119,56,92,72,142,27,28,29,35,58,80,2,86,65,79,12,14,1,24,145,16,21,48 }, - { 
146,67,141,69,133,21,6,143,57,55,111,147,56,1,14,132,7,2,134,102,0,119,29,84,76,64,86,72,28,68,47,75 }, - { 12,1,0,5,27,3,7,4,38,8,6,41,16,40,46,10,18,79,2,9,23,86,20,22,62,14,37,88,92,19,24,11 }, - { 0,12,2,1,27,5,38,28,60,6,40,7,16,46,18,14,41,99,93,62,3,79,86,23,149,8,22,35,88,17,19,10 }, - { 141,6,21,67,147,102,146,2,76,119,132,69,55,111,86,75,28,133,143,0,1,145,14,128,56,99,17,60,29,93,84,68 }, - { 21,76,1,119,86,145,2,0,14,7,6,138,146,55,17,28,132,93,67,40,60,143,29,147,111,16,69,141,5,56,19,133 }, - { 1,8,108,14,7,116,64,42,10,63,94,32,115,103,113,96,30,34,55,47,95,148,29,140,129,25,134,53,69,26,19,11 }, - { 12,1,3,5,4,2,0,7,8,38,27,16,18,6,10,20,41,40,79,46,9,23,22,88,92,37,14,24,62,19,48,99 }, - { 1,14,7,0,6,25,5,16,19,2,42,26,29,35,61,8,18,129,101,21,3,110,34,148,96,10,17,4,22,40,12,20 }, - { 0,2,5,1,3,19,22,26,16,24,29,7,14,6,4,25,18,44,8,48,12,61,20,21,10,35,65,56,23,40,17,107 }, - { 1,7,8,29,56,0,10,14,2,42,72,5,4,65,3,30,84,94,67,9,25,133,111,11,32,108,16,63,21,96,26,48 } - }; - - static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - - static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } - static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } - - template inline S maximum(S a, S b) { return (a > b) ? a : b; } - template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } - template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - - template inline S minimum(S a, S b) { return (a < b) ? 
a : b; } - template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } - template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } - - template inline T square(T a) { return a * a; } - - static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } - static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - - template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } - static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } - - static inline int squarei(int a) { return a * a; } - static inline int absi(int a) { return (a < 0) ? -a : a; } - - template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } - - enum class eNoClamp { cNoClamp }; - - struct color32 - { - union - { - struct - { - uint8_t r; - uint8_t g; - uint8_t b; - uint8_t a; - }; - - uint8_t c[4]; - - uint32_t m; - }; - - color32() { } - - color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } - - void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } - - void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } - void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - - void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } - - uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return 
c[idx]; } - uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } - - bool operator== (const color32&rhs) const { return m == rhs.m; } - - void set_rgb(const color32& other) { c[0] = static_cast(other.c[0]); c[1] = static_cast(other.c[1]); c[2] = static_cast(other.c[2]); } - - static color32 comp_min(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); } - static color32 comp_max(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); } - }; - + // Rate Distortion Optimization (RDO) enum dxt_constants { cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, @@ -1491,7 +294,7 @@ namespace rgbcx uint8_t m_low_color[cTotalEndpointBytes]; uint8_t m_high_color[cTotalEndpointBytes]; uint8_t m_selectors[cTotalSelectorBytes]; - + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline bool is_3color() const { return get_low_color() <= get_high_color(); } @@ -1500,6 +303,25 @@ namespace rgbcx inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; } inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + inline uint32_t get_endpoint_bits() const { return m_low_color[0] | (m_low_color[1] << 8) | (m_high_color[0] << 16) | (m_high_color[1] << 24); } + inline void set_endpoint_bits(uint32_t s) { m_low_color[0] = (uint8_t)s; m_low_color[1] = (uint8_t)(s >> 8); m_high_color[0] = 
(uint8_t)(s >> 16); m_high_color[1] = (uint8_t)(s >> 24); } + + inline uint32_t get_selector_bits() const { return m_selectors[0] | (m_selectors[1] << 8) | (m_selectors[2] << 16) | (m_selectors[3] << 24); } + inline void set_selector_bits(uint32_t s) { m_selectors[0] = (uint8_t)s; m_selectors[1] = (uint8_t)(s >> 8); m_selectors[2] = (uint8_t)(s >> 16); m_selectors[3] = (uint8_t)(s >> 24); } + + inline bool any_selectors_transparent() const + { + uint32_t sel_bits = get_selector_bits(); + for (uint32_t i = 0; i < 16; i++) + { + if ((sel_bits & 3) == 3) + return true; + + sel_bits >>= 2; + } + return false; + } + static inline uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U) { uint32_t r = color.r, g = color.g, b = color.b; @@ -1533,2650 +355,78 @@ namespace rgbcx } }; - static const uint32_t TOTAL_ORDER_4_0_16 = 15; - static const uint32_t TOTAL_ORDER_4_1_16 = 700; - static const uint32_t TOTAL_ORDER_4_2_16 = 753; - static const uint32_t TOTAL_ORDER_4_3_16 = 515; - static uint16_t g_total_ordering4_hash[4096]; - static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3]; - - static const uint32_t TOTAL_ORDER_3_0_16 = 12; - static const uint32_t TOTAL_ORDER_3_1_16 = 15; - static const uint32_t TOTAL_ORDER_3_2_16 = 89; - static uint16_t g_total_ordering3_hash[256]; - static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3]; - - struct hist4 - { - uint8_t m_hist[4]; - - hist4() - { - memset(m_hist, 0, sizeof(m_hist)); - } - - hist4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) - { - m_hist[0] = (uint8_t)i; - m_hist[1] = (uint8_t)j; - m_hist[2] = (uint8_t)k; - m_hist[3] = (uint8_t)l; - } - - inline bool operator== (const hist4 &h) const - { - if (m_hist[0] != h.m_hist[0]) return false; - if (m_hist[1] != h.m_hist[1]) return false; - if (m_hist[2] != h.m_hist[2]) return false; - if (m_hist[3] != h.m_hist[3]) return false; - return true; - } - - inline bool any_16() const - { - return (m_hist[0] == 16) || (m_hist[1] == 16) || 
(m_hist[2] == 16) || (m_hist[3] == 16); - } - - inline uint32_t lookup_total_ordering_index() const - { - if (m_hist[0] == 16) - return TOTAL_ORDER_4_0_16; - else if (m_hist[1] == 16) - return TOTAL_ORDER_4_1_16; - else if (m_hist[2] == 16) - return TOTAL_ORDER_4_2_16; - else if (m_hist[3] == 16) - return TOTAL_ORDER_4_3_16; - - // Must sum to 16, so m_hist[3] isn't needed. - return g_total_ordering4_hash[m_hist[0] | (m_hist[1] << 4) | (m_hist[2] << 8)]; - } - }; - - struct hist3 - { - uint8_t m_hist[3]; - - hist3() - { - memset(m_hist, 0, sizeof(m_hist)); - } - - hist3(uint32_t i, uint32_t j, uint32_t k) - { - m_hist[0] = (uint8_t)i; - m_hist[1] = (uint8_t)j; - m_hist[2] = (uint8_t)k; - } - - inline bool operator== (const hist3 &h) const - { - if (m_hist[0] != h.m_hist[0]) return false; - if (m_hist[1] != h.m_hist[1]) return false; - if (m_hist[2] != h.m_hist[2]) return false; - return true; - } - - inline bool any_16() const - { - return (m_hist[0] == 16) || (m_hist[1] == 16) || (m_hist[2] == 16); - } - - inline uint32_t lookup_total_ordering_index() const - { - if (m_hist[0] == 16) - return TOTAL_ORDER_3_0_16; - else if (m_hist[1] == 16) - return TOTAL_ORDER_3_1_16; - else if (m_hist[2] == 16) - return TOTAL_ORDER_3_2_16; - - // Must sum to 16, so m_hist[2] isn't needed. 
- return g_total_ordering3_hash[m_hist[0] | (m_hist[1] << 4)]; - } - }; - - struct bc1_match_entry + struct bc4_block { - uint8_t m_hi; - uint8_t m_lo; - uint8_t m_e; - }; - - static bc1_approx_mode g_bc1_approx_mode; - static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; - static bc1_match_entry g_bc1_match5_half[256], g_bc1_match6_half[256]; - - static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); } - static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); } - - // v0, v1 = unexpanded DXT1 endpoint values (5/6-bits) - // c0, c1 = expanded DXT1 endpoint values (8-bits) - static inline int interp_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1) / 3; } - static inline int interp_5_6_ideal_round(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1 + 1) / 3; } - static inline int interp_half_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1) / 2; } - - static inline int interp_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((2 * v0 + v1) * 22) / 8; } - static inline int interp_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + (gdiff / 4) + 128 + gdiff * 80) / 256; } + enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; + uint8_t m_endpoints[2]; - static inline int interp_half_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((v0 + v1) * 33) / 8; } - static inline int interp_half_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + gdiff/4 + 128 + gdiff * 128) / 256; } + uint8_t m_selectors[cTotalSelectorBytes]; - static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } - static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + inline uint32_t get_low_alpha() const { return 
m_endpoints[0]; } + inline uint32_t get_high_alpha() const { return m_endpoints[1]; } + inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } - static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); - switch (mode) + inline uint64_t get_selector_bits() const { - case bc1_approx_mode::cBC1NVidia: return interp_5_nv(v0, v1); - case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); - default: - case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); - case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | + (((uint64_t)m_selectors[4]) << 32U) | + (((uint64_t)m_selectors[5]) << 40U); } - } - static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - (void)v0; (void)v1; - assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); - switch (mode) + inline void set_selector_bits(uint64_t v) { - case bc1_approx_mode::cBC1NVidia: return interp_6_nv(c0, c1); - case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); - default: - case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); - case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + for (uint32_t i = 0; i < 6; i++) + { + m_selectors[i] = (uint8_t)v; + v >>= 8; + } } - } - static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); - switch (mode) + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const { - case bc1_approx_mode::cBC1NVidia: return interp_half_5_nv(v0, v1); - case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - default: 
- return interp_half_5_6_ideal(c0, c1); + assert((x < 4U) && (y < 4U)); + return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); } - } - static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - (void)v0; (void)v1; - assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); - switch (mode) + static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h) { - case bc1_approx_mode::cBC1NVidia: return interp_half_6_nv(c0, c1); - case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - default: - return interp_half_5_6_ideal(c0, c1); + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 4 + h) / 5); + pDst[3] = static_cast((l * 3 + h * 2) / 5); + pDst[4] = static_cast((l * 2 + h * 3) / 5); + pDst[5] = static_cast((l + h * 4) / 5); + pDst[6] = 0; + pDst[7] = 255; + return 6; } - } - static void prepare_bc1_single_color_table_half(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) - { - for (int i = 0; i < 256; i++) + static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h) { - int lowest_e = 256; - for (int lo = 0; lo < size; lo++) - { - const int lo_e = pExpand[lo]; - - for (int hi = 0; hi < size; hi++) - { - const int hi_e = pExpand[hi]; - - const int v = (size == 32) ? interp_half_5(hi, lo, hi_e, lo_e, mode) : interp_half_6(hi, lo, hi_e, lo_e, mode); - - int e = iabs(v - i); - - // We only need to factor in 3% error in BC1 ideal mode. - if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) - e += (iabs(hi_e - lo_e) * 3) / 100; - - // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. 
- if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) - { - pTable[i].m_hi = static_cast(hi); - pTable[i].m_lo = static_cast(lo); - - assert(e <= UINT8_MAX); - pTable[i].m_e = static_cast(e); - - lowest_e = e; - } - - } // hi - } // lo + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 6 + h) / 7); + pDst[3] = static_cast((l * 5 + h * 2) / 7); + pDst[4] = static_cast((l * 4 + h * 3) / 7); + pDst[5] = static_cast((l * 3 + h * 4) / 7); + pDst[6] = static_cast((l * 2 + h * 5) / 7); + pDst[7] = static_cast((l + h * 6) / 7); + return 8; } - } - static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) - { - for (int i = 0; i < 256; i++) + static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h) { - int lowest_e = 256; - for (int lo = 0; lo < size; lo++) - { - const int lo_e = pExpand[lo]; - - for (int hi = 0; hi < size; hi++) - { - const int hi_e = pExpand[hi]; - - const int v = (size == 32) ? interp_5(hi, lo, hi_e, lo_e, mode) : interp_6(hi, lo, hi_e, lo_e, mode); - - int e = iabs(v - i); - - if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) - e += (iabs(hi_e - lo_e) * 3) / 100; - - // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. - if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) - { - pTable[i].m_hi = static_cast(hi); - pTable[i].m_lo = static_cast(lo); - - assert(e <= UINT8_MAX); - pTable[i].m_e = static_cast(e); - - lowest_e = e; - } - - } // hi - } // lo + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); } - } - - // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) - // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. 
- static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; - - // multiplier is 4 for 3-color - static const uint32_t g_weight_vals3[3] = { 0x000004, 0x040000, 0x010101 }; - - static inline void compute_selector_factors4(const hist4 &h, float &iz00, float &iz10, float &iz11) - { - uint32_t weight_accum = 0; - for (uint32_t sel = 0; sel < 4; sel++) - weight_accum += g_weight_vals4[sel] * h.m_hist[sel]; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; - - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - det = 0.0f; - else - det = (3.0f / 255.0f) / det; - - iz00 = z11 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - } - - static inline void compute_selector_factors3(const hist3 &h, float &iz00, float &iz10, float &iz11) - { - uint32_t weight_accum = 0; - for (uint32_t sel = 0; sel < 3; sel++) - weight_accum += g_weight_vals3[sel] * h.m_hist[sel]; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; + }; - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - det = 0.0f; - else - det = (2.0f / 255.0f) / det; - - iz00 = z11 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - } - - static bool g_initialized; - - void init(bc1_approx_mode mode) - { - g_bc1_approx_mode = mode; - - uint8_t bc1_expand5[32]; - for (int i = 0; i < 32; i++) - bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); - prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, mode); - prepare_bc1_single_color_table_half(g_bc1_match5_half, bc1_expand5, 32, mode); - - uint8_t bc1_expand6[64]; - for (int i = 0; i < 64; i++) - bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); - prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, mode); - prepare_bc1_single_color_table_half(g_bc1_match6_half, 
bc1_expand6, 64, mode); - - for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS4; i++) - { - hist4 h; - h.m_hist[0] = (uint8_t)g_unique_total_orders4[i][0]; - h.m_hist[1] = (uint8_t)g_unique_total_orders4[i][1]; - h.m_hist[2] = (uint8_t)g_unique_total_orders4[i][2]; - h.m_hist[3] = (uint8_t)g_unique_total_orders4[i][3]; - - if (!h.any_16()) - { - const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4) | (h.m_hist[2] << 8); - assert(index < 4096); - g_total_ordering4_hash[index] = (uint16_t)i; - } - - compute_selector_factors4(h, g_selector_factors4[i][0], g_selector_factors4[i][1], g_selector_factors4[i][2]); - } - - for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS3; i++) - { - hist3 h; - h.m_hist[0] = (uint8_t)g_unique_total_orders3[i][0]; - h.m_hist[1] = (uint8_t)g_unique_total_orders3[i][1]; - h.m_hist[2] = (uint8_t)g_unique_total_orders3[i][2]; - - if (!h.any_16()) - { - const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4); - assert(index < 256); - g_total_ordering3_hash[index] = (uint16_t)i; - } - - compute_selector_factors3(h, g_selector_factors3[i][0], g_selector_factors3[i][1], g_selector_factors3[i][2]); - } - - g_initialized = true; - } - - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb, bool allow_3color) - { - bc1_block* pDst_block = static_cast(pDst); - - uint32_t mask = 0xAA; - int max16 = -1, min16 = 0; - - if (allow_3color) - { - const uint32_t err4 = g_bc1_match5_equals_1[fr].m_e + g_bc1_match6_equals_1[fg].m_e + g_bc1_match5_equals_1[fb].m_e; - const uint32_t err3 = g_bc1_match5_half[fr].m_e + g_bc1_match6_half[fg].m_e + g_bc1_match5_half[fb].m_e; - - if (err3 < err4) - { - max16 = (g_bc1_match5_half[fr].m_hi << 11) | (g_bc1_match6_half[fg].m_hi << 5) | g_bc1_match5_half[fb].m_hi; - min16 = (g_bc1_match5_half[fr].m_lo << 11) | (g_bc1_match6_half[fg].m_lo << 5) | g_bc1_match5_half[fb].m_lo; - - if (max16 > min16) - std::swap(max16, min16); - } - } - - if (max16 == -1) - { - max16 = 
(g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; - min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; - - if (min16 == max16) - { - // Always forbid 3 color blocks - // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. - mask = 0; - - // Make l > h - if (min16 > 0) - min16--; - else - { - // l = h = 0 - assert(min16 == max16 && max16 == 0); - - max16 = 1; - min16 = 0; - mask = 0x55; - } - - assert(max16 > min16); - } - - if (max16 < min16) - { - std::swap(max16, min16); - mask ^= 0x55; - } - } - - pDst_block->set_low_color(static_cast(max16)); - pDst_block->set_high_color(static_cast(min16)); - pDst_block->m_selectors[0] = static_cast(mask); - pDst_block->m_selectors[1] = static_cast(mask); - pDst_block->m_selectors[2] = static_cast(mask); - pDst_block->m_selectors[3] = static_cast(mask); - } - - static const float g_midpoint5[32] = { .015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f, .370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f, .725490f, .758824f, .792157f, .823529f, .854902f, .888235f, .921569f, .952941f, .984314f, 1e+37f }; - static const float g_midpoint6[64] = { .007843f, .023529f, .039216f, .054902f, .070588f, .086275f, .101961f, .117647f, .133333f, .149020f, .164706f, .180392f, .196078f, .211765f, .227451f, .245098f, .262745f, .278431f, .294118f, .309804f, .325490f, .341176f, .356863f, .372549f, .388235f, .403922f, .419608f, .435294f, .450980f, .466667f, .482353f, .500000f, .517647f, .533333f, .549020f, .564706f, .580392f, .596078f, .611765f, .627451f, .643137f, .658824f, .674510f, .690196f, .705882f, .721569f, .737255f, .754902f, .772549f, .788235f, .803922f, .819608f, .835294f, .850980f, .866667f, .882353f, .898039f, .913725f, 
.929412f, .945098f, .960784f, .976471f, .992157f, 1e+37f }; - - struct vec3F { float c[3]; }; - - static inline void compute_least_squares_endpoints4_rgb( - vec3F* pXl, vec3F* pXh, - int total_r, int total_g, int total_b, - float iz00, float iz10, float iz11, - uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) - { - const float iz01 = iz10; - - const uint32_t f1 = g_unique_total_orders4[s][0]; - const uint32_t f2 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1]; - const uint32_t f3 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1] + g_unique_total_orders4[s][2]; - uint32_t uq00_r = (r_sum[f2] - r_sum[f1]) + (r_sum[f3] - r_sum[f2]) * 2 + (r_sum[16] - r_sum[f3]) * 3; - uint32_t uq00_g = (g_sum[f2] - g_sum[f1]) + (g_sum[f3] - g_sum[f2]) * 2 + (g_sum[16] - g_sum[f3]) * 3; - uint32_t uq00_b = (b_sum[f2] - b_sum[f1]) + (b_sum[f3] - b_sum[f2]) * 2 + (b_sum[16] - b_sum[f3]) * 3; - - float q10_r = (float)(total_r * 3 - uq00_r); - float q10_g = (float)(total_g * 3 - uq00_g); - float q10_b = (float)(total_b * 3 - uq00_b); - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - } - - static inline bool compute_least_squares_endpoints4_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh, int total_r, int total_g, int total_b) - { - uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0; - uint32_t weight_accum = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; - const uint8_t sel = pSelectors[i]; - - weight_accum += g_weight_vals4[sel]; - uq00_r += sel * r; - uq00_g += sel * g; - uq00_b += sel * b; - } - - int q10_r = total_r * 3 - uq00_r; - int q10_g = total_g * 3 - uq00_g; 
- int q10_b = total_b * 3 - uq00_b; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; - - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - return false; - - det = (3.0f / 255.0f) / det; - - float iz00, iz01, iz10, iz11; - iz00 = z11 * det; - iz01 = -z01 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - - return true; - } - - static inline void compute_least_squares_endpoints3_rgb( - vec3F* pXl, vec3F* pXh, - int total_r, int total_g, int total_b, - float iz00, float iz10, float iz11, - uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) - { - const float iz01 = iz10; - - // Compensates for BC1 3-color ordering, which is selector 0, 2, 1 - const uint32_t f1 = g_unique_total_orders3[s][0]; - const uint32_t f2 = g_unique_total_orders3[s][0] + g_unique_total_orders3[s][2]; - uint32_t uq00_r = (r_sum[16] - r_sum[f2]) * 2 + (r_sum[f2] - r_sum[f1]); - uint32_t uq00_g = (g_sum[16] - g_sum[f2]) * 2 + (g_sum[f2] - g_sum[f1]); - uint32_t uq00_b = (b_sum[16] - b_sum[f2]) * 2 + (b_sum[f2] - b_sum[f1]); - - float q10_r = (float)(total_r * 2 - uq00_r); - float q10_g = (float)(total_g * 2 - uq00_g); - float q10_b = (float)(total_b * 2 - uq00_b); - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - } - - static inline bool 
compute_least_squares_endpoints3_rgb(bool use_black, const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) - { - int uq00_r = 0, uq00_g = 0, uq00_b = 0; - uint32_t weight_accum = 0; - int total_r = 0, total_g = 0, total_b = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; - if (use_black) - { - if ((r | g | b) < 4) - continue; - } - - const uint8_t sel = pSelectors[i]; - assert(sel <= 3); - if (sel == 3) - continue; - - weight_accum += g_weight_vals3[sel]; - - static const uint8_t s_tran[3] = { 0, 2, 1 }; - const uint8_t tsel = s_tran[sel]; - uq00_r += tsel * r; - uq00_g += tsel * g; - uq00_b += tsel * b; - - total_r += r; - total_g += g; - total_b += b; - } - - int q10_r = total_r * 2 - uq00_r; - int q10_g = total_g * 2 - uq00_g; - int q10_b = total_b * 2 - uq00_b; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; - - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - return false; - - det = (2.0f / 255.0f) / det; - - float iz00, iz01, iz10, iz11; - iz00 = z11 * det; - iz01 = -z01 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - - return true; - } - - static inline void bc1_get_block_colors4(uint32_t block_r[4], uint32_t block_g[4], uint32_t block_b[4], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) - { - block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); - block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | 
(hb >> 2); - - if (g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) - { - block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; - block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; - } - else if (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4) - { - block_r[1] = (block_r[0] * 2 + block_r[3] + 1) / 3; block_g[1] = (block_g[0] * 2 + block_g[3] + 1) / 3; block_b[1] = (block_b[0] * 2 + block_b[3] + 1) / 3; - block_r[2] = (block_r[3] * 2 + block_r[0] + 1) / 3; block_g[2] = (block_g[3] * 2 + block_g[0] + 1) / 3; block_b[2] = (block_b[3] * 2 + block_b[0] + 1) / 3; - } - else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) - { - block_r[1] = interp_5_6_amd(block_r[0], block_r[3]); block_g[1] = interp_5_6_amd(block_g[0], block_g[3]); block_b[1] = interp_5_6_amd(block_b[0], block_b[3]); - block_r[2] = interp_5_6_amd(block_r[3], block_r[0]); block_g[2] = interp_5_6_amd(block_g[3], block_g[0]); block_b[2] = interp_5_6_amd(block_b[3], block_b[0]); - } - else - { - block_r[1] = interp_5_nv(lr, hr); block_g[1] = interp_6_nv(block_g[0], block_g[3]); block_b[1] = interp_5_nv(lb, hb); - block_r[2] = interp_5_nv(hr, lr); block_g[2] = interp_6_nv(block_g[3], block_g[0]); block_b[2] = interp_5_nv(hb, lb); - } - } - - static inline void bc1_get_block_colors3(uint32_t block_r[3], uint32_t block_g[3], uint32_t block_b[3], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) - { - block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); - block_r[1] = (hr << 3) | (hr >> 2); block_g[1] = (hg << 2) | (hg >> 4); block_b[1] = (hb << 3) | (hb >> 2); - - if ((g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) || (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4)) - { - block_r[2] = (block_r[0] + block_r[1]) / 2; block_g[2] = 
(block_g[0] + block_g[1]) / 2; block_b[2] = (block_b[0] + block_b[1]) / 2; - } - else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) - { - block_r[2] = interp_half_5_6_amd(block_r[0], block_r[1]); block_g[2] = interp_half_5_6_amd(block_g[0], block_g[1]); block_b[2] = interp_half_5_6_amd(block_b[0], block_b[1]); - } - else - { - block_r[2] = interp_half_5_nv(lr, hr); block_g[2] = interp_half_6_nv(block_g[0], block_g[1]); block_b[2] = interp_half_5_nv(lb, hb); - } - } - - static inline void bc1_find_sels4_noerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; - - int dots[4]; - for (uint32_t i = 0; i < 4; i++) - dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - - int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; - - ar *= 2; ag *= 2; ab *= 2; - - static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - - for (uint32_t i = 0; i < 16; i += 4) - { - const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; - const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; - const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; - const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; - - sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; - sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; - sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; - sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; - } - } - - static inline uint32_t bc1_find_sels4_fasterr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t 
sels[16], uint32_t cur_err) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; - - int dots[4]; - for (uint32_t i = 0; i < 4; i++) - dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - - int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; - - ar *= 2; ag *= 2; ab *= 2; - - static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i += 4) - { - const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; - const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; - const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; - const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; - - uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; - uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; - uint8_t sel2 = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; - uint8_t sel3 = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; - - sels[i+0] = sel0; - sels[i+1] = sel1; - sels[i+2] = sel2; - sels[i+3] = sel3; - - total_err += squarei(pSrc_pixels[i+0].r - block_r[sel0]) + squarei(pSrc_pixels[i+0].g - block_g[sel0]) + squarei(pSrc_pixels[i+0].b - block_b[sel0]); - total_err += squarei(pSrc_pixels[i+1].r - block_r[sel1]) + squarei(pSrc_pixels[i+1].g - block_g[sel1]) + squarei(pSrc_pixels[i+1].b - block_b[sel1]); - total_err += squarei(pSrc_pixels[i+2].r - block_r[sel2]) + squarei(pSrc_pixels[i+2].g - block_g[sel2]) + squarei(pSrc_pixels[i+2].b - block_b[sel2]); - total_err += squarei(pSrc_pixels[i+3].r - block_r[sel3]) + squarei(pSrc_pixels[i+3].g - block_g[sel3]) + squarei(pSrc_pixels[i+3].b - block_b[sel3]); - - if (total_err >= cur_err) - break; - } - - return 
total_err; - } - - static inline uint32_t bc1_find_sels4_check2_err(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - int dr = block_r[3] - block_r[0], dg = block_g[3] - block_g[0], db = block_b[3] - block_b[0]; - - const float f = 4.0f / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r; - const int g = pSrc_pixels[i].g; - const int b = pSrc_pixels[i].b; - - int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f); - sel = clampi(sel, 1, 3); - - uint32_t err0 = squarei((int)block_r[sel - 1] - (int)r) + squarei((int)block_g[sel - 1] - (int)g) + squarei((int)block_b[sel - 1] - (int)b); - uint32_t err1 = squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); - - int best_sel = sel; - uint32_t best_err = err1; - if (err0 == err1) - { - // Prefer non-interpolation - if ((best_sel - 1) == 0) - best_sel = 0; - } - else if (err0 < best_err) - { - best_sel = sel - 1; - best_err = err0; - } - - total_err += best_err; - - if (total_err >= cur_err) - break; - - sels[i] = (uint8_t)best_sel; - } - return total_err; - } - - static inline uint32_t bc1_find_sels4_fullerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r; - const int g = pSrc_pixels[i].g; - const int b = pSrc_pixels[i].b; - - uint32_t best_err = 
squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); - uint8_t best_sel = 0; - - for (uint32_t j = 1; (j < 4) && best_err; j++) - { - uint32_t err = squarei((int)block_r[j] - (int)r) + squarei((int)block_g[j] - (int)g) + squarei((int)block_b[j] - (int)b); - if ( (err < best_err) || ((err == best_err) && (j == 3)) ) - { - best_err = err; - best_sel = (uint8_t)j; - } - } - - total_err += best_err; - - if (total_err >= cur_err) - break; - - sels[i] = (uint8_t)best_sel; - } - return total_err; - } - - static inline uint32_t bc1_find_sels4(uint32_t flags, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t err; - - if (flags & cEncodeBC1UseFasterMSEEval) - err = bc1_find_sels4_fasterr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); - else if (flags & cEncodeBC1UseFullMSEEval) - err = bc1_find_sels4_fullerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); - else - err = bc1_find_sels4_check2_err(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); - - return err; - } - - static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t block_r[3], block_g[3], block_b[3]; - bc1_get_block_colors3(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r; - const int g = pSrc_pixels[i].g; - const int b = pSrc_pixels[i].b; - - uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); - uint32_t best_sel = 0; - - uint32_t err1 = squarei((int)block_r[1] - (int)r) + squarei((int)block_g[1] - (int)g) + squarei((int)block_b[1] - (int)b); - if (err1 < best_err) - { - best_err = err1; - best_sel = 1; - } 
- - uint32_t err2 = squarei((int)block_r[2] - (int)r) + squarei((int)block_g[2] - (int)g) + squarei((int)block_b[2] - (int)b); - if (err2 < best_err) - { - best_err = err2; - best_sel = 2; - } - - if (use_black) - { - uint32_t err3 = squarei(r) + squarei(g) + squarei(b); - if (err3 < best_err) - { - best_err = err3; - best_sel = 3; - } - } - - total_err += best_err; - if (total_err >= cur_err) - return total_err; - - sels[i] = (uint8_t)best_sel; - } - - return total_err; - } - - static inline void precise_round_565(const vec3F &xl, const vec3F &xh, - int &trial_lr, int &trial_lg, int &trial_lb, - int &trial_hr, int &trial_hg, int &trial_hb) - { - trial_lr = (int)(xl.c[0] * 31.0f); - trial_lg = (int)(xl.c[1] * 63.0f); - trial_lb = (int)(xl.c[2] * 31.0f); - - trial_hr = (int)(xh.c[0] * 31.0f); - trial_hg = (int)(xh.c[1] * 63.0f); - trial_hb = (int)(xh.c[2] * 31.0f); - - if ((uint32_t)(trial_lr | trial_lb | trial_hr | trial_hb) > 31U) - { - trial_lr = ((uint32_t)trial_lr > 31U) ? (~trial_lr >> 31) & 31 : trial_lr; - trial_hr = ((uint32_t)trial_hr > 31U) ? (~trial_hr >> 31) & 31 : trial_hr; - - trial_lb = ((uint32_t)trial_lb > 31U) ? (~trial_lb >> 31) & 31 : trial_lb; - trial_hb = ((uint32_t)trial_hb > 31U) ? (~trial_hb >> 31) & 31 : trial_hb; - } - - if ((uint32_t)(trial_lg | trial_hg) > 63U) - { - trial_lg = ((uint32_t)trial_lg > 63U) ? (~trial_lg >> 31) & 63 : trial_lg; - trial_hg = ((uint32_t)trial_hg > 63U) ? 
(~trial_hg >> 31) & 63 : trial_hg; - } - - trial_lr = (trial_lr + (xl.c[0] > g_midpoint5[trial_lr])) & 31; - trial_lg = (trial_lg + (xl.c[1] > g_midpoint6[trial_lg])) & 63; - trial_lb = (trial_lb + (xl.c[2] > g_midpoint5[trial_lb])) & 31; - - trial_hr = (trial_hr + (xh.c[0] > g_midpoint5[trial_hr])) & 31; - trial_hg = (trial_hg + (xh.c[1] > g_midpoint6[trial_hg])) & 63; - trial_hb = (trial_hb + (xh.c[2] > g_midpoint5[trial_hb])) & 31; - } - - static inline void precise_round_565_noscale(vec3F xl, vec3F xh, - int &trial_lr, int &trial_lg, int &trial_lb, - int &trial_hr, int &trial_hg, int &trial_hb) - { - xl.c[0] *= 1.0f/255.0f; - xl.c[1] *= 1.0f/255.0f; - xl.c[2] *= 1.0f/255.0f; - - xh.c[0] *= 1.0f/255.0f; - xh.c[1] *= 1.0f/255.0f; - xh.c[2] *= 1.0f/255.0f; - - precise_round_565(xl, xh, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb); - } - - static inline void bc1_encode4(bc1_block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) - { - uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); - uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); - - // Always forbid 3 color blocks - if (lc16 == hc16) - { - uint8_t mask = 0; - - // Make l > h - if (hc16 > 0) - hc16--; - else - { - // lc16 = hc16 = 0 - assert(lc16 == hc16 && hc16 == 0); - - hc16 = 0; - lc16 = 1; - mask = 0x55; // select hc16 - } - - assert(lc16 > hc16); - pDst_block->set_low_color(static_cast(lc16)); - pDst_block->set_high_color(static_cast(hc16)); - - pDst_block->m_selectors[0] = mask; - pDst_block->m_selectors[1] = mask; - pDst_block->m_selectors[2] = mask; - pDst_block->m_selectors[3] = mask; - } - else - { - uint8_t invert_mask = 0; - if (lc16 < hc16) - { - std::swap(lc16, hc16); - invert_mask = 0x55; - } - - assert(lc16 > hc16); - pDst_block->set_low_color((uint16_t)lc16); - pDst_block->set_high_color((uint16_t)hc16); - - uint32_t packed_sels = 0; - static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; - for (uint32_t i = 0; i < 16; i++) 
- packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); - - pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; - pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; - pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; - pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; - } - } - - static inline void bc1_encode3(bc1_block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) - { - uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); - uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); - - bool invert_flag = false; - if (lc16 > hc16) - { - std::swap(lc16, hc16); - invert_flag = true; - } - - assert(lc16 <= hc16); - - pDst_block->set_low_color((uint16_t)lc16); - pDst_block->set_high_color((uint16_t)hc16); - - uint32_t packed_sels = 0; - - if (invert_flag) - { - static const uint8_t s_sel_trans_inv[4] = { 1, 0, 2, 3 }; - - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)s_sel_trans_inv[sels[i]] << (i * 2)); - } - else - { - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)sels[i] << (i * 2)); - } - - pDst_block->m_selectors[0] = (uint8_t)packed_sels; - pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8); - pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16); - pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24); - } - - struct bc1_encode_results - { - int lr, lg, lb; - int hr, hg, hb; - uint8_t sels[16]; - bool m_3color; - }; - - static bool try_3color_block_useblack(const color32* pSrc_pixels, uint32_t flags, uint32_t &cur_err, bc1_encode_results &results) - { - int total_r = 0, total_g = 0, total_b = 0; - int max_r = 0, max_g = 0, max_b = 0; - int min_r = 255, min_g = 255, min_b = 255; - int total_pixels = 0; - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - if ((r | g | b) < 4) - continue; - - max_r = std::max(max_r, r); 
max_g = std::max(max_g, g); max_b = std::max(max_b, b); - min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); - total_r += r; total_g += g; total_b += b; - - total_pixels++; - } - - if (!total_pixels) - return false; - - int half_total_pixels = total_pixels >> 1; - int avg_r = (total_r + half_total_pixels) / total_pixels; - int avg_g = (total_g + half_total_pixels) / total_pixels; - int avg_b = (total_b + half_total_pixels) / total_pixels; - - uint32_t low_c = 0, high_c = 0; - - int icov[6] = { 0, 0, 0, 0, 0, 0 }; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r; - int g = (int)pSrc_pixels[i].g; - int b = (int)pSrc_pixels[i].b; - - if ((r | g | b) < 4) - continue; - - r -= avg_r; - g -= avg_g; - b -= avg_b; - - icov[0] += r * r; - icov[1] += r * g; - icov[2] += r * b; - icov[3] += g * g; - icov[4] += g * b; - icov[5] += b * b; - } - - float cov[6]; - for (uint32_t i = 0; i < 6; i++) - cov[i] = (float)(icov[i]) * (1.0f / 255.0f); - - float xr = (float)(max_r - min_r); - float xg = (float)(max_g - min_g); - float xb = (float)(max_b - min_b); - - if (icov[2] < 0) - xr = -xr; - - if (icov[4] < 0) - xg = -xg; - - for (uint32_t power_iter = 0; power_iter < 4; power_iter++) - { - float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; - float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; - float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; - xr = r; xg = g; xb = b; - } - - float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); - int saxis_r = 306, saxis_g = 601, saxis_b = 117; - if (k >= 2) - { - float m = 1024.0f / k; - saxis_r = (int)(xr * m); - saxis_g = (int)(xg * m); - saxis_b = (int)(xb * m); - } - - int low_dot = INT_MAX, high_dot = INT_MIN; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b; - - if ((r | g | b) < 4) - continue; - - int dot = r * saxis_r + g * saxis_g + b * saxis_b; - if (dot < low_dot) - { - low_dot = dot; - low_c = i; - } - if 
(dot > high_dot) - { - high_dot = dot; - high_c = i; - } - } - - int lr = to_5(pSrc_pixels[low_c].r); - int lg = to_6(pSrc_pixels[low_c].g); - int lb = to_5(pSrc_pixels[low_c].b); - - int hr = to_5(pSrc_pixels[high_c].r); - int hg = to_6(pSrc_pixels[high_c].g); - int hb = to_5(pSrc_pixels[high_c].b); - - uint8_t trial_sels[16]; - uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX); - - if (trial_err) - { - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t trials = 0; trials < total_ls_passes; trials++) - { - vec3F xl, xh; - int lr2, lg2, lb2, hr2, hg2, hb2; - if (!compute_least_squares_endpoints3_rgb(true, pSrc_pixels, trial_sels, &xl, &xh)) - { - lr2 = g_bc1_match5_half[avg_r].m_hi; - lg2 = g_bc1_match6_half[avg_g].m_hi; - lb2 = g_bc1_match5_half[avg_b].m_hi; - - hr2 = g_bc1_match5_half[avg_r].m_lo; - hg2 = g_bc1_match6_half[avg_g].m_lo; - hb2 = g_bc1_match5_half[avg_b].m_lo; - } - else - { - precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); - } - - if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) - break; - - uint8_t trial_sels2[16]; - uint32_t trial_err2 = bc1_find_sels3_fullerr(true, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err); - - if (trial_err2 < trial_err) - { - trial_err = trial_err2; - lr = lr2; lg = lg2; lb = lb2; - hr = hr2; hg = hg2; hb = hb2; - memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); - } - else - break; - } - } - - if (trial_err < cur_err) - { - results.m_3color = true; - results.lr = lr; - results.lg = lg; - results.lb = lb; - results.hr = hr; - results.hg = hg; - results.hb = hb; - memcpy(results.sels, trial_sels, 16); - - cur_err = trial_err; - - return true; - } - - return false; - } - - static bool try_3color_block(const color32* pSrc_pixels, uint32_t flags, uint32_t &cur_err, - int avg_r, int avg_g, int avg_b, int lr, int lg, int lb, int hr, int hg, int hb, 
int total_r, int total_g, int total_b, uint32_t total_orderings_to_try, - bc1_encode_results &results) - { - uint8_t trial_sels[16]; - uint32_t trial_err = bc1_find_sels3_fullerr(false, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX); - - if (trial_err) - { - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t trials = 0; trials < total_ls_passes; trials++) - { - vec3F xl, xh; - int lr2, lg2, lb2, hr2, hg2, hb2; - if (!compute_least_squares_endpoints3_rgb(false, pSrc_pixels, trial_sels, &xl, &xh)) - { - lr2 = g_bc1_match5_half[avg_r].m_hi; - lg2 = g_bc1_match6_half[avg_g].m_hi; - lb2 = g_bc1_match5_half[avg_b].m_hi; - - hr2 = g_bc1_match5_half[avg_r].m_lo; - hg2 = g_bc1_match6_half[avg_g].m_lo; - hb2 = g_bc1_match5_half[avg_b].m_lo; - } - else - { - precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); - } - - if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) - break; - - uint8_t trial_sels2[16]; - uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err); - - if (trial_err2 < trial_err) - { - trial_err = trial_err2; - lr = lr2; lg = lg2; lb = lb2; - hr = hr2; hg = hg2; hb = hb2; - memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); - } - else - break; - } - } - - if ((trial_err) && (flags & cEncodeBC1UseLikelyTotalOrderings) && (total_orderings_to_try)) - { - hist3 h; - for (uint32_t i = 0; i < 16; i++) - { - assert(trial_sels[i] < 3); - h.m_hist[trial_sels[i]]++; - } - - const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); - - int r0, g0, b0, r3, g3, b3; - r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); - r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); - - int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; - - int dots[16]; - for (uint32_t i = 0; i < 16; i++) - { - int r = pSrc_pixels[i].r; - int g = 
pSrc_pixels[i].g; - int b = pSrc_pixels[i].b; - int d = 0x1000000 + (r * ar + g * ag + b * ab); - assert(d >= 0); - dots[i] = (d << 4) + i; - } - - std::sort(dots, dots + 16); - - uint32_t r_sum[17], g_sum[17], b_sum[17]; - uint32_t r = 0, g = 0, b = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t p = dots[i] & 15; - - r_sum[i] = r; - g_sum[i] = g; - b_sum[i] = b; - - r += pSrc_pixels[p].r; - g += pSrc_pixels[p].g; - b += pSrc_pixels[p].b; - } - - r_sum[16] = total_r; - g_sum[16] = total_g; - b_sum[16] = total_b; - - const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS3 : std::min(total_orderings_to_try, MAX_TOTAL_ORDERINGS3); - for (uint32_t q = 0; q < q_total; q++) - { - const uint32_t s = (flags & cEncodeBC1Exhaustive) ? q : g_best_total_orderings3[orig_total_order_index][q]; - - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - - if ((s == TOTAL_ORDER_3_0_16) || (s == TOTAL_ORDER_3_1_16) || (s == TOTAL_ORDER_3_2_16)) - { - trial_lr = g_bc1_match5_half[avg_r].m_hi; - trial_lg = g_bc1_match6_half[avg_g].m_hi; - trial_lb = g_bc1_match5_half[avg_b].m_hi; - - trial_hr = g_bc1_match5_half[avg_r].m_lo; - trial_hg = g_bc1_match6_half[avg_g].m_lo; - trial_hb = g_bc1_match5_half[avg_b].m_lo; - } - else - { - compute_least_squares_endpoints3_rgb(&xl, &xh, total_r, total_g, total_b, - g_selector_factors3[s][0], g_selector_factors3[s][1], g_selector_factors3[s][2], s, r_sum, g_sum, b_sum); - - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - uint8_t trial_sels2[16]; - uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels2, UINT32_MAX); - - if (trial_err2 < trial_err) - { - trial_err = trial_err2; - - lr = trial_lr; - lg = trial_lg; - lb = trial_lb; - - hr = trial_hr; - hg = trial_hg; - hb = trial_hb; - - memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); - } - - } // s 
- } - - if (trial_err < cur_err) - { - results.m_3color = true; - results.lr = lr; - results.lg = lg; - results.lb = lb; - results.hr = hr; - results.hg = hg; - results.hb = hb; - memcpy(results.sels, trial_sels, 16); - - cur_err = trial_err; - - return true; - } - - return false; - } - - void encode_bc1(uint32_t level, void* pDst, const uint8_t* pPixels, bool allow_3color, bool allow_transparent_texels_for_black) - { - uint32_t flags = 0, total_orderings4 = 1, total_orderings3 = 1; - - static_assert(MAX_TOTAL_ORDERINGS3 >= 32, "MAX_TOTAL_ORDERINGS3 >= 32"); - static_assert(MAX_TOTAL_ORDERINGS4 >= 32, "MAX_TOTAL_ORDERINGS4 >= 32"); - - switch (level) - { - case 0: - // Faster/higher quality than stb_dxt default. - flags = cEncodeBC1BoundingBoxInt; - break; - case 1: - // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. - flags = cEncodeBC1Use2DLS; - break; - case 2: - // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). - // Slightly stronger than stb_dxt. - flags = 0; - break; - case 3: - // Slightly stronger than stb_dxt HIGHQUAL. - flags = cEncodeBC1TwoLeastSquaresPasses; - break; - case 4: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1Use6PowerIters; - break; - default: - case 5: - // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - break; - case 6: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - break; - case 7: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 4; - break; - case 8: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 8; - break; - case 9: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 11; - total_orderings3 = 3; - break; - case 10: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 20; - total_orderings3 = 8; - break; - case 11: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 28; - total_orderings3 = 16; - break; - case 12: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 13: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (20 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 14: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 15: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = ((((32 + MAX_TOTAL_ORDERINGS4) / 2) + 32) / 2); - total_orderings3 = 32; - break; - case 16: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = (32 + MAX_TOTAL_ORDERINGS4) / 2; - total_orderings3 = 32; - break; - case 17: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = MAX_TOTAL_ORDERINGS4; - total_orderings3 = 32; - break; - case 18: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = MAX_TOTAL_ORDERINGS4; - total_orderings3 = 32; - break; - case 19: - // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Exhaustive | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - } - - encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3); - } - - static inline void encode_bc1_pick_initial(const color32 *pSrc_pixels, uint32_t flags, bool grayscale_flag, - int min_r, int min_g, int min_b, int max_r, int max_g, int max_b, - int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, - int &lr, int &lg, int &lb, int &hr, int &hg, int &hb) - { - if (grayscale_flag) - { - const int fr = pSrc_pixels[0].r; - - // Grayscale blocks are a common enough case to specialize. - if ((max_r - min_r) < 2) - { - lr = lb = hr = hb = to_5(fr); - lg = hg = to_6(fr); - } - else - { - lr = lb = to_5(min_r); - lg = to_6(min_r); - - hr = hb = to_5(max_r); - hg = to_6(max_r); - } - } - else if (flags & cEncodeBC1Use2DLS) - { - // 2D Least Squares approach from Humus's example, with added inset and optimal rounding. - int big_chan = 0, min_chan_val = min_r, max_chan_val = max_r; - if ((max_g - min_g) > (max_chan_val - min_chan_val)) - { - big_chan = 1; min_chan_val = min_g; max_chan_val = max_g; - } - if ((max_b - min_b) > (max_chan_val - min_chan_val)) - { - big_chan = 2; min_chan_val = min_b; max_chan_val = max_b; - } - int sum_xy_r = 0, sum_xy_g = 0, sum_xy_b = 0; - vec3F l, h; - if (big_chan == 0) - { - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - sum_xy_r += r * r; sum_xy_g += r * g; sum_xy_b += r * b; - } - - int sum_x = total_r; - int sum_x2 = sum_xy_r; - - float div = (float)(16 * sum_x2 - sum_x * sum_x); - float b_y = 0.0f, b_z = 0.0f; - if (fabs(div) > 1e-8f) - { - div = 1.0f / div; - b_y = (16 * sum_xy_g - sum_x * total_g) * div; - b_z = (16 * sum_xy_b - sum_x * total_b) * div; - } - - float a_y = (total_g - b_y * sum_x) / 16.0f; - float a_z = (total_b - b_z * sum_x) / 16.0f; - - l.c[1] = a_y + b_y * min_chan_val; - l.c[2] = a_z + b_z * min_chan_val; - 
- h.c[1] = a_y + b_y * max_chan_val; - h.c[2] = a_z + b_z * max_chan_val; - - float dg = (h.c[1] - l.c[1]); - float db = (h.c[2] - l.c[2]); - - h.c[1] = l.c[1] + dg * (15.0f/16.0f); - h.c[2] = l.c[2] + db * (15.0f/16.0f); - - l.c[1] = l.c[1] + dg * (1.0f/16.0f); - l.c[2] = l.c[2] + db * (1.0f/16.0f); - - float d = (float)(max_chan_val - min_chan_val); - float fmin_chan_val = min_chan_val + d * (1.0f/16.0f); - float fmax_chan_val = min_chan_val + d * (15.0f/16.0f); - - l.c[0] = fmin_chan_val; - h.c[0] = fmax_chan_val; - } - else if (big_chan == 1) - { - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - sum_xy_r += g * r; sum_xy_g += g * g; sum_xy_b += g * b; - } - - int sum_x = total_g; - int sum_x2 = sum_xy_g; - - float div = (float)(16 * sum_x2 - sum_x * sum_x); - float b_x = 0.0f, b_z = 0.0f; - if (fabs(div) > 1e-8f) - { - div = 1.0f / div; - b_x = (16 * sum_xy_r - sum_x * total_r) * div; - b_z = (16 * sum_xy_b - sum_x * total_b) * div; - } - - float a_x = (total_r - b_x * sum_x) / 16.0f; - float a_z = (total_b - b_z * sum_x) / 16.0f; - - l.c[0] = a_x + b_x * min_chan_val; - l.c[2] = a_z + b_z * min_chan_val; - - h.c[0] = a_x + b_x * max_chan_val; - h.c[2] = a_z + b_z * max_chan_val; - - float dr = (h.c[0] - l.c[0]); - float db = (h.c[2] - l.c[2]); - - h.c[0] = l.c[0] + dr * (15.0f/16.0f); - h.c[2] = l.c[2] + db * (15.0f/16.0f); - - l.c[0] = l.c[0] + dr * (1.0f/16.0f); - l.c[2] = l.c[2] + db * (1.0f/16.0f); - - float d = (float)(max_chan_val - min_chan_val); - float fmin_chan_val = min_chan_val + d * (1.0f/16.0f); - float fmax_chan_val = min_chan_val + d * (15.0f/16.0f); - - l.c[1] = fmin_chan_val; - h.c[1] = fmax_chan_val; - } - else - { - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - sum_xy_r += b * r; sum_xy_g += b * g; sum_xy_b += b * b; - } - - int sum_x = total_b; - int sum_x2 = sum_xy_b; - - float div = (float)(16 * 
sum_x2 - sum_x * sum_x); - float b_x = 0.0f, b_y = 0.0f; - if (fabs(div) > 1e-8f) - { - div = 1.0f / div; - b_x = (16 * sum_xy_r - sum_x * total_r) * div; - b_y = (16 * sum_xy_g - sum_x * total_g) * div; - } - - float a_x = (total_r - b_x * sum_x) / 16.0f; - float a_y = (total_g - b_y * sum_x) / 16.0f; - - l.c[0] = a_x + b_x * min_chan_val; - l.c[1] = a_y + b_y * min_chan_val; - - h.c[0] = a_x + b_x * max_chan_val; - h.c[1] = a_y + b_y * max_chan_val; - - float dr = (h.c[0] - l.c[0]); - float dg = (h.c[1] - l.c[1]); - - h.c[0] = l.c[0] + dr * (15.0f/16.0f); - h.c[1] = l.c[1] + dg * (15.0f/16.0f); - - l.c[0] = l.c[0] + dr * (1.0f/16.0f); - l.c[1] = l.c[1] + dg * (1.0f/16.0f); - - float d = (float)(max_chan_val - min_chan_val); - float fmin_chan_val = min_chan_val + d * (1.0f/16.0f); - float fmax_chan_val = min_chan_val + d * (15.0f/16.0f); - - l.c[2] = fmin_chan_val; - h.c[2] = fmax_chan_val; - } - - precise_round_565_noscale(l, h, lr, lg, lb, hr, hg, hb); - } - else if (flags & cEncodeBC1BoundingBox) - { - // Algorithm from icbc.h compress_dxt1_fast() - vec3F l, h; - l.c[0] = min_r * (1.0f/255.0f); - l.c[1] = min_g * (1.0f/255.0f); - l.c[2] = min_b * (1.0f/255.0f); - - h.c[0] = max_r * (1.0f/255.0f); - h.c[1] = max_g * (1.0f/255.0f); - h.c[2] = max_b * (1.0f/255.0f); - - const float bias = 8.0f / 255.0f; - float inset_r = (h.c[0] - l.c[0] - bias) * (1.0f/16.0f); - float inset_g = (h.c[1] - l.c[1] - bias) * (1.0f/16.0f); - float inset_b = (h.c[2] - l.c[2] - bias) * (1.0f/16.0f); - - l.c[0] = clampf(l.c[0] + inset_r, 0.0f, 1.0f); - l.c[1] = clampf(l.c[1] + inset_g, 0.0f, 1.0f); - l.c[2] = clampf(l.c[2] + inset_b, 0.0f, 1.0f); - - h.c[0] = clampf(h.c[0] - inset_r, 0.0f, 1.0f); - h.c[1] = clampf(h.c[1] - inset_g, 0.0f, 1.0f); - h.c[2] = clampf(h.c[2] - inset_b, 0.0f, 1.0f); - - int icov_xz = 0, icov_yz = 0; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r - avg_r; - int g = (int)pSrc_pixels[i].g - avg_g; - int b = (int)pSrc_pixels[i].b - avg_b; - 
icov_xz += r * b; - icov_yz += g * b; - } - - if (icov_xz < 0) - std::swap(l.c[0], h.c[0]); - - if (icov_yz < 0) - std::swap(l.c[1], h.c[1]); - - precise_round_565(l, h, lr, lg, lb, hr, hg, hb); - } - else if (flags & cEncodeBC1BoundingBoxInt) - { - // Algorithm from icbc.h compress_dxt1_fast(), but converted to integer. - int inset_r = (max_r - min_r - 8) >> 4; - int inset_g = (max_g - min_g - 8) >> 4; - int inset_b = (max_b - min_b - 8) >> 4; - - min_r += inset_r; - min_g += inset_g; - min_b += inset_b; - if ((uint32_t)(min_r | min_g | min_b) > 255U) - { - min_r = clampi(min_r, 0, 255); - min_g = clampi(min_g, 0, 255); - min_b = clampi(min_b, 0, 255); - } - - max_r -= inset_r; - max_g -= inset_g; - max_b -= inset_b; - if ((uint32_t)(max_r | max_g | max_b) > 255U) - { - max_r = clampi(max_r, 0, 255); - max_g = clampi(max_g, 0, 255); - max_b = clampi(max_b, 0, 255); - } - - int icov_xz = 0, icov_yz = 0; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r - avg_r; - int g = (int)pSrc_pixels[i].g - avg_g; - int b = (int)pSrc_pixels[i].b - avg_b; - icov_xz += r * b; - icov_yz += g * b; - } - - int x0 = min_r; - int y0 = min_g; - int x1 = max_r; - int y1 = max_g; - - if (icov_xz < 0) - std::swap(x0, x1); - - if (icov_yz < 0) - std::swap(y0, y1); - - lr = to_5(x0); - lg = to_6(y0); - lb = to_5(min_b); - - hr = to_5(x1); - hg = to_6(y1); - hb = to_5(max_b); - } - else - { - // Select 2 colors along the principle axis. (There must be a faster/simpler way.) 
- uint32_t low_c = 0, high_c = 0; - - int icov[6] = { 0, 0, 0, 0, 0, 0 }; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r - avg_r; - int g = (int)pSrc_pixels[i].g - avg_g; - int b = (int)pSrc_pixels[i].b - avg_b; - icov[0] += r * r; - icov[1] += r * g; - icov[2] += r * b; - icov[3] += g * g; - icov[4] += g * b; - icov[5] += b * b; - } - - int saxis_r = 306, saxis_g = 601, saxis_b = 117; - - float xr = (float)(max_r - min_r); - float xg = (float)(max_g - min_g); - float xb = (float)(max_b - min_b); - - if (icov[2] < 0) - xr = -xr; - - if (icov[4] < 0) - xg = -xg; - - float cov[6]; - for (uint32_t i = 0; i < 6; i++) - cov[i] = (float)(icov[i]) * (1.0f / 255.0f); - - const uint32_t total_power_iters = (flags & cEncodeBC1Use6PowerIters) ? 6 : 4; - for (uint32_t power_iter = 0; power_iter < total_power_iters; power_iter++) - { - float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; - float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; - float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; - xr = r; xg = g; xb = b; - } - - float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); - if (k >= 2) - { - float m = 2048.0f / k; - saxis_r = (int)(xr * m); - saxis_g = (int)(xg * m); - saxis_b = (int)(xb * m); - } - - int low_dot = INT_MAX, high_dot = INT_MIN; - - saxis_r = (int)((uint32_t)saxis_r << 4U); - saxis_g = (int)((uint32_t)saxis_g << 4U); - saxis_b = (int)((uint32_t)saxis_b << 4U); - - for (uint32_t i = 0; i < 16; i += 4) - { - int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i; - int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1; - int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2; - int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3; - - int min_d01 = std::min(dot0, 
dot1); - int max_d01 = std::max(dot0, dot1); - - int min_d23 = std::min(dot2, dot3); - int max_d23 = std::max(dot2, dot3); - - int min_d = std::min(min_d01, min_d23); - int max_d = std::max(max_d01, max_d23); - - low_dot = std::min(low_dot, min_d); - high_dot = std::max(high_dot, max_d); - } - low_c = low_dot & 15; - high_c = high_dot & 15; - - lr = to_5(pSrc_pixels[low_c].r); - lg = to_6(pSrc_pixels[low_c].g); - lb = to_5(pSrc_pixels[low_c].b); - - hr = to_5(pSrc_pixels[high_c].r); - hg = to_6(pSrc_pixels[high_c].g); - hb = to_5(pSrc_pixels[high_c].b); - } - } - - static const int8_t s_adjacent_voxels[16][4] = - { - { 1,0,0, 3 }, // 0 - { 0,1,0, 4 }, // 1 - { 0,0,1, 5 }, // 2 - { -1,0,0, 0 }, // 3 - { 0,-1,0, 1 }, // 4 - { 0,0,-1, 2 }, // 5 - { 1,1,0, 9 }, // 6 - { 1,0,1, 10 }, // 7 - { 0,1,1, 11 }, // 8 - { -1,-1,0, 6 }, // 9 - { -1,0,-1, 7 }, // 10 - { 0,-1,-1, 8 }, // 11 - { -1,1,0, 13 }, // 12 - { 1,-1,0, 12 }, // 13 - { 0,-1,1, 15 }, // 14 - { 0,1,-1, 14 }, // 15 - }; - - // From icbc's high quality mode. 
- static inline void encode_bc1_endpoint_search(const color32 *pSrc_pixels, bool any_black_pixels, - uint32_t flags, bc1_encode_results &results, uint32_t cur_err) - { - int &lr = results.lr, &lg = results.lg, &lb = results.lb, &hr = results.hr, &hg = results.hg, &hb = results.hb; - uint8_t *sels = results.sels; - - int prev_improvement_index = 0, forbidden_direction = -1; - - const int endpoint_search_rounds = (flags & cEncodeBC1EndpointSearchRoundsMask) >> cEncodeBC1EndpointSearchRoundsShift; - for (int i = 0; i < endpoint_search_rounds; i++) - { - assert(s_adjacent_voxels[ s_adjacent_voxels[i & 15][3] ][3] == (i & 15)); - - if (forbidden_direction == (i & 31)) - continue; - - const int8_t delta[3] = { s_adjacent_voxels[i & 15][0], s_adjacent_voxels[i & 15][1], s_adjacent_voxels[i & 15][2] }; - - int trial_lr = lr, trial_lg = lg, trial_lb = lb, trial_hr = hr, trial_hg = hg, trial_hb = hb; - - if ((i >> 4) & 1) - { - trial_lr = clampi(trial_lr + delta[0], 0, 31); - trial_lg = clampi(trial_lg + delta[1], 0, 63); - trial_lb = clampi(trial_lb + delta[2], 0, 31); - } - else - { - trial_hr = clampi(trial_hr + delta[0], 0, 31); - trial_hg = clampi(trial_hg + delta[1], 0, 63); - trial_hb = clampi(trial_hb + delta[2], 0, 31); - } - - uint8_t trial_sels[16]; - - uint32_t trial_err; - if (results.m_3color) - { - trial_err = bc1_find_sels3_fullerr( - ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)), - pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err); - } - else - { - trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err); - } - - if (trial_err < cur_err) - { - cur_err = trial_err; - - forbidden_direction = s_adjacent_voxels[i & 15][3] | (i & 16); - - lr = trial_lr; lg = trial_lg; lb = trial_lb; hr = trial_hr; hg = trial_hg; hb = trial_hb; - - memcpy(sels, trial_sels, 16); - - prev_improvement_index = i; - } - - if (i - 
prev_improvement_index > 32) - break; - } - } - - void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try, uint32_t total_orderings_to_try3) - { - assert(g_initialized); - - const color32* pSrc_pixels = (const color32*)pPixels; - bc1_block* pDst_block = static_cast(pDst); - - int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b; - - const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; - - uint32_t j; - for (j = 15; j >= 1; --j) - if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) - break; - - if (j == 0) - { - encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0); - return; - } - - int total_r = fr, total_g = fg, total_b = fb; - - max_r = fr; max_g = fg; max_b = fb; - min_r = fr; min_g = fg; min_b = fb; - - uint32_t grayscale_flag = (fr == fg) && (fr == fb); - uint32_t any_black_pixels = (fr | fg | fb) < 4; - - for (uint32_t i = 1; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - - grayscale_flag &= ((r == g) && (r == b)); - any_black_pixels |= ((r | g | b) < 4); - - max_r = std::max(max_r, r); max_g = std::max(max_g, g); max_b = std::max(max_b, b); - min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); - total_r += r; total_g += g; total_b += b; - } - - avg_r = (total_r + 8) >> 4; avg_g = (total_g + 8) >> 4; avg_b = (total_b + 8) >> 4; - - bc1_encode_results results; - results.m_3color = false; - - uint8_t *sels = results.sels; - int &lr = results.lr, &lg = results.lg, &lb = results.lb, &hr = results.hr, &hg = results.hg, &hb = results.hb; - int orig_lr = 0, orig_lg = 0, orig_lb = 0, orig_hr = 0, orig_hg = 0, orig_hb = 0; - - lr = 0; lg = 0; lb = 0; hr = 0; hg = 0; hb = 0; - - const bool needs_block_error = ((flags & (cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use3ColorBlocks | 
cEncodeBC1UseFullMSEEval | cEncodeBC1EndpointSearchRoundsMask)) != 0) || - (any_black_pixels && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)); - - uint32_t cur_err = UINT32_MAX; - - if (!needs_block_error) - { - assert((flags & cEncodeBC1TryAllInitialEndponts) == 0); - - encode_bc1_pick_initial(pSrc_pixels, flags, grayscale_flag != 0, - min_r, min_g, min_b, max_r, max_g, max_b, - avg_r, avg_g, avg_b, total_r, total_g, total_b, - lr, lg, lb, hr, hg, hb); - - orig_lr = lr; orig_lg = lg; orig_lb = lb; orig_hr = hr; orig_hg = hg; orig_hb = hb; - - bc1_find_sels4_noerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); - - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) - { - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, sels, &xl, &xh, total_r, total_g, total_b)) - { - // All selectors equal - treat it as a solid block which should always be equal or better. - trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; - trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; - trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; - - trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; - trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; - trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; - - // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. 
- } - else - { - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - if ((lr == trial_lr) && (lg == trial_lg) && (lb == trial_lb) && (hr == trial_hr) && (hg == trial_hg) && (hb == trial_hb)) - break; - - bc1_find_sels4_noerr(pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, sels); - - lr = trial_lr; - lg = trial_lg; - lb = trial_lb; - hr = trial_hr; - hg = trial_hg; - hb = trial_hb; - - } // ls_pass - } - else - { - const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1; - for (uint32_t round = 0; round < total_rounds; round++) - { - uint32_t modified_flags = flags; - if (round == 1) - { - modified_flags &= ~(cEncodeBC1Use2DLS | cEncodeBC1BoundingBox); - modified_flags |= cEncodeBC1BoundingBox; - } - - int round_lr, round_lg, round_lb, round_hr, round_hg, round_hb; - uint8_t round_sels[16]; - - encode_bc1_pick_initial(pSrc_pixels, modified_flags, grayscale_flag != 0, - min_r, min_g, min_b, max_r, max_g, max_b, - avg_r, avg_g, avg_b, total_r, total_g, total_b, - round_lr, round_lg, round_lb, round_hr, round_hg, round_hb); - - int orig_round_lr = round_lr, orig_round_lg = round_lg, orig_round_lb = round_lb, orig_round_hr = round_hr, orig_round_hg = round_hg, orig_round_hb = round_hb; - - uint32_t round_err = bc1_find_sels4(flags, pSrc_pixels, round_lr, round_lg, round_lb, round_hr, round_hg, round_hb, round_sels, UINT32_MAX); - - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) - { - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, round_sels, &xl, &xh, total_r, total_g, total_b)) - { - // All selectors equal - treat it as a solid block which should always be equal or better. 
- trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; - trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; - trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; - - trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; - trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; - trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; - - // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. - } - else - { - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - if ((round_lr == trial_lr) && (round_lg == trial_lg) && (round_lb == trial_lb) && (round_hr == trial_hr) && (round_hg == trial_hg) && (round_hb == trial_hb)) - break; - - uint8_t trial_sels[16]; - uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, round_err); - - if (trial_err < round_err) - { - round_lr = trial_lr; - round_lg = trial_lg; - round_lb = trial_lb; - - round_hr = trial_hr; - round_hg = trial_hg; - round_hb = trial_hb; - - round_err = trial_err; - memcpy(round_sels, trial_sels, 16); - } - else - break; - - } // ls_pass - - if (round_err <= cur_err) - { - cur_err = round_err; - - lr = round_lr; - lg = round_lg; - lb = round_lb; - hr = round_hr; - hg = round_hg; - hb = round_hb; - - orig_lr = orig_round_lr; - orig_lg = orig_round_lg; - orig_lb = orig_round_lb; - orig_hr = orig_round_hr; - orig_hg = orig_round_hg; - orig_hb = orig_round_hb; - - memcpy(sels, round_sels, 16); - } - - } // round - } - - if ((cur_err) && (flags & cEncodeBC1UseLikelyTotalOrderings)) - { - assert(needs_block_error); - - const uint32_t total_iters = (flags & cEncodeBC1Iterative) ? 
2 : 1; - for (uint32_t iter_index = 0; iter_index < total_iters; iter_index++) - { - const uint32_t orig_err = cur_err; - - hist4 h; - for (uint32_t i = 0; i < 16; i++) - { - assert(sels[i] < 4); - h.m_hist[sels[i]]++; - } - - const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); - - int r0, g0, b0, r3, g3, b3; - r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); - r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); - - int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; - - int dots[16]; - for (uint32_t i = 0; i < 16; i++) - { - int r = pSrc_pixels[i].r; - int g = pSrc_pixels[i].g; - int b = pSrc_pixels[i].b; - int d = 0x1000000 + (r * ar + g * ag + b * ab); - assert(d >= 0); - dots[i] = (d << 4) + i; - } - - std::sort(dots, dots + 16); - - uint32_t r_sum[17], g_sum[17], b_sum[17]; - uint32_t r = 0, g = 0, b = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t p = dots[i] & 15; - - r_sum[i] = r; - g_sum[i] = g; - b_sum[i] = b; - - r += pSrc_pixels[p].r; - g += pSrc_pixels[p].g; - b += pSrc_pixels[p].b; - } - - r_sum[16] = total_r; - g_sum[16] = total_g; - b_sum[16] = total_b; - - const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : clampi(total_orderings_to_try, MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4); - for (uint32_t q = 0; q < q_total; q++) - { - const uint32_t s = (flags & cEncodeBC1Exhaustive) ? 
q : g_best_total_orderings4[orig_total_order_index][q]; - - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - - if ((s == TOTAL_ORDER_4_0_16) || (s == TOTAL_ORDER_4_1_16) || (s == TOTAL_ORDER_4_2_16) || (s == TOTAL_ORDER_4_3_16)) - { - trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; - trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; - trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; - - trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; - trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; - trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; - } - else - { - compute_least_squares_endpoints4_rgb(&xl, &xh, total_r, total_g, total_b, - g_selector_factors4[s][0], g_selector_factors4[s][1], g_selector_factors4[s][2], s, r_sum, g_sum, b_sum); - - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - uint8_t trial_sels[16]; - - uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err); - - if (trial_err < cur_err) - { - cur_err = trial_err; - - lr = trial_lr; - lg = trial_lg; - lb = trial_lb; - - hr = trial_hr; - hg = trial_hg; - hb = trial_hb; - - memcpy(sels, trial_sels, 16); - } - - } // s - - if ((!cur_err) || (cur_err == orig_err)) - break; - - } // iter_index - } - - if ( ((flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0) && (cur_err) ) - { - if (flags & cEncodeBC1Use3ColorBlocks) - { - assert(needs_block_error); - try_3color_block(pSrc_pixels, flags, cur_err, avg_r, avg_g, avg_b, orig_lr, orig_lg, orig_lb, orig_hr, orig_hg, orig_hb, total_r, total_g, total_b, total_orderings_to_try3, results); - } - - if ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)) - { - assert(needs_block_error); - try_3color_block_useblack(pSrc_pixels, flags, cur_err, results); - } - } - - if ( (flags & cEncodeBC1EndpointSearchRoundsMask) && (cur_err) ) - { - assert(needs_block_error); - - 
encode_bc1_endpoint_search(pSrc_pixels, any_black_pixels != 0, flags, results, cur_err); - } - - if (results.m_3color) - bc1_encode3(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); - else - bc1_encode4(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); - } - - // BC3-5 - - struct bc4_block - { - enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; - uint8_t m_endpoints[2]; - - uint8_t m_selectors[cTotalSelectorBytes]; - - inline uint32_t get_low_alpha() const { return m_endpoints[0]; } - inline uint32_t get_high_alpha() const { return m_endpoints[1]; } - inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } - - inline uint64_t get_selector_bits() const - { - return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | - (((uint64_t)m_selectors[4]) << 32U) | - (((uint64_t)m_selectors[5]) << 40U); - } - - inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const - { - assert((x < 4U) && (y < 4U)); - return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits))& (cMaxSelectorValues - 1); - } - - static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h) - { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 4 + h) / 5); - pDst[3] = static_cast((l * 3 + h * 2) / 5); - pDst[4] = static_cast((l * 2 + h * 3) / 5); - pDst[5] = static_cast((l + h * 4) / 5); - pDst[6] = 0; - pDst[7] = 255; - return 6; - } - - static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h) - { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 6 + h) / 7); - pDst[3] = static_cast((l * 5 + h * 2) / 7); - pDst[4] = static_cast((l * 4 + h * 3) / 7); - pDst[5] = static_cast((l * 3 + h * 4) / 7); - pDst[6] = static_cast((l * 
2 + h * 5) / 7); - pDst[7] = static_cast((l + h * 6) / 7); - return 8; - } - - static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h) - { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); - } - }; - - void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) - { - assert(g_initialized); - - uint32_t min0_v, max0_v, min1_v, max1_v, min2_v, max2_v, min3_v, max3_v; - - { - min0_v = max0_v = pPixels[0 * stride]; - min1_v = max1_v = pPixels[1 * stride]; - min2_v = max2_v = pPixels[2 * stride]; - min3_v = max3_v = pPixels[3 * stride]; - } - - { - uint32_t v0 = pPixels[4 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); - uint32_t v1 = pPixels[5 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); - uint32_t v2 = pPixels[6 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); - uint32_t v3 = pPixels[7 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); - } - - { - uint32_t v0 = pPixels[8 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); - uint32_t v1 = pPixels[9 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); - uint32_t v2 = pPixels[10 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); - uint32_t v3 = pPixels[11 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); - } - - { - uint32_t v0 = pPixels[12 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); - uint32_t v1 = pPixels[13 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); - uint32_t v2 = pPixels[14 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); - uint32_t v3 = pPixels[15 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); - } - - const uint32_t min_v = minimum(min0_v, min1_v, min2_v, min3_v); - const uint32_t max_v = maximum(max0_v, max1_v, max2_v, max3_v); - - uint8_t* 
pDst_bytes = static_cast(pDst); - pDst_bytes[0] = (uint8_t)max_v; - pDst_bytes[1] = (uint8_t)min_v; - - if (max_v == min_v) - { - memset(pDst_bytes + 2, 0, 6); - return; - } - - const uint32_t delta = max_v - min_v; - - // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. - const int t0 = delta * 13; - const int t1 = delta * 11; - const int t2 = delta * 9; - const int t3 = delta * 7; - const int t4 = delta * 5; - const int t5 = delta * 3; - const int t6 = delta * 1; - - // BC4 floors in its divisions, which we compensate for with the 4 bias. - // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). - const int bias = 4 - min_v * 14; - - static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; - static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; - static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; - static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; - - uint64_t a0, a1, a2, a3; - { - const int v0 = pPixels[0 * stride] * 14 + bias; - const int v1 = pPixels[1 * stride] * 14 + bias; - const int v2 = pPixels[2 * stride] * 14 + bias; - const int v3 = pPixels[3 * stride] * 14 + bias; - a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; - a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; - a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; - a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; - } - - { - const int v0 = pPixels[4 * stride] * 14 + bias; - const int v1 = pPixels[5 * stride] * 
14 + bias; - const int v2 = pPixels[6 * stride] * 14 + bias; - const int v3 = pPixels[7 * stride] * 14 + bias; - a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); - a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); - a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); - a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); - } - - { - const int v0 = pPixels[8 * stride] * 14 + bias; - const int v1 = pPixels[9 * stride] * 14 + bias; - const int v2 = pPixels[10 * stride] * 14 + bias; - const int v3 = pPixels[11 * stride] * 14 + bias; - a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); - a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); - a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); - a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); - } - - { - const int v0 = pPixels[12 * stride] * 14 + bias; - const int v1 = pPixels[13 * stride] * 14 + bias; - const int v2 = pPixels[14 * stride] * 14 + bias; - const int v3 = pPixels[15 * stride] * 14 + bias; - a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); - a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); - a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); - a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= 
t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); - } - - const uint64_t f = a0 | a1 | a2 | a3; - - pDst_bytes[2] = (uint8_t)f; - pDst_bytes[3] = (uint8_t)(f >> 8U); - pDst_bytes[4] = (uint8_t)(f >> 16U); - pDst_bytes[5] = (uint8_t)(f >> 24U); - pDst_bytes[6] = (uint8_t)(f >> 32U); - pDst_bytes[7] = (uint8_t)(f >> 40U); - } - - void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try) - { - assert(g_initialized); - - // 3-color blocks are not allowed with BC3 (on most GPU's). - flags &= ~(cEncodeBC1Use3ColorBlocksForBlackPixels | cEncodeBC1Use3ColorBlocks); - - encode_bc4(pDst, pPixels + 3, 4); - encode_bc1(static_cast(pDst) + 8, pPixels, flags, total_orderings_to_try); - } - - void encode_bc3(uint32_t level, void* pDst, const uint8_t* pPixels) - { - assert(g_initialized); - - encode_bc4(pDst, pPixels + 3, 4); - encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); - } - - void encode_bc5(void* pDst, const uint8_t* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) - { - assert(g_initialized); - - encode_bc4(pDst, pPixels + chan0, stride); - encode_bc4(static_cast(pDst) + 8, pPixels + chan1, stride); - } - - // Returns true if the block uses 3 color punchthrough alpha mode. 
- bool unpack_bc1(const void* pBlock_bits, void* pPixels, bool set_alpha, bc1_approx_mode mode) - { - color32* pDst_pixels = static_cast(pPixels); - - static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); - static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); - - const bc1_block* pBlock = static_cast(pBlock_bits); - - const uint32_t l = pBlock->get_low_color(); - const uint32_t h = pBlock->get_high_color(); - - color32 c[4]; - - const int cr0 = (l >> 11) & 31; - const int cg0 = (l >> 5) & 63; - const int cb0 = l & 31; - const int r0 = (cr0 << 3) | (cr0 >> 2); - const int g0 = (cg0 << 2) | (cg0 >> 4); - const int b0 = (cb0 << 3) | (cb0 >> 2); - - const int cr1 = (h >> 11) & 31; - const int cg1 = (h >> 5) & 63; - const int cb1 = h & 31; - const int r1 = (cr1 << 3) | (cr1 >> 2); - const int g1 = (cg1 << 2) | (cg1 >> 4); - const int b1 = (cb1 << 3) | (cb1 >> 2); - - bool used_punchthrough = false; - - if (l > h) - { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); - switch (mode) - { - case bc1_approx_mode::cBC1Ideal: - c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); - c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); - break; - case bc1_approx_mode::cBC1IdealRound4: - c[2].set_noclamp_rgba((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); - c[3].set_noclamp_rgba((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); - break; - case bc1_approx_mode::cBC1NVidia: - c[2].set_noclamp_rgba(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); - c[3].set_noclamp_rgba(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); - break; - case bc1_approx_mode::cBC1AMD: - c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); - c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); - 
break; - } - } - else - { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); - switch (mode) - { - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); - break; - case bc1_approx_mode::cBC1NVidia: - c[2].set_noclamp_rgba(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); - break; - case bc1_approx_mode::cBC1AMD: - c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); - break; - } - - c[3].set_noclamp_rgba(0, 0, 0, 0); - used_punchthrough = true; - } - - if (set_alpha) - { - for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) - { - pDst_pixels[0] = c[pBlock->get_selector(0, y)]; - pDst_pixels[1] = c[pBlock->get_selector(1, y)]; - pDst_pixels[2] = c[pBlock->get_selector(2, y)]; - pDst_pixels[3] = c[pBlock->get_selector(3, y)]; - } - } - else - { - for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) - { - pDst_pixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pDst_pixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pDst_pixels[2].set_rgb(c[pBlock->get_selector(2, y)]); - pDst_pixels[3].set_rgb(c[pBlock->get_selector(3, y)]); - } - } - - return used_punchthrough; - } - - void unpack_bc4(const void* pBlock_bits, uint8_t* pPixels, uint32_t stride) - { - static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); - - const bc4_block* pBlock = static_cast(pBlock_bits); - - uint8_t sel_values[8]; - bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); - - const uint64_t selector_bits = pBlock->get_selector_bits(); - - for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) - { - pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; - pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)]; - pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)]; - 
pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; - } - } - - // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. - bool unpack_bc3(const void* pBlock_bits, void* pPixels, bc1_approx_mode mode) - { - color32* pDst_pixels = static_cast(pPixels); - - bool success = true; - - if (unpack_bc1((const uint8_t*)pBlock_bits + sizeof(bc4_block), pDst_pixels, true, mode)) - success = false; - - unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(color32)); - - return success; - } - - // writes RG - void unpack_bc5(const void* pBlock_bits, void* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) - { - unpack_bc4(pBlock_bits, (uint8_t *)pPixels + chan0, stride); - unpack_bc4((const uint8_t*)pBlock_bits + sizeof(bc4_block), (uint8_t *)pPixels + chan1, stride); - } - -} // namespace rgbcx +} +#endif // #ifndef RGBCX_INCLUDE_H +#ifdef RGBCX_IMPLEMENTATION #endif //#ifdef RGBCX_IMPLEMENTATION /* diff --git a/libkram/bc7enc/rgbcx_table4_small.h b/libkram/bc7enc/rgbcx_table4_small.h new file mode 100644 index 0000000..30ccc70 --- /dev/null +++ b/libkram/bc7enc/rgbcx_table4_small.h @@ -0,0 +1,969 @@ +{ 202, 120, 13, 318, 15, 23, 403, 450, 5, 51, 260, 128, 77, 21, 33, 494, 515, 523, 4, 141, 269, 1, 2, 700, 137, 49, 48, 102, 7, 64, 753, 82 }, +{ 13,141,23,217,115,51,77,2,64,21,0,4,5,317,137,269,202,33,318,7,291,352,9,10,3,180,32,6,365,102,341,349 }, +{ 29,58,262,1,52,74,6,171,5,287,151,334,27,500,75,26,331,223,53,635,220,19,50,45,46,17,14,396,163,409,324,70 }, +{ 40,51,33,453,14,23,62,56,12,196,730,475,153,99,403,775,117,130,585,34,4,17,162,11,139,57,102,38,108,47,123,440 }, +{ 33,23,51,13,102,64,202,128,12,40,15,196,153,10,1,2,77,99,141,0,515,5,117,3,120,403,700,165,22,14,269,453 }, +{ 13,23,51,4,77,141,202,33,115,64,32,128,0,11,177,40,15,102,2,217,7,137,269,21,90,59,515,1,180,403,22,6 }, +{ 
26,235,19,47,648,624,78,145,27,112,122,64,444,6,630,453,25,42,65,130,711,85,390,113,416,108,665,29,730,138,644,95 }, +{ 64,141,352,751,217,247,237,437,177,269,86,954,947,875,32,318,95,77,304,92,597,180,232,291,128,864,349,588,372,202,312,1 }, +{ 642,898,180,638,901,341,82,197,10,951,15,515,165,762,700,253,811,753,752,365,143,479,244,569,8,110,351,873,55,31,499,116 }, +{ 221,23,51,125,438,254,13,21,39,49,308,656,0,115,530,159,158,401,30,166,912,386,165,688,518,9,105,627,424,22,421,33 }, +{ 143,31,1,44,197,8,180,125,116,55,13,498,23,341,638,242,93,15,2,141,0,901,752,115,36,206,165,479,338,365,515,762 }, +{ 12,23,51,13,14,15,37,99,515,38,700,117,2,196,134,153,753,64,54,33,128,120,21,0,328,5,139,82,453,719,457,1 }, +{ 13,15,23,515,961,700,457,753,51,115,4,165,197,2,38,569,1,474,0,37,99,719,5,12,629,14,11,3,33,77,64,10 }, +{ 15,515,700,753,1,0,2,4,3,23,134,12,961,5,10,197,11,33,82,120,457,51,165,7,6,341,217,21,77,9,40,180 }, +{ 13,51,23,457,719,961,730,401,165,453,0,117,386,15,134,1,758,153,12,54,515,99,11,2,700,5,753,4,308,33,6,899 }, +{ 134,898,82,117,13,33,77,102,23,260,341,351,120,901,197,153,961,111,196,110,180,457,854,10,450,8,165,40,4,115,0,365 }, +{ 60,18,126,167,35,16,191,71,24,92,121,271,68,107,212,146,118,150,199,7,21,1,9,575,727,5,566,48,0,132,108,273 }, +{ 62,136,129,123,128,41,162,17,249,211,214,789,618,710,38,678,248,507,57,64,152,269,119,3,177,183,597,106,4,179,216,90 }, +{ 403,523,51,475,494,453,817,899,202,23,450,13,421,120,102,730,33,128,4,1,805,5,7,153,757,260,318,196,77,457,326,65 }, +{ 4,59,3,62,12,33,56,193,27,21,102,17,40,77,76,84,32,0,6,123,119,177,128,11,18,611,605,25,13,51,73,210 }, +{ 43,20,319,422,414,945,0,7,819,61,5,376,325,173,804,904,470,693,97,707,14,49,22,104,147,107,95,32,426,1,330,577 }, +{ 13,23,51,2,0,115,4,141,217,33,10,77,1,15,64,180,3,515,7,6,22,102,11,5,40,9,165,700,202,197,317,341 }, +{ 28,49,0,105,1,24,65,159,35,55,95,239,16,2,109,7,9,14,170,320,347,168,424,158,10,301,124,5,67,21,64,36 }, +{ 
15,515,700,753,0,1,13,2,117,4,12,10,5,165,457,3,9,134,11,7,6,51,77,64,961,82,33,197,14,341,120,141 }, +{ 7,71,14,149,97,18,60,16,150,92,398,189,140,124,24,273,35,2,69,302,154,68,0,336,517,43,66,28,118,251,230,1 }, +{ 4,102,33,77,40,59,11,624,210,12,128,342,5,503,91,139,64,32,25,494,202,678,416,0,403,275,21,450,196,318,523,177 }, +{ 25,19,42,6,122,813,256,235,85,26,436,53,297,573,680,390,445,63,27,416,80,233,65,73,389,283,45,605,194,17,250,343 }, +{ 402,102,202,128,33,300,403,23,12,77,40,21,342,117,483,99,25,494,6,4,63,32,84,569,139,757,475,318,19,26,196,134 }, +{ 158,9,0,109,39,49,65,22,35,168,55,24,68,124,159,16,185,344,333,154,254,272,175,289,1,577,95,28,105,810,30,169 }, +{ 197,180,115,237,498,165,2,5,287,546,400,3,61,34,509,13,297,80,341,52,45,186,58,881,23,873,468,176,64,17,311,250 }, +{ 120,968,373,260,704,110,450,202,137,318,77,95,269,326,217,717,661,652,851,349,93,1,518,98,827,291,21,177,82,33,848,719 }, +{ 44,116,144,268,434,489,367,384,98,127,918,93,948,31,206,940,855,0,203,137,9,22,617,141,332,105,393,492,959,282,299,131 }, +{ 13,77,23,33,51,0,64,141,102,4,2,115,1,6,202,15,10,128,269,7,177,180,3,40,22,11,515,217,117,318,700,137 }, +{ 15,515,700,753,4,11,141,40,165,23,64,180,13,202,32,3,51,125,5,197,21,128,0,93,77,1,120,82,269,117,110,59 }, +{ 176,231,585,62,34,14,412,161,56,236,527,57,17,3,51,202,4,23,369,283,128,13,472,440,84,361,136,457,381,130,719,53 }, +{ 9,0,180,217,237,101,141,352,88,100,230,64,175,317,115,498,68,39,30,1,702,83,213,36,365,208,752,13,252,321,952,546 }, +{ 28,9,22,1,49,0,109,39,83,95,86,30,13,105,128,55,141,168,158,67,31,159,208,12,96,5,185,2,160,64,137,23 }, +{ 72,4,38,12,51,89,477,11,57,76,401,308,23,474,99,148,413,179,59,13,431,152,54,569,17,3,205,629,197,421,405,15 }, +{ 457,13,23,961,15,51,515,700,165,12,753,629,11,1,719,117,0,3,2,37,569,197,40,328,33,5,153,134,99,64,38,196 }, +{ 254,100,310,9,30,1,39,625,166,265,190,0,272,557,131,731,31,98,578,688,404,93,101,88,49,21,127,264,44,36,252,478 }, +{ 
51,23,12,13,15,128,99,120,10,202,515,153,64,82,700,33,165,2,5,117,403,1,141,0,3,196,37,453,753,197,260,93 }, +{ 38,99,542,139,453,117,196,23,457,13,328,111,37,134,961,11,12,51,40,775,587,401,474,54,153,477,41,629,33,475,14,277 }, +{ 6,85,25,233,343,91,26,63,138,29,19,65,283,4,81,235,42,122,605,64,648,256,174,370,74,389,718,59,45,194,445,416 }, +{ 49,5,97,20,197,21,18,193,0,64,408,729,173,350,43,422,165,7,14,104,61,32,509,713,523,102,120,95,125,397,35,232 }, +{ 144,116,268,434,384,489,367,206,93,855,940,44,98,332,617,127,959,911,137,282,203,31,22,219,141,9,131,276,417,0,1,120 }, +{ 17,106,64,62,32,255,136,292,476,162,129,241,123,141,41,237,720,214,209,352,519,211,186,148,752,247,507,90,21,77,197,119 }, +{ 2,29,52,50,5,58,14,6,27,1,366,357,45,53,17,19,171,151,26,181,133,38,218,764,287,583,61,113,3,487,600,281 }, +{ 130,59,196,412,381,730,711,236,77,210,202,402,453,99,401,108,361,803,291,283,153,4,57,51,128,183,14,719,503,117,23,11 }, +{ 13,23,51,141,77,4,33,64,115,0,217,10,180,202,2,102,11,9,15,165,40,21,128,352,22,7,197,3,317,515,269,1 }, +{ 23,13,202,51,120,15,21,5,141,1,128,269,137,515,64,102,125,48,98,33,260,523,318,93,700,165,450,77,2,12,403,82 }, +{ 1,2,14,46,29,67,38,52,5,171,58,24,103,69,96,70,83,181,54,75,163,223,16,45,112,309,155,0,186,35,18,108 }, +{ 15,515,700,753,13,0,1,2,153,5,23,10,117,3,9,7,134,165,12,6,341,33,4,14,77,457,115,21,719,180,217,82 }, +{ 197,165,509,13,391,180,308,115,23,546,5,498,2,29,3,401,901,61,34,80,14,457,250,569,237,873,38,297,45,15,468,386 }, +{ 19,73,27,250,200,714,444,472,26,53,34,17,813,322,283,390,128,297,78,123,432,14,436,136,106,690,57,122,389,80,503,3 }, +{ 3,17,21,45,62,32,38,12,155,14,2,328,5,99,401,536,828,13,227,488,106,51,719,119,540,76,165,221,115,629,209,41 }, +{ 115,341,873,197,365,13,901,180,569,752,317,1,10,498,143,634,261,0,509,15,943,237,44,31,116,601,165,127,282,23,141,64 }, +{ 453,51,23,403,33,421,475,102,15,153,196,515,13,700,117,523,12,40,753,21,4,134,0,494,670,899,22,801,730,10,11,401 }, +{ 
23,13,51,33,12,117,153,134,453,196,15,99,515,40,14,700,128,102,11,753,77,64,403,202,0,401,475,37,65,2,3,38 }, +{ 2,7,5,14,70,1,29,61,52,45,6,112,66,16,21,32,592,46,38,135,87,58,186,315,290,128,113,0,64,48,227,23 }, +{ 33,23,102,51,128,13,64,202,141,1,77,10,153,40,196,117,2,3,0,5,15,269,403,12,137,134,318,165,120,6,453,99 }, +{ 16,92,7,20,43,35,126,71,60,14,107,18,68,97,0,121,279,149,24,246,191,48,118,575,55,140,362,783,230,150,375,566 }, +{ 13,23,4,33,77,64,51,102,141,128,32,10,0,202,40,115,59,22,90,11,177,21,291,6,7,318,180,117,137,2,95,165 }, +{ 507,162,129,41,4,211,62,38,123,59,57,248,183,130,99,11,3,361,202,17,402,556,266,305,803,210,128,184,152,136,313,117 }, +{ 643,123,193,650,802,18,25,389,718,256,65,289,84,91,619,511,415,90,235,63,57,510,324,216,862,102,6,183,108,397,217,736 }, +{ 13,23,15,1,515,51,0,2,700,5,753,165,141,115,12,3,4,180,21,197,457,7,6,10,120,9,33,202,77,32,8,11 }, +{ 23,51,13,453,64,403,12,21,5,202,128,475,165,141,523,95,125,115,3,1,4,730,120,32,2,494,180,719,457,197,450,401 }, +{ 204,74,135,66,6,174,192,7,138,172,85,353,348,580,280,97,95,500,29,64,426,32,87,889,65,81,25,2,52,43,568,673 }, +{ 35,0,68,69,24,9,1,16,65,103,149,133,18,114,28,50,83,2,189,7,46,14,101,336,175,124,251,55,71,218,38,238 }, +{ 16,101,0,118,9,18,24,68,35,154,71,124,60,212,191,520,55,806,694,167,28,39,364,375,1,346,252,65,604,302,22,21 }, +{ 0,9,16,35,1,24,68,18,65,21,103,67,13,149,28,189,71,23,101,238,114,7,335,133,486,141,22,212,48,50,30,118 }, +{ 13,202,23,77,33,51,128,5,21,141,115,32,102,64,4,0,318,269,10,15,291,2,494,177,11,217,3,515,22,137,6,700 }, +{ 16,92,60,35,7,18,24,68,150,149,14,71,0,375,97,126,118,107,230,191,246,273,140,55,175,653,9,575,2,28,566,517 }, +{ 76,90,21,179,316,148,205,32,464,288,184,257,245,1,89,2,460,57,152,45,38,358,645,5,12,449,350,48,37,17,4,14 }, +{ 19,27,26,813,80,297,17,495,436,53,73,200,4,378,250,59,106,25,45,128,361,42,113,469,122,390,77,40,736,6,11,136 }, +{ 
6,26,235,138,19,145,112,70,331,262,25,42,52,624,27,453,122,47,500,78,648,85,29,2,630,632,409,113,50,226,108,75 }, +{ 7,16,14,24,92,35,18,2,46,9,60,140,0,87,50,5,54,13,12,38,171,23,126,21,58,64,1,70,128,71,220,163 }, +{ 90,205,257,184,32,179,460,5,245,45,2,288,769,524,57,21,152,229,17,1,497,4,292,59,619,452,432,76,476,11,266,14 }, +{ 15,515,700,753,4,5,11,141,13,1,33,3,0,128,202,23,180,21,2,64,269,32,117,134,120,40,102,318,153,17,137,352 }, +{ 47,130,711,108,453,412,730,196,390,283,78,27,51,183,381,236,128,200,719,14,153,472,503,34,59,250,3,4,57,803,123,432 }, +{ 12,277,51,474,111,153,23,99,13,37,961,94,629,542,569,431,79,139,38,134,117,453,33,188,196,40,115,15,11,157,401,515 }, +{ 17,495,469,106,26,378,80,27,161,483,19,742,527,436,383,862,73,136,53,814,297,6,119,84,62,56,25,3,209,611,4,128 }, +{ 81,681,636,91,0,750,370,104,718,138,18,693,173,784,29,397,348,74,192,673,174,65,6,207,64,280,306,52,671,32,355,319 }, +{ 15,515,700,753,33,77,4,102,115,117,40,13,1,153,134,11,5,217,23,196,2,21,3,317,32,365,0,341,291,59,12,51 }, +{ 0,9,28,35,68,1,65,67,101,39,69,175,16,238,13,22,96,124,18,24,251,30,55,12,23,2,50,141,114,5,154,103 }, +{ 23,33,77,13,117,40,11,102,64,4,51,403,153,453,10,0,196,134,128,65,12,291,86,99,95,59,15,141,202,180,137,719 }, +{ 214,90,289,6,874,64,25,65,235,42,751,249,256,312,194,85,746,875,174,32,525,288,519,835,247,348,233,544,217,524,437,352 }, +{ 1,22,2,0,36,67,28,5,49,95,12,50,168,83,105,55,7,9,14,194,103,23,114,21,584,46,10,13,38,69,208,159 }, +{ 269,141,13,202,33,180,318,77,291,137,102,352,128,23,349,51,31,217,372,317,125,197,44,21,11,5,901,1,18,0,4,494 }, +{ 435,144,274,88,203,418,30,1,190,410,96,778,100,530,521,326,466,795,686,166,960,321,382,264,367,822,131,31,692,9,213,93 }, +{ 76,72,90,21,37,179,12,205,32,428,148,38,308,405,4,413,57,184,749,245,316,221,54,645,288,1,152,155,464,257,2,14 }, +{ 77,33,64,102,13,141,23,2,40,1,51,10,0,115,6,180,202,128,4,3,177,269,15,7,22,165,291,14,217,318,137,11 }, +{ 
397,81,4,32,65,788,693,804,681,11,249,21,91,64,690,494,3,0,422,56,348,725,194,123,23,59,523,319,61,510,95,90 }, +{ 60,126,16,7,92,121,314,246,35,107,150,132,14,146,24,18,199,298,232,71,359,140,672,97,392,649,5,423,95,21,22,388 }, +{ 15,515,141,217,115,700,13,23,120,317,753,180,33,260,110,137,341,51,1,365,4,77,64,202,0,40,36,352,197,269,10,21 }, +{ 111,134,117,474,23,13,961,12,569,431,37,15,51,115,515,700,277,99,753,38,197,405,457,4,72,94,629,45,11,89,54,148 }, +{ 23,13,51,5,1,15,2,21,12,202,141,0,515,165,120,32,4,64,700,3,115,197,269,125,753,7,9,128,6,180,453,403 }, +{ 13,141,4,23,5,2,115,217,202,51,180,137,269,352,77,1,317,3,21,318,0,15,9,64,10,197,11,341,33,515,752,7 }, +{ 165,125,197,13,391,21,23,558,48,380,97,120,298,33,14,426,66,115,32,386,900,180,6,98,357,237,326,509,51,278,221,457 }, +{ 120,82,15,260,515,1,351,77,450,700,13,21,141,23,753,202,217,93,110,33,51,854,5,128,326,102,137,180,817,48,269,352 }, +{ 23,13,15,51,515,700,961,753,0,457,1,2,4,115,10,453,569,5,33,165,11,719,14,40,64,197,3,21,474,629,38,401 }, +{ 264,166,39,30,9,100,435,254,93,921,190,363,1,625,411,382,897,656,203,478,404,812,438,110,473,88,18,691,156,141,274,272 }, +{ 9,0,252,100,166,39,101,265,364,68,88,329,520,18,419,676,118,167,404,604,16,1,21,30,212,158,553,49,382,274,48,13 }, +{ 15,515,700,753,4,11,141,5,3,13,202,1,180,21,2,165,269,23,40,64,0,318,12,32,128,51,77,117,523,197,120,457 }, +{ 24,1,2,69,35,16,67,18,14,50,0,46,68,9,38,7,133,71,83,149,28,108,189,218,65,114,238,29,75,54,5,96 }, +{ 90,289,214,64,874,13,77,712,66,751,4,23,51,192,32,0,202,194,312,177,33,65,234,104,875,288,59,5,835,416,102,95 }, +{ 0,9,49,127,98,31,301,28,371,159,1,395,512,737,158,761,916,623,16,44,242,39,170,18,293,105,24,272,101,22,23,385 }, +{ 17,62,136,214,123,129,32,292,119,209,710,106,141,162,128,64,45,4,77,249,11,618,211,3,207,130,519,183,38,177,21,269 }, +{ 5,107,581,356,279,32,441,362,493,660,13,298,0,534,49,147,21,22,132,121,97,423,7,590,259,683,14,786,126,508,60,246 }, +{ 
51,13,15,730,453,23,515,719,386,457,12,700,403,475,899,1,6,523,753,421,99,401,165,33,2,19,361,5,0,670,120,27 }, +{ 49,28,9,159,272,22,254,131,158,327,95,105,0,39,35,168,347,286,374,55,65,627,424,912,68,578,1,24,239,175,688,169 }, +{ 15,515,700,33,753,4,77,141,341,317,1,10,13,180,102,22,40,117,115,365,5,901,23,197,134,11,217,351,64,82,21,137 }, +{ 134,15,13,515,23,700,12,753,51,474,37,961,197,10,457,569,4,0,99,2,115,38,165,153,94,3,139,11,1,82,33,5 }, +{ 7,2,20,58,5,14,128,66,6,29,32,43,21,52,16,38,631,61,74,97,46,135,113,25,202,192,13,0,884,45,112,87 }, +{ 77,13,33,202,23,128,102,4,141,342,117,0,269,318,134,22,11,21,32,153,403,291,49,64,137,51,40,15,494,5,196,98 }, +{ 2,1,14,6,46,38,29,65,5,36,67,0,103,7,22,86,133,50,108,208,52,83,24,323,283,69,28,18,10,25,23,75 }, +{ 15,515,700,753,1,5,4,2,3,13,0,11,180,341,12,33,10,197,134,365,77,23,21,901,6,117,165,7,37,32,17,102 }, +{ 203,268,206,93,417,940,31,8,120,137,44,499,959,473,202,692,728,559,0,260,10,326,141,564,817,127,341,1,450,22,110,23 }, +{ 15,82,515,120,700,0,10,753,33,8,64,165,110,31,260,93,13,197,23,22,40,4,351,44,77,9,11,153,102,51,1,196 }, +{ 60,0,16,7,14,43,20,71,28,10,2,22,154,18,13,24,92,1,51,576,35,615,805,925,68,126,124,149,97,64,23,55 }, +{ 19,6,26,80,5,84,27,17,25,2,504,129,45,240,56,123,4,119,618,1,76,106,64,51,14,3,128,65,32,710,0,42 }, +{ 15,515,700,753,13,4,77,23,33,51,0,5,8,10,11,31,44,1,82,22,202,64,110,102,93,21,291,40,141,180,9,49 }, +{ 195,98,271,223,132,167,146,407,1,360,121,834,393,591,212,199,293,259,522,107,354,147,156,191,807,590,48,18,125,16,765,541 }, +{ 128,202,77,210,402,318,33,102,6,40,403,29,342,269,196,757,99,139,2,111,42,4,494,117,275,300,13,12,678,0,177,122 }, +{ 13,33,23,40,51,102,4,117,77,64,134,0,128,153,202,196,453,11,15,12,1,22,403,141,59,14,10,475,515,65,700,95 }, +{ 7,16,14,24,18,2,28,0,92,71,1,22,6,35,60,20,168,10,154,118,5,302,124,69,97,109,703,158,420,12,149,66 }, +{ 15,1,515,23,0,13,700,2,51,753,180,5,120,165,197,21,115,4,33,9,141,7,12,6,3,457,386,202,260,523,8,31 }, 
+{ 60,107,121,132,146,126,199,279,150,92,16,649,441,35,955,7,21,0,423,5,18,195,598,298,493,356,32,653,22,362,953,10 }, +{ 31,44,98,276,284,299,116,935,9,201,0,131,39,127,144,662,1,137,371,492,567,489,93,254,49,268,22,28,30,293,434,737 }, +{ 13,15,23,515,700,0,1,51,753,4,2,10,77,202,5,115,3,165,197,457,9,12,11,961,33,120,22,141,180,7,6,40 }, +{ 123,162,184,257,17,183,229,130,129,3,84,136,99,152,556,383,57,497,12,205,4,62,56,452,80,266,128,14,40,119,27,106 }, +{ 196,33,117,40,153,23,134,13,51,102,453,0,15,475,12,14,515,2,22,700,4,21,753,64,401,670,730,1,9,11,10,99 }, +{ 224,219,187,131,258,385,442,871,836,31,98,908,44,574,127,944,137,839,116,36,613,1,254,39,926,160,829,96,93,371,860,827 }, +{ 121,195,156,132,146,360,590,407,786,522,883,591,259,929,626,941,150,687,5,55,296,379,467,178,586,465,279,21,1,13,60,354 }, +{ 2,1,14,29,6,5,46,52,38,19,114,75,26,65,108,96,25,50,36,70,103,309,17,236,218,74,12,86,0,3,10,112 }, +{ 15,515,82,700,120,753,10,0,8,197,260,165,351,64,13,110,117,93,31,1,9,33,22,23,457,44,450,77,102,898,40,49 }, +{ 7,66,97,2,172,74,226,52,29,135,192,232,43,324,92,5,38,20,222,14,6,568,87,107,353,620,580,16,138,174,448,32 }, +{ 62,129,123,162,136,249,618,183,507,57,4,152,17,59,11,184,117,77,3,128,211,41,130,205,12,40,33,106,64,229,38,313 }, +{ 1,13,15,2,4,515,23,0,3,115,700,5,51,77,341,141,753,180,33,217,197,202,901,6,21,165,11,365,318,317,10,102 }, +{ 6,26,235,19,145,47,112,78,64,27,453,95,29,444,25,624,85,108,648,70,32,130,74,42,711,630,632,138,65,122,113,730 }, +{ 23,51,12,15,13,99,515,153,117,10,700,37,120,82,165,2,753,64,128,0,403,3,5,1,134,197,453,31,202,457,110,21 }, +{ 16,24,18,71,64,35,92,7,246,146,9,108,60,118,199,5,140,2,267,0,230,830,32,133,1,68,50,330,247,563,36,12 }, +{ 15,515,700,753,0,1,13,2,23,3,4,217,51,5,115,8,9,180,341,10,7,6,317,77,33,372,901,197,365,11,120,165 }, +{ 234,639,178,202,77,142,5,455,450,49,416,0,147,427,198,21,315,329,13,318,325,557,120,344,113,259,22,128,61,105,23,494 }, +{ 
1,31,36,44,141,180,55,2,64,22,98,116,13,352,0,115,10,127,5,164,253,498,237,165,341,197,4,86,15,170,125,23 }, +{ 15,120,13,141,23,260,217,515,1,77,51,110,180,700,317,82,269,137,115,202,21,753,64,5,351,291,0,450,352,93,36,326 }, +{ 26,6,112,396,19,145,25,122,648,287,42,74,624,222,416,45,138,66,644,151,113,651,29,573,64,280,445,27,525,85,70,58 }, +{ 156,360,5,146,121,21,271,522,354,132,49,13,18,195,16,340,60,591,446,586,727,0,107,407,167,48,1,463,199,566,32,23 }, +{ 5,61,49,147,178,612,660,120,21,182,23,427,259,683,33,4,77,70,13,3,376,98,64,0,481,344,48,595,291,263,141,51 }, +{ 89,79,468,179,358,205,94,405,115,498,72,180,365,431,37,111,341,734,188,317,482,217,11,4,245,152,413,216,12,474,490,752 }, +{ 24,16,35,68,18,71,7,92,0,108,9,14,118,101,336,175,375,302,28,124,154,55,149,60,398,1,65,2,140,273,345,230 }, +{ 51,730,421,801,453,386,23,523,13,475,719,401,670,365,899,403,115,457,758,165,33,494,450,6,423,805,629,56,569,514,958,388 }, +{ 113,45,6,311,29,2,151,614,145,491,112,80,5,27,61,74,315,66,209,631,19,25,58,17,73,26,1,243,70,64,611,287 }, +{ 4,339,188,471,11,59,79,12,377,94,99,33,77,102,51,111,37,152,13,961,474,542,40,342,3,23,128,403,202,177,184,57 }, +{ 15,4,515,11,700,33,82,40,0,120,753,10,8,110,13,93,23,165,77,260,64,31,22,51,44,102,351,1,125,9,197,21 }, +{ 16,24,18,0,35,68,28,71,124,118,60,7,9,55,14,92,109,101,419,175,22,252,154,375,149,302,158,346,2,49,1,126 }, +{ 17,45,227,21,106,3,2,243,209,5,48,32,221,62,207,50,29,186,290,270,263,52,14,496,400,119,46,255,54,430,38,721 }, +{ 340,354,586,658,156,195,698,668,1,296,9,18,883,363,447,379,303,98,411,13,31,163,51,5,371,48,919,846,121,21,360,70 }, +{ 277,153,111,12,23,51,474,99,38,37,139,117,41,457,79,453,542,13,11,33,134,157,629,188,961,14,196,401,102,569,15,94 }, +{ 0,18,16,159,49,24,9,105,35,68,7,28,22,1,60,344,55,101,109,2,14,158,13,23,71,118,455,286,272,424,5,327 }, +{ 0,105,9,49,16,18,158,28,518,24,101,320,1,68,170,301,272,127,7,286,35,890,109,39,159,98,21,344,31,55,371,23 }, +{ 
141,1,180,15,13,2,365,217,515,352,317,115,341,0,4,5,269,700,23,21,3,752,197,77,753,51,31,901,10,202,8,64 }, +{ 4,23,51,33,19,17,102,153,485,880,40,403,196,26,300,453,27,117,78,0,12,200,47,5,11,14,342,99,53,77,475,2 }, +{ 62,184,56,440,130,229,183,3,556,152,99,162,12,266,17,548,136,57,305,161,123,14,452,4,383,403,257,34,40,84,33,139 }, +{ 13,23,77,141,64,202,33,51,269,115,0,102,21,4,217,128,5,32,318,137,291,9,15,2,180,10,3,317,177,515,7,6 }, +{ 1,22,36,105,170,0,86,2,31,28,239,64,55,5,10,98,9,44,127,95,654,67,301,143,13,12,49,23,320,141,83,21 }, +{ 15,515,700,753,0,1,13,2,23,901,5,8,51,82,9,180,457,4,7,12,3,6,10,120,341,141,22,898,197,351,115,260 }, +{ 1,39,274,98,100,265,190,30,438,310,166,223,88,96,909,31,264,625,530,9,382,812,21,252,593,0,254,539,44,131,23,778 }, +{ 18,212,167,118,363,1,447,411,146,60,271,16,781,121,647,9,621,562,21,478,664,68,815,5,354,98,48,101,24,446,777,463 }, +{ 24,28,22,0,7,1,2,16,14,65,35,49,158,95,109,159,55,105,10,18,124,9,67,5,239,149,12,289,108,68,21,424 }, +{ 105,22,131,272,286,98,55,239,1,31,320,9,127,327,36,185,28,374,86,219,0,64,187,44,578,164,224,913,535,115,601,13 }, +{ 22,31,28,301,127,98,44,0,105,1,512,395,9,293,109,299,95,338,239,125,242,116,36,320,55,841,900,685,599,23,13,763 }, +{ 2,1,58,29,5,14,52,46,186,334,45,155,151,50,400,75,38,69,502,61,48,227,223,7,163,17,262,67,549,21,70,113 }, +{ 7,107,135,232,97,14,2,92,66,16,172,192,278,387,298,356,38,35,448,52,46,43,60,29,20,126,324,526,357,359,64,5 }, +{ 20,43,104,426,173,7,560,414,707,784,319,81,0,861,422,819,38,74,715,52,376,97,879,32,330,22,49,64,66,95,192,526 }, +{ 104,74,636,66,204,0,355,81,222,25,29,319,145,784,20,65,90,4,174,194,7,64,6,746,138,173,750,715,91,43,192,32 }, +{ 0,9,101,35,68,39,65,28,252,124,67,154,364,336,100,166,30,1,289,55,149,346,16,114,158,88,439,24,429,22,570,194 }, +{ 57,14,4,231,236,585,176,59,369,23,361,13,719,51,300,342,12,457,56,3,62,38,202,401,34,46,2,322,11,215,210,507 }, +{ 
1,2,15,3,141,0,515,5,33,700,13,64,77,180,6,128,753,10,4,269,102,202,11,7,134,197,352,120,117,318,12,291 }, +{ 5,1,21,202,13,32,48,23,0,61,259,22,494,120,70,49,51,18,137,128,465,12,178,115,2,453,403,141,58,3,90,450 }, +{ 141,205,4,72,59,79,245,11,352,94,152,76,247,216,21,188,452,217,497,12,89,37,111,339,588,77,64,875,864,115,358,464 }, +{ 15,515,700,753,0,1,2,13,5,4,23,3,8,341,365,51,115,10,120,457,6,141,77,197,31,7,165,9,202,450,961,260 }, +{ 5,2,50,14,58,38,171,46,29,1,45,186,17,52,155,218,48,281,61,487,54,36,67,21,328,334,151,227,760,114,400,133 }, +{ 457,120,70,125,318,64,23,48,795,291,202,761,751,415,77,846,269,758,21,237,96,260,391,165,87,1,128,5,221,13,137,763 }, +{ 13,23,51,33,4,40,117,102,453,64,153,196,0,77,15,11,12,475,1,65,134,10,515,22,21,14,700,59,403,141,2,753 }, +{ 229,152,57,266,452,381,432,12,313,184,99,471,17,4,62,339,157,3,129,59,128,11,369,37,77,38,40,123,5,497,188,257 }, +{ 49,28,109,22,159,9,272,95,105,131,55,35,254,168,39,327,169,0,1,286,175,374,347,158,420,67,36,194,312,424,627,346 }, +{ 5,2,61,29,45,58,80,311,1,17,209,227,52,243,106,869,454,151,592,496,48,334,14,155,6,186,46,171,75,21,255,667 }, +{ 244,44,110,141,260,30,269,352,839,131,574,228,373,276,1,406,219,717,217,137,253,224,120,93,36,31,567,116,661,187,341,88 }, +{ 12,99,79,139,11,453,196,51,277,474,111,23,542,37,94,188,33,13,401,775,40,961,313,102,4,339,153,485,629,134,300,431 }, +{ 16,35,9,0,68,24,149,69,67,18,1,114,65,230,71,7,103,133,50,167,212,118,101,191,140,64,399,28,124,283,55,565 }, +{ 88,30,274,435,131,613,190,100,93,829,166,1,187,795,530,127,382,957,960,160,31,137,466,264,39,800,406,254,28,473,521,219 }, +{ 167,16,18,118,212,24,60,71,101,68,191,9,375,411,363,35,0,1,589,199,302,21,447,55,146,126,92,271,647,121,562,48 }, +{ 64,141,86,177,77,128,147,597,304,95,269,102,275,4,352,49,120,5,372,194,465,13,588,237,947,216,202,180,612,751,107,534 }, +{ 18,65,90,403,523,289,240,214,194,102,701,475,202,217,283,862,389,51,33,0,494,421,453,817,84,64,847,899,352,13,23,437 }, +{ 
13,51,23,202,5,12,21,128,15,115,0,1,141,120,64,32,4,2,515,403,165,457,3,10,700,99,453,318,719,450,308,401 }, +{ 98,223,393,31,1,271,834,791,167,44,202,64,93,697,5,116,77,125,450,446,212,18,541,293,51,120,195,132,284,13,807,765 }, +{ 15,515,700,753,4,11,23,13,40,51,82,165,0,110,93,33,141,64,120,5,10,77,3,102,180,32,202,125,8,197,31,21 }, +{ 15,515,700,753,0,1,13,2,901,23,5,341,3,51,82,8,4,180,961,9,115,10,12,6,898,7,351,141,134,22,31,120 }, +{ 234,416,77,5,315,639,325,202,147,198,113,49,450,61,455,142,0,21,22,342,329,494,178,58,102,427,318,230,13,120,43,470 }, +{ 60,146,16,18,156,126,121,271,199,360,132,24,167,0,640,10,71,522,21,92,5,340,107,354,118,150,22,195,446,35,28,212 }, +{ 4,361,11,14,56,368,377,161,27,12,300,77,59,200,17,554,202,33,40,494,495,21,210,80,757,25,128,23,19,38,444,53 }, +{ 141,82,217,351,15,352,120,1,180,260,515,64,854,36,700,317,752,372,13,269,77,753,922,21,349,23,202,110,93,137,51,373 }, +{ 15,515,700,753,77,13,0,1,23,33,102,2,51,4,3,5,291,217,10,9,450,120,341,7,317,6,11,117,115,8,260,180 }, +{ 15,515,120,13,700,23,77,141,1,260,0,753,180,51,137,202,115,365,110,291,217,5,128,9,21,341,197,269,2,450,317,165 }, +{ 174,6,348,85,138,74,280,204,66,233,192,355,289,65,81,580,636,353,25,91,104,343,673,214,64,95,42,712,792,32,194,90 }, +{ 152,497,452,59,4,216,11,79,94,77,128,188,269,339,588,33,76,529,318,32,141,471,12,202,111,21,5,51,37,90,72,177 }, +{ 417,499,10,141,253,244,110,559,8,564,180,260,728,120,352,638,642,341,951,206,143,752,901,93,137,661,922,373,44,31,811,197 }, +{ 13,77,23,33,4,51,0,102,128,59,141,40,64,115,177,10,137,22,202,2,7,11,90,1,117,180,269,14,49,6,134,3 }, +{ 1,2,22,0,36,5,67,50,14,28,12,86,38,46,83,168,194,65,103,114,49,7,10,95,21,69,23,24,128,51,55,13 }, +{ 17,106,119,207,255,306,742,378,84,62,136,45,3,5,240,80,61,56,209,383,311,790,655,32,2,440,76,151,58,29,179,263 }, +{ 3,128,1,141,2,202,33,5,64,15,0,515,102,13,269,10,700,180,134,51,120,6,77,318,23,137,17,117,753,197,82,153 }, +{ 
514,38,377,328,11,57,41,248,880,266,556,4,152,361,471,757,485,403,305,102,3,211,313,99,457,130,12,14,157,40,23,54 }, +{ 68,0,167,101,9,118,264,520,16,18,21,478,562,1,124,212,100,936,664,777,191,88,806,154,48,24,759,604,35,252,265,65 }, +{ 230,689,699,213,466,352,217,831,30,443,418,144,854,201,840,855,1,251,203,317,530,957,96,93,822,539,36,752,351,137,83,800 }, +{ 33,77,102,117,15,82,13,134,23,64,0,515,120,153,51,4,40,128,700,260,202,141,196,22,753,11,351,10,1,326,95,269 }, +{ 11,40,33,51,117,13,542,328,14,134,38,153,23,12,485,231,102,54,775,37,3,377,111,139,211,4,457,403,369,475,99,719 }, +{ 33,64,77,128,141,2,1,202,102,13,23,117,0,15,3,153,51,134,10,40,6,5,515,269,137,180,318,165,700,7,196,753 }, +{ 15,515,700,753,4,1,5,11,13,21,33,180,93,141,64,2,23,77,82,3,0,102,32,40,352,341,10,197,98,110,117,901 }, +{ 1,2,14,67,50,46,38,24,103,83,0,5,36,28,29,133,114,96,65,52,18,75,54,108,22,7,238,58,160,9,361,69 }, +{ 258,201,276,137,160,860,116,261,295,843,567,144,131,44,187,268,943,219,284,31,202,935,141,98,662,203,127,96,36,93,224,1 }, +{ 7,2,14,16,46,87,75,52,92,278,29,38,140,70,1,5,35,294,24,262,135,69,171,172,58,409,112,60,50,66,97,12 }, +{ 13,23,0,2,51,1,33,4,115,10,15,141,77,3,5,180,217,515,9,7,64,11,700,6,102,40,197,22,317,753,165,202 }, +{ 74,145,6,66,25,204,42,29,222,337,138,26,7,525,192,174,746,287,544,135,415,2,609,632,112,64,87,0,85,45,712,396 }, +{ 77,33,102,15,217,13,23,141,202,515,51,700,291,4,269,753,317,180,21,64,318,115,128,0,275,2,352,196,3,5,137,11 }, +{ 187,219,258,871,44,442,160,574,137,224,908,116,839,131,36,926,276,201,93,228,202,860,31,613,144,531,406,1,902,30,190,318 }, +{ 1,372,141,5,21,77,225,744,96,30,23,349,13,291,269,284,69,442,459,144,303,839,217,622,160,330,260,48,120,410,189,352 }, +{ 66,222,2,74,29,87,135,6,7,145,52,25,294,337,226,172,138,331,42,70,97,112,26,1,632,192,43,5,415,609,461,353 }, +{ 45,17,106,209,5,2,21,29,48,207,3,186,243,155,255,263,454,119,400,496,270,14,290,62,425,1,171,32,659,52,38,56 }, +{ 
93,88,141,120,30,213,260,373,100,717,459,82,110,1,166,450,180,321,217,372,36,269,131,225,22,352,326,466,473,187,244,410 }, +{ 266,57,152,381,313,471,12,229,99,369,339,62,157,3,4,37,77,38,188,17,11,162,40,184,129,59,475,775,128,452,403,453 }, +{ 217,352,317,141,752,15,180,515,372,365,700,341,753,349,77,21,291,1,115,244,64,120,13,98,269,82,5,498,864,351,23,144 }, +{ 14,514,369,102,403,377,51,719,880,153,23,13,457,11,485,4,401,12,328,453,33,40,117,57,629,38,730,236,134,670,361,961 }, +{ 107,7,172,14,92,135,2,359,60,314,46,16,126,278,232,150,279,32,38,392,298,5,35,97,24,192,259,288,330,52,356,312 }, +{ 0,4,25,13,59,90,65,23,26,19,18,12,5,216,91,51,389,33,77,11,22,85,27,81,21,177,746,45,42,194,37,123 }, +{ 5,49,315,202,416,77,455,639,450,21,197,137,350,13,408,0,329,318,494,344,61,402,64,509,347,120,113,48,95,713,308,401 }, +{ 130,47,381,390,59,90,200,214,289,6,65,472,29,64,874,648,50,751,624,26,52,32,4,194,875,714,85,249,247,33,881,19 }, +{ 51,23,453,13,719,12,457,165,37,730,99,4,386,197,401,17,11,2,3,15,5,961,475,6,515,64,54,700,32,115,0,403 }, +{ 15,515,1,13,700,2,23,0,753,5,3,180,51,4,165,12,141,21,197,457,7,115,6,9,352,10,120,202,8,341,11,77 }, +{ 0,9,1,67,35,28,68,16,24,65,18,69,50,114,103,12,22,13,5,101,2,96,23,83,149,21,39,55,7,175,433,124 }, +{ 28,105,22,0,1,320,170,9,49,301,109,95,127,31,98,55,65,35,2,24,168,159,36,713,16,740,13,338,21,44,512,23 }, +{ 13,77,4,51,23,33,102,202,128,59,40,0,64,141,117,403,115,11,15,318,153,269,22,515,475,134,10,494,177,1,90,210 }, +{ 13,23,0,51,77,33,2,141,4,10,1,64,115,102,3,6,22,15,217,11,180,7,40,515,165,202,177,9,269,128,700,5 }, +{ 456,116,492,8,949,268,867,391,203,51,499,13,719,386,31,791,457,918,125,10,23,93,479,685,417,0,22,338,506,551,870,730 }, +{ 17,237,45,180,106,62,32,64,115,41,136,498,255,21,197,129,241,13,3,227,23,352,165,752,350,365,449,155,4,546,476,38 }, +{ 1,15,180,515,0,2,341,700,901,352,4,141,13,3,752,5,753,217,317,115,365,23,197,21,51,165,31,6,269,202,77,7 }, +{ 
205,141,216,269,497,4,588,76,59,152,128,452,79,77,875,11,72,94,188,217,352,12,247,37,90,64,32,1,474,23,947,372 }, +{ 64,247,217,237,317,180,752,115,349,141,498,13,437,304,23,372,352,164,579,291,33,864,177,197,0,490,72,10,482,77,269,51 }, +{ 2,1,0,13,15,141,3,77,5,515,64,33,23,180,6,700,4,117,217,7,10,11,102,165,753,197,115,134,40,352,12,269 }, +{ 11,40,38,328,33,542,12,313,41,339,23,157,377,117,369,51,471,99,775,485,13,305,457,57,14,475,37,248,4,54,188,719 }, +{ 33,77,102,40,13,23,0,51,4,128,64,202,117,141,22,196,153,10,134,15,59,269,1,137,65,11,403,318,453,86,515,177 }, +{ 472,80,34,250,495,161,17,14,469,176,128,4,389,106,283,436,216,527,3,297,483,177,53,56,231,194,119,84,719,57,255,59 }, +{ 317,352,180,141,217,752,115,341,365,244,1,269,202,901,253,15,21,498,372,4,137,515,13,2,700,318,5,197,23,143,753,349 }, +{ 9,39,101,18,265,100,333,520,252,16,0,329,593,1,553,364,68,167,310,30,121,254,118,158,363,166,60,604,272,24,286,404 }, +{ 15,515,1,180,700,901,0,2,753,341,752,4,3,13,115,365,317,5,23,197,141,217,165,352,6,22,36,9,137,51,7,10 }, +{ 131,39,9,829,166,613,578,827,1,30,716,254,100,98,31,224,0,406,228,310,616,219,44,846,127,190,938,96,265,371,856,438 }, +{ 17,64,62,106,141,751,136,292,32,129,352,41,38,476,86,128,214,237,5,177,123,209,217,45,269,954,162,710,180,3,90,4 }, +{ 25,42,235,65,650,736,605,6,630,85,123,343,233,256,26,122,63,389,141,249,416,444,368,194,19,108,138,174,90,0,544,511 }, +{ 184,229,152,57,266,432,497,452,17,381,619,257,313,12,4,205,59,3,99,471,157,128,5,129,339,369,77,11,32,45,202,2 }, +{ 137,202,160,860,141,30,93,567,36,276,295,261,131,39,9,964,201,843,1,98,800,318,116,22,943,187,10,219,206,44,269,535 }, +{ 0,493,125,64,49,9,279,10,35,18,93,55,293,31,14,13,194,165,325,48,22,132,21,107,98,389,44,581,342,259,174,137 }, +{ 15,515,700,753,4,33,13,77,23,5,51,32,102,40,93,11,349,141,21,8,82,202,64,31,110,10,117,0,1,44,3,318 }, +{ 
110,253,854,811,352,141,244,951,180,642,661,384,498,143,752,317,911,10,269,206,559,351,261,120,902,533,922,959,365,160,332,217 }, +{ 2,29,70,1,75,52,6,220,26,112,145,331,74,163,19,69,38,324,46,58,14,5,25,21,278,223,50,307,66,7,67,409 }, +{ 13,23,77,33,51,4,64,141,115,102,0,2,128,177,40,11,202,10,6,180,7,15,269,1,32,217,59,22,291,3,137,515 }, +{ 340,897,691,478,658,264,914,382,100,812,363,1,724,156,166,698,88,521,39,404,682,447,296,96,303,411,30,909,9,274,656,772 }, +{ 9,18,310,101,265,159,326,120,105,158,33,363,77,195,51,55,13,39,354,132,23,7,28,639,16,137,98,1,252,272,709,49 }, +{ 57,313,471,12,99,369,157,339,266,152,38,37,475,453,328,775,11,40,59,188,77,514,401,403,342,4,139,33,377,51,229,14 }, +{ 16,7,24,14,35,140,60,92,18,69,71,2,189,1,46,230,108,388,150,38,21,172,278,67,246,267,50,309,236,135,451,0 }, +{ 206,417,93,940,959,473,499,203,8,137,559,728,31,202,44,120,450,141,10,260,116,564,22,326,269,318,268,244,0,1,253,638 }, +{ 15,515,700,753,1,0,13,2,23,4,3,51,5,217,7,77,341,115,8,9,10,33,6,180,317,349,291,120,11,165,457,901 }, +{ 1,2,5,14,48,21,290,32,50,45,38,46,263,207,155,72,76,29,17,408,425,171,89,52,7,0,292,449,3,227,513,428 }, +{ 121,132,354,167,271,223,146,98,18,463,1,668,446,195,407,60,212,447,781,48,360,363,411,522,156,393,807,9,21,16,293,13 }, +{ 131,578,105,371,219,224,716,616,187,49,9,254,737,159,385,98,258,127,272,761,0,916,623,910,28,286,39,31,22,518,924,242 }, +{ 302,467,97,6,273,1,24,484,124,51,36,18,2,398,453,421,523,69,7,23,13,403,386,150,66,0,298,65,426,165,22,158 }, +{ 30,190,530,88,1,100,778,539,625,274,382,410,96,731,960,39,795,321,9,131,264,144,840,748,44,166,669,957,36,31,435,228 }, +{ 141,1,2,128,64,33,15,202,3,0,180,5,13,77,515,134,269,102,197,700,10,137,318,6,120,165,753,352,4,82,23,117 }, +{ 44,201,567,116,131,224,295,662,489,268,219,31,434,144,187,276,110,384,93,261,699,137,36,442,120,1,613,30,228,64,141,244 }, +{ 12,15,51,23,515,37,99,13,700,0,10,117,753,38,165,82,134,120,11,453,197,64,115,569,1,629,401,22,457,474,110,153 }, +{ 
7,135,2,92,172,14,66,140,38,52,97,46,29,74,16,324,278,226,6,87,1,571,262,5,357,232,35,380,69,314,24,330 }, +{ 125,386,23,963,949,60,51,391,165,221,13,197,118,21,719,193,541,421,517,150,393,7,401,453,308,5,791,551,326,558,48,173 }, +{ 6,85,42,25,138,222,174,235,280,256,525,289,26,214,64,746,90,32,544,65,204,19,66,337,355,95,348,415,74,29,5,312 }, +{ 1,14,5,50,2,67,24,0,46,69,48,21,58,103,16,12,18,38,54,96,83,7,502,45,36,181,35,9,430,28,10,155 }, +{ 811,351,642,180,951,752,110,638,253,10,82,352,197,341,365,564,499,854,873,55,9,417,282,901,244,22,559,143,206,141,28,898 }, +{ 23,13,51,15,12,453,403,165,4,515,115,719,475,457,700,523,2,21,0,99,202,197,14,5,386,753,128,401,37,308,33,117 }, +{ 120,13,23,77,141,1,15,93,217,82,260,51,137,202,110,515,21,180,165,5,128,102,64,351,291,700,269,352,326,203,177,0 }, +{ 1,5,0,22,12,2,36,21,10,23,86,13,28,51,9,128,48,14,32,50,7,3,96,137,54,4,202,49,37,65,208,323 }, +{ 219,98,23,127,301,51,258,308,170,910,13,165,22,105,293,616,125,242,276,401,201,395,964,115,55,284,31,374,327,206,512,900 }, +{ 64,180,80,165,5,237,2,250,34,58,297,61,197,17,22,29,186,498,231,445,247,3,752,311,95,32,483,153,27,45,115,469 }, +{ 13,77,23,33,0,2,1,64,141,51,102,10,15,3,115,40,180,6,515,128,7,22,269,202,4,217,700,5,177,117,14,165 }, +{ 15,120,51,515,13,450,23,700,202,153,196,753,260,64,128,141,730,4,326,386,21,523,33,318,5,457,95,32,403,1,77,269 }, +{ 2,1,5,29,32,45,207,263,14,425,58,72,76,21,7,408,48,46,52,186,17,292,38,6,61,89,476,50,155,720,119,3 }, +{ 15,515,700,753,4,13,11,5,1,23,33,21,3,141,32,2,40,180,117,64,269,202,102,197,0,165,120,51,341,352,153,12 }, +{ 76,5,214,129,2,123,45,710,17,249,618,460,179,32,1,257,205,519,90,207,245,184,162,61,769,209,292,106,6,29,14,128 }, +{ 1,15,23,13,120,141,51,515,202,21,700,165,0,180,137,2,5,77,128,93,753,260,269,197,326,33,110,352,82,102,318,48 }, +{ 7,2,135,14,29,87,66,52,97,172,70,112,5,58,46,337,92,16,20,43,1,38,232,155,74,294,6,461,409,151,262,32 }, +{ 
574,187,384,926,860,110,258,434,269,531,141,244,160,261,253,116,699,959,940,717,533,36,219,31,902,661,871,295,201,352,10,260 }, +{ 156,354,296,1,182,586,64,379,340,937,850,698,31,48,98,44,120,18,163,23,30,658,195,125,77,284,223,291,774,481,96,39 }, +{ 250,80,34,472,17,495,176,469,33,194,64,483,4,297,141,14,161,27,53,667,56,833,73,527,585,231,106,51,84,814,2,59 }, +{ 97,7,81,140,66,92,172,192,24,298,43,6,74,69,314,426,462,14,501,16,21,508,60,189,267,232,230,104,48,20,135,330 }, +{ 31,44,116,144,268,393,492,434,367,489,127,98,918,0,384,9,22,206,948,105,93,203,1,456,332,940,299,28,137,49,293,125 }, +{ 15,128,33,3,13,51,141,1,202,64,23,2,515,120,102,0,5,82,10,700,165,197,269,153,403,110,753,137,196,318,117,12 }, +{ 31,98,127,9,0,105,22,28,44,512,293,395,299,1,242,49,685,763,320,599,125,116,109,276,284,95,870,159,23,456,36,900 }, +{ 7,24,124,1,6,97,2,69,14,18,23,92,21,67,66,16,5,484,43,20,118,65,36,22,28,0,51,140,13,71,29,150 }, +{ 1,64,442,303,284,349,202,141,622,67,154,447,260,44,652,429,9,335,237,919,197,98,167,33,682,269,547,77,863,411,340,201 }, +{ 1,15,2,141,515,0,700,13,3,180,10,753,5,64,77,33,4,6,7,197,102,269,165,23,134,11,352,341,291,349,22,120 }, +{ 99,139,12,453,196,277,775,40,475,33,23,401,215,51,11,14,77,111,313,130,38,211,37,266,129,15,339,153,719,3,369,515 }, +{ 33,77,102,4,23,128,13,141,202,64,51,0,40,59,269,115,117,137,153,1,318,11,10,177,15,134,22,90,196,2,403,32 }, +{ 7,2,14,58,70,112,16,5,87,38,46,52,6,128,135,1,32,21,155,29,66,64,0,97,92,186,172,294,13,23,20,37 }, +{ 15,13,515,1,700,2,23,0,753,5,3,4,51,10,341,115,365,180,11,33,317,77,6,7,217,12,197,165,117,9,64,102 }, +{ 2,1,14,29,75,69,67,6,52,46,38,24,103,220,83,25,70,87,262,74,96,267,50,366,26,16,226,394,357,66,108,19 }, +{ 9,105,18,39,1,0,16,557,101,272,252,890,326,49,265,21,137,100,23,938,13,310,159,5,31,24,254,51,30,128,202,132 }, +{ 80,209,45,61,667,17,6,106,5,2,151,29,483,255,454,833,27,311,112,19,738,378,1,58,113,26,25,469,119,887,32,64 }, +{ 
13,23,51,15,5,1,515,0,21,2,12,141,700,165,202,115,753,32,180,4,3,197,10,120,457,9,269,128,64,341,7,33 }, +{ 99,12,453,277,139,157,369,474,339,51,38,23,37,196,188,401,775,111,11,313,328,475,153,266,4,471,79,40,33,629,102,14 }, +{ 7,92,16,232,97,140,126,14,60,107,66,35,298,387,314,104,246,462,441,150,0,38,24,2,172,357,230,330,5,633,22,289 }, +{ 13,77,23,202,318,141,33,4,51,269,102,177,115,403,137,2,40,494,90,11,342,128,31,117,21,32,7,12,64,134,14,10 }, +{ 13,2,0,23,141,1,77,3,180,33,6,64,15,10,115,51,4,5,217,197,7,165,515,102,22,11,700,269,40,352,177,14 }, +{ 15,515,700,753,4,11,1,93,13,5,180,110,82,21,120,23,2,33,10,141,3,165,197,102,901,0,32,341,117,40,153,12 }, +{ 15,515,700,753,1,13,0,2,23,4,77,51,3,5,341,291,7,33,6,115,10,9,8,217,11,177,120,180,102,165,197,365 }, +{ 20,43,198,325,173,904,104,234,66,147,77,319,416,422,97,426,5,0,7,450,861,202,712,725,2,32,639,376,38,324,945,315 }, +{ 105,0,9,28,49,301,170,1,127,159,22,16,31,98,512,623,24,109,158,395,35,68,371,65,713,55,2,242,293,21,44,18 }, +{ 213,88,689,466,230,30,321,435,699,352,217,201,795,831,144,854,1,443,96,539,530,840,418,251,855,190,93,100,669,31,957,662 }, +{ 130,453,47,196,4,57,14,59,236,711,51,153,730,77,412,381,23,202,108,128,361,13,283,117,11,719,200,46,34,78,210,2 }, +{ 1,2,5,14,0,50,36,22,38,46,65,67,12,86,114,28,103,29,208,7,10,128,21,83,218,23,96,54,194,6,133,51 }, +{ 6,26,74,19,165,453,14,730,1,125,197,50,29,51,138,357,13,2,108,391,70,719,46,457,47,500,386,262,112,23,235,52 }, +{ 9,10,376,20,43,0,49,18,30,120,2,33,325,104,501,470,77,788,725,102,523,39,858,5,904,414,174,55,137,37,342,13 }, +{ 15,515,700,753,0,1,13,23,51,77,120,202,341,82,5,4,9,260,2,137,141,128,115,351,901,8,180,10,197,21,450,33 }, +{ 105,131,272,578,9,49,371,219,159,616,286,320,224,187,716,98,28,22,0,623,127,258,910,737,385,31,239,347,254,109,424,95 }, +{ 457,51,13,23,961,12,719,99,453,15,4,515,165,401,629,3,700,11,17,14,2,37,753,41,57,569,38,45,0,33,5,32 }, +{ 
202,120,5,33,318,77,450,102,1,260,403,128,494,21,165,13,269,12,326,23,342,523,402,2,817,64,15,141,125,82,457,475 }, +{ 141,269,352,217,180,64,349,137,202,160,317,15,372,515,700,752,318,753,244,13,437,291,165,864,22,237,5,82,954,21,77,418 }, +{ 70,29,2,145,74,112,26,6,75,52,19,66,632,1,87,220,5,135,163,287,307,25,226,7,58,396,294,278,113,409,69,151 }, +{ 82,351,317,15,752,180,898,352,141,901,515,341,10,700,365,1,753,498,0,217,253,115,55,854,33,5,143,32,21,160,36,197 }, +{ 39,9,310,254,0,30,101,49,252,272,100,265,105,455,159,557,190,333,286,688,18,166,1,158,709,16,625,627,31,131,327,329 }, +{ 2,58,29,5,1,151,186,52,70,45,7,549,14,75,112,400,113,155,61,46,227,163,311,315,66,6,307,27,17,220,287,74 }, +{ 141,217,13,21,352,23,269,77,180,115,317,64,202,15,349,137,5,51,165,291,318,752,372,4,0,102,33,365,197,32,341,125 }, +{ 68,35,0,9,65,101,149,124,24,154,175,16,28,7,67,1,18,189,114,398,55,14,345,39,118,133,69,2,230,429,71,283 }, +{ 66,7,29,2,112,52,20,43,97,151,74,192,135,5,173,525,337,45,145,58,415,25,14,32,644,70,544,226,222,21,6,580 }, +{ 31,125,44,22,116,299,242,55,1,170,64,36,479,870,456,685,10,599,558,0,268,506,28,740,23,903,492,164,393,206,2,86 }, +{ 188,11,79,12,99,377,94,33,542,339,40,474,111,37,4,51,102,453,139,775,13,475,23,961,277,471,134,57,431,266,115,117 }, +{ 658,698,340,98,296,303,1,31,850,363,156,919,44,774,586,385,120,77,82,10,223,30,354,291,23,914,478,87,260,163,48,13 }, +{ 15,515,700,753,82,4,1,13,901,33,197,11,5,10,23,165,2,0,180,3,21,77,51,120,365,115,217,40,117,102,32,401 }, +{ 15,515,700,753,4,11,5,13,1,141,3,180,23,202,21,2,269,64,165,33,40,32,0,318,120,128,12,197,117,352,51,17 }, +{ 91,6,233,85,370,718,81,65,25,256,63,343,42,74,235,123,138,511,397,249,26,194,650,355,64,87,544,18,90,643,66,214 }, +{ 23,13,202,51,21,120,1,5,141,128,450,64,318,403,15,137,260,33,12,48,32,31,125,494,269,102,165,515,77,2,197,14 }, +{ 180,317,365,341,752,217,115,352,901,482,372,498,1,141,15,253,515,244,2,700,0,21,13,82,23,4,579,351,753,291,269,77 }, +{ 
13,115,197,341,9,352,468,237,64,498,23,165,22,509,901,546,482,180,28,569,317,51,365,873,391,95,86,217,49,837,752,706 }, +{ 13,23,51,1,141,5,165,202,21,120,64,125,180,15,2,33,197,115,128,32,260,269,12,82,4,515,137,7,318,93,0,700 }, +{ 214,289,90,174,874,6,138,280,65,81,64,85,355,751,194,233,312,348,835,91,0,32,343,636,249,29,875,288,519,104,247,74 }, +{ 15,515,700,753,4,5,11,13,1,33,23,21,2,3,102,32,141,77,180,117,31,64,0,40,134,196,120,352,12,44,197,6 }, +{ 33,15,13,515,117,23,700,217,134,753,0,51,153,77,141,2,4,64,196,1,3,180,10,115,5,102,6,11,22,202,165,7 }, +{ 15,515,700,753,33,4,77,102,1,40,13,117,11,115,134,5,21,153,23,217,3,32,2,317,120,196,180,141,51,12,59,260 }, +{ 15,515,700,753,13,0,1,23,2,217,51,3,4,5,8,317,115,9,341,10,202,180,6,365,7,82,457,22,120,901,33,291 }, +{ 7,2,135,20,97,14,66,52,337,673,192,29,43,355,353,5,16,294,107,376,147,226,331,560,64,470,222,104,415,32,4,324 }, +{ 195,132,142,167,146,77,363,271,121,354,202,120,647,178,786,212,687,0,101,878,16,522,60,5,450,411,35,55,98,639,259,318 }, +{ 202,77,20,0,318,66,104,128,102,269,177,43,33,7,216,291,494,5,2,342,74,173,97,112,450,22,337,10,234,52,64,678 }, +{ 107,362,612,356,359,97,414,43,259,20,392,7,298,147,819,683,465,173,729,660,319,14,5,779,581,595,246,35,501,92,0,230 }, +{ 6,165,14,453,13,51,19,23,386,457,74,391,308,2,26,401,47,758,603,108,719,366,1,29,309,730,324,197,133,70,115,867 }, +{ 179,72,205,180,247,245,4,490,352,59,317,152,79,498,94,217,148,76,752,864,11,216,141,405,89,452,197,111,497,188,37,21 }, +{ 107,7,298,314,14,359,32,392,232,279,172,97,60,581,387,126,121,0,534,493,356,92,441,95,13,21,35,147,22,5,16,362 }, +{ 156,271,354,586,360,132,591,195,121,18,340,1,5,13,21,48,668,446,23,463,296,658,60,55,407,698,146,70,626,51,163,24 }, +{ 13,23,51,4,0,12,457,15,11,453,2,515,5,1,99,10,115,165,700,475,401,403,3,961,40,14,37,753,719,32,64,569 }, +{ 48,125,21,165,13,221,23,763,423,508,197,5,98,92,193,16,441,386,64,314,293,457,391,140,49,60,102,693,683,51,35,867 }, +{ 
202,77,120,450,5,318,1,494,0,195,18,132,523,403,326,604,354,260,121,576,203,167,234,817,682,49,35,615,21,20,13,102 }, +{ 39,9,166,30,0,101,158,68,404,190,333,274,252,310,88,100,49,28,344,35,21,22,419,131,438,1,16,65,530,694,124,10 }, +{ 15,515,700,753,110,4,1,11,165,180,93,13,82,5,2,197,33,120,0,3,10,23,21,115,901,217,341,77,317,51,32,117 }, +{ 2,29,1,14,6,52,5,46,50,26,70,19,103,58,38,67,96,262,516,309,218,133,108,27,75,17,112,114,24,487,331,83 }, +{ 120,77,15,13,1,141,260,23,515,217,110,51,137,700,317,202,165,291,180,21,753,128,0,177,326,93,450,82,64,269,197,5 }, +{ 255,59,554,297,183,56,33,444,108,358,123,196,269,122,77,153,57,177,117,730,19,467,605,130,128,50,275,4,291,475,134,133 }, +{ 13,23,51,12,153,14,117,120,165,134,99,401,38,453,15,128,197,719,64,515,475,403,37,33,196,700,40,125,5,0,54,2 }, +{ 64,33,174,348,95,108,467,554,56,0,25,306,233,6,63,511,343,120,13,85,29,561,543,707,319,180,899,355,77,49,256,18 }, +{ 120,260,51,23,77,15,202,1,93,82,141,450,13,326,515,137,21,5,64,33,110,700,128,165,318,203,269,102,351,753,197,125 }, +{ 15,515,700,753,4,13,11,1,5,21,23,2,33,64,3,180,32,141,22,102,77,0,10,93,82,352,117,40,341,31,165,6 }, +{ 15,515,700,753,341,13,23,141,33,1,0,217,4,77,180,10,82,351,51,137,5,64,9,317,21,11,102,40,260,202,854,115 }, +{ 105,272,131,22,327,286,28,239,320,9,109,578,219,49,98,224,95,159,538,371,616,127,187,64,713,55,0,170,168,258,716,623 }, +{ 16,18,68,35,24,60,71,118,92,126,0,9,101,191,7,55,154,175,212,14,167,150,302,28,375,1,107,124,346,273,21,108 }, +{ 20,147,43,470,376,142,904,178,427,798,0,595,198,325,858,319,61,202,173,97,5,422,14,22,107,259,32,49,887,77,414,392 }, +{ 13,23,51,12,33,15,99,64,128,515,453,202,117,153,37,102,700,40,134,196,120,0,2,753,141,14,38,3,82,403,77,21 }, +{ 383,17,62,136,84,119,56,440,3,504,240,80,378,129,123,548,106,128,4,11,14,555,162,32,184,361,59,64,205,5,469,57 }, +{ 70,1,48,652,5,638,846,888,21,349,269,260,340,562,767,761,163,883,774,141,125,518,591,0,23,9,87,13,371,303,622,31 }, +{ 
66,135,6,97,74,278,69,7,14,324,267,172,2,140,462,1,357,38,808,550,92,841,189,29,16,25,298,87,75,204,24,335 }, +{ 51,23,33,13,102,40,12,128,64,77,10,202,0,196,117,4,14,99,134,453,65,153,11,475,139,403,22,141,86,2,21,15 }, +{ 88,100,264,166,274,435,772,1,382,921,96,478,30,438,639,909,897,521,190,466,960,410,9,144,530,418,31,329,265,691,778,93 }, +{ 62,440,136,56,84,3,504,548,555,383,4,17,129,128,507,361,123,59,119,162,14,57,152,328,161,11,202,495,184,27,80,215 }, +{ 911,617,332,959,206,141,253,244,282,384,110,120,10,260,352,143,951,811,269,373,160,417,93,531,728,203,434,940,137,55,36,717 }, +{ 120,15,260,141,77,1,515,82,700,351,33,23,450,13,110,326,64,217,269,753,203,137,102,5,165,21,51,291,93,177,373,128 }, +{ 15,515,700,753,0,1,2,23,13,51,5,9,82,901,180,8,3,4,120,6,7,141,93,12,197,341,10,33,115,730,64,125 }, +{ 7,104,97,107,356,232,66,560,298,289,14,707,38,568,359,64,20,0,65,324,22,214,92,32,192,5,387,43,712,90,172,95 }, +{ 6,1,2,66,67,14,74,24,108,29,69,83,458,7,25,38,135,103,36,150,451,114,52,594,75,65,380,18,267,602,19,278 }, +{ 13,23,51,12,115,21,202,5,457,15,4,1,64,719,0,403,2,3,453,165,99,141,401,128,32,515,10,37,523,197,120,700 }, +{ 57,59,4,11,412,381,77,53,421,291,250,368,99,14,27,369,803,283,23,108,403,19,339,210,0,401,12,444,236,40,361,736 }, +{ 15,515,700,1,0,753,2,13,23,5,51,180,3,115,6,7,457,4,9,8,12,82,197,165,141,901,120,719,33,64,21,22 }, +{ 64,95,180,247,929,146,90,126,197,32,237,60,288,165,316,92,5,13,77,7,217,955,522,22,16,314,132,4,317,10,312,86 }, +{ 15,1,120,13,23,515,0,51,700,180,141,2,5,202,21,260,753,165,137,33,77,110,197,128,326,7,450,4,102,9,269,12 }, +{ 14,2,16,46,1,7,24,69,75,35,38,50,29,220,52,140,267,67,18,54,70,309,5,60,92,189,171,87,71,163,58,0 }, +{ 31,98,127,44,9,299,0,276,293,284,116,49,935,599,105,22,456,201,28,1,39,125,242,137,371,144,131,492,159,272,51,395 }, +{ 6,27,151,53,573,445,297,113,26,73,436,19,491,250,396,315,45,112,145,58,614,881,25,34,611,200,17,80,70,5,138,631 }, +{ 
32,693,81,788,90,804,403,56,494,21,84,397,202,65,18,77,64,681,214,725,523,784,526,33,102,825,240,0,115,241,817,91 }, +{ 24,7,14,2,18,16,65,0,108,149,28,69,1,71,154,36,124,35,67,140,189,429,92,68,66,22,55,118,302,150,9,6 }, +{ 0,68,9,35,65,101,189,212,114,67,124,69,1,154,149,39,230,64,252,16,88,702,103,100,18,336,28,329,520,83,30,755 }, +{ 5,2,186,29,61,45,17,1,52,48,58,171,155,227,80,209,311,21,14,46,50,106,243,513,334,502,496,38,3,6,32,592 }, +{ 15,515,700,753,13,1,2,0,3,4,5,23,341,11,10,33,6,51,165,117,153,7,180,12,365,901,77,569,197,115,64,9 }, +{ 13,15,23,515,0,51,1,700,4,2,753,10,3,5,12,77,33,961,165,457,197,11,115,9,22,102,40,403,202,21,14,59 }, +{ 15,515,700,753,13,0,1,23,2,33,102,5,4,10,9,3,51,115,77,7,6,341,12,11,217,40,457,196,180,165,8,523 }, +{ 166,39,30,274,190,100,333,438,530,310,88,252,0,9,539,265,1,656,404,101,625,131,778,254,31,455,676,329,724,158,21,23 }, +{ 734,148,94,308,431,115,37,89,111,413,79,468,197,629,341,474,569,12,13,873,179,401,11,4,180,23,205,72,59,365,134,51 }, +{ 539,228,224,219,816,190,30,258,871,840,669,93,406,530,957,187,160,531,748,137,131,88,863,36,728,839,44,213,352,116,202,466 }, +{ 393,791,125,801,730,551,386,23,31,175,93,98,51,13,144,788,126,203,21,345,116,22,949,110,575,165,326,44,0,4,60,221 }, +{ 13,23,77,141,0,4,51,2,33,115,64,1,10,3,6,15,11,102,7,217,180,40,515,22,128,177,202,9,700,269,165,5 }, +{ 2,29,7,70,52,14,1,58,112,46,75,5,171,163,87,220,307,151,186,334,38,66,155,16,69,135,278,45,262,97,6,21 }, +{ 88,321,213,100,230,435,689,466,1,382,30,352,217,699,410,96,795,36,921,752,190,141,144,180,44,831,317,83,443,31,840,251 }, +{ 363,411,101,520,354,9,195,668,132,156,447,1,905,364,18,23,765,664,146,5,360,13,121,96,98,31,252,39,100,759,264,551 }, +{ 13,23,51,730,12,719,453,457,401,475,5,21,403,2,0,1,15,4,3,899,99,32,165,11,515,308,197,115,6,961,700,523 }, +{ 72,76,89,12,37,4,308,179,38,528,90,431,54,205,148,184,401,57,152,474,23,59,51,245,428,11,32,99,405,316,257,21 }, +{ 
376,20,43,147,470,173,97,595,107,319,414,142,819,5,729,178,858,7,427,32,426,104,14,0,392,362,259,61,230,77,560,246 }, +{ 202,141,269,494,318,137,51,128,403,4,217,96,77,5,64,177,291,180,15,352,102,10,33,349,2,317,0,341,120,515,21,453 }, +{ 77,202,33,128,102,318,494,269,13,0,117,23,342,291,403,15,134,51,153,141,177,515,82,137,196,700,203,64,22,351,753,4 }, +{ 253,110,951,352,499,811,10,854,180,638,244,559,642,752,564,8,141,143,417,341,901,260,206,197,922,661,93,15,498,373,165,911 }, +{ 141,13,23,180,4,217,5,1,269,317,21,0,2,202,115,51,352,77,3,197,64,341,318,15,291,9,137,93,32,165,515,33 }, +{ 9,0,18,252,16,101,68,39,24,118,35,109,158,329,28,167,60,364,333,265,49,100,22,419,553,55,1,677,71,7,212,159 }, +{ 28,109,9,39,0,158,49,22,168,35,55,175,1,65,67,185,194,159,289,95,272,114,30,105,86,584,36,169,254,2,83,24 }, +{ 15,515,13,700,1,753,2,23,0,3,4,5,33,341,11,51,6,10,197,115,901,180,77,40,102,12,365,165,141,217,7,317 }, +{ 173,693,104,422,5,18,61,32,102,0,20,13,784,560,33,66,397,526,49,207,29,25,510,707,65,6,11,344,21,263,81,77 }, +{ 23,13,386,51,308,801,719,221,401,949,21,730,165,421,102,115,125,33,341,670,468,117,770,1,120,6,197,14,403,97,67,958 }, +{ 0,49,105,16,28,24,159,9,158,320,1,68,35,239,170,18,109,7,55,65,2,95,301,124,347,14,21,154,22,127,286,31 }, +{ 2,5,1,207,45,29,32,58,76,61,6,263,292,655,72,14,17,476,7,119,52,306,70,64,21,90,186,214,106,38,3,790 }, +{ 21,6,125,49,13,64,715,66,115,95,197,33,22,32,204,165,56,278,0,408,241,120,4,808,681,350,263,85,81,571,135,509 }, +{ 612,427,325,107,202,5,376,49,64,392,403,470,21,147,31,788,494,14,362,465,858,98,20,804,518,43,845,318,125,97,725,534 }, +{ 32,21,76,72,2,1,14,5,241,449,89,38,350,221,155,48,50,292,37,46,45,90,270,54,17,179,214,12,148,430,476,413 }, +{ 24,0,28,16,7,124,35,154,14,149,65,18,9,68,55,108,175,71,2,1,22,109,92,67,484,336,118,69,302,398,570,420 }, +{ 1,5,14,2,48,50,38,67,46,21,0,54,45,270,281,12,24,32,155,96,513,103,290,83,61,58,36,17,37,72,69,181 }, +{ 
13,961,569,197,37,15,23,474,515,94,148,111,12,165,629,341,700,79,901,401,51,405,753,10,134,4,115,734,873,11,89,117 }, +{ 33,23,102,51,13,40,77,128,64,202,141,15,4,12,0,1,2,117,22,11,10,403,153,515,99,318,137,269,139,196,700,134 }, +{ 0,1,24,67,9,16,18,35,28,69,103,50,5,2,65,12,83,68,7,96,14,22,21,149,75,114,13,133,23,71,218,54 }, +{ 384,617,940,332,855,911,206,959,434,282,141,10,93,253,244,110,144,268,120,36,352,137,417,203,116,31,44,269,160,201,143,951 }, +{ 30,93,473,137,31,704,450,652,190,203,800,254,166,274,326,144,269,160,127,303,120,625,88,848,110,435,77,521,349,131,340,744 }, +{ 53,27,73,26,19,250,297,200,25,630,17,6,611,122,34,42,714,235,472,65,436,14,80,684,690,106,45,113,680,108,64,4 }, +{ 15,515,1,2,700,0,753,3,5,141,180,4,13,77,33,10,217,6,7,134,11,352,197,64,165,341,317,23,12,115,102,40 }, +{ 254,530,39,613,688,221,30,31,438,190,228,960,1,44,141,21,180,406,23,166,9,202,13,96,137,48,131,829,317,269,393,51 }, +{ 9,39,28,35,30,166,158,36,0,175,101,346,364,67,49,68,168,420,88,1,194,131,100,352,55,83,190,64,137,570,86,65 }, +{ 62,56,3,548,555,507,440,161,34,4,215,136,162,514,361,527,17,14,211,130,328,11,383,123,84,183,38,57,184,152,205,494 }, +{ 92,126,107,7,356,493,97,279,359,298,16,246,35,60,14,441,362,121,43,423,5,132,392,20,508,230,199,146,232,173,150,414 }, +{ 15,82,141,515,291,922,349,700,217,260,372,120,351,93,77,753,318,352,373,854,1,326,269,21,13,102,144,202,64,23,203,137 }, +{ 141,217,352,115,180,13,269,317,752,77,23,21,341,197,5,372,244,291,9,64,51,102,4,1,365,2,165,33,3,48,237,351 }, +{ 78,47,390,19,130,453,108,27,711,813,730,444,412,283,196,690,123,14,128,26,250,389,650,236,200,65,51,4,34,183,297,73 }, +{ 34,250,297,80,472,64,495,17,311,3,148,45,667,61,176,53,243,27,90,161,469,141,483,151,62,128,29,4,58,56,5,231 }, +{ 51,23,33,13,551,77,102,326,421,21,523,120,5,899,453,692,202,153,308,615,115,958,450,401,791,68,221,93,475,18,403,4 }, +{ 
98,223,393,363,411,1,478,834,664,156,284,691,447,791,914,293,354,724,697,9,807,541,759,51,18,421,48,264,948,586,195,848 }, +{ 7,14,107,232,16,92,2,60,46,5,359,121,24,526,220,620,135,1,172,21,126,314,132,77,18,75,32,278,12,23,52,38 }, +{ 32,76,2,1,21,72,241,14,5,48,292,89,476,45,720,270,179,90,17,214,148,38,50,29,129,155,350,46,290,227,123,464 }, +{ 15,515,700,753,13,23,33,77,51,4,102,0,32,202,1,11,128,82,117,141,40,5,110,8,3,90,137,21,10,318,403,165 }, +{ 66,6,69,2,1,74,14,135,278,267,380,24,29,97,67,38,103,75,7,388,324,25,52,150,87,83,189,357,335,108,204,172 }, +{ 152,4,339,59,79,471,188,11,77,94,128,33,529,377,12,111,102,202,452,402,216,99,13,542,51,40,474,37,64,291,23,961 }, +{ 15,515,700,753,1,0,196,13,33,2,77,5,23,102,3,10,9,7,217,4,6,153,117,177,14,457,115,12,40,730,11,134 }, +{ 17,209,45,106,207,5,255,119,62,2,61,3,263,742,306,655,425,378,32,56,29,136,84,80,311,58,186,240,243,383,14,21 }, +{ 120,260,450,15,1,23,817,13,515,523,326,5,700,51,82,31,202,64,21,753,318,93,32,269,98,33,351,77,102,125,457,165 }, +{ 116,492,268,93,23,206,203,0,551,918,13,51,8,22,417,940,120,10,499,31,949,791,125,523,165,473,341,730,421,959,401,391 }, +{ 15,515,700,753,165,13,0,1,197,23,4,82,120,2,180,12,260,719,8,3,386,117,5,523,901,11,341,51,10,9,141,351 }, +{ 14,24,69,7,2,66,108,1,67,6,36,398,18,267,150,97,29,38,83,149,65,74,28,0,189,71,388,16,273,124,46,22 }, +{ 330,96,523,335,367,662,141,839,1,922,372,615,244,717,269,443,418,352,403,692,217,854,752,180,36,64,498,576,349,201,98,284 }, +{ 184,90,257,205,245,229,57,152,769,17,524,5,32,497,45,432,619,2,452,266,4,106,1,21,179,59,76,3,460,292,381,128 }, +{ 7,14,16,2,46,5,70,107,87,13,58,307,92,32,38,23,202,0,172,24,18,21,60,128,77,35,20,10,9,4,171,112 }, +{ 7,66,140,16,14,92,97,69,267,172,189,24,380,2,35,60,298,451,230,135,314,74,150,71,38,357,6,330,67,423,21,443 }, +{ 121,167,354,132,18,446,147,101,212,146,407,16,55,35,647,191,20,271,199,68,60,259,463,107,9,126,363,7,195,43,14,411 }, +{ 
76,90,179,32,205,21,184,460,257,288,45,245,316,5,57,152,241,2,358,1,229,72,524,148,48,769,17,4,12,38,14,720 }, +{ 147,259,178,878,427,465,581,198,786,798,142,534,325,929,20,362,35,132,107,376,43,5,279,77,49,146,70,202,590,771,33,14 }, +{ 473,93,450,778,141,30,855,466,144,203,330,530,88,523,459,372,201,617,839,704,254,321,934,326,39,36,82,717,332,213,559,403 }, +{ 523,475,51,899,730,453,23,719,403,33,457,13,421,386,4,120,117,196,102,153,15,801,450,817,515,260,202,11,700,99,165,125 }, +{ 15,1,13,515,0,2,700,5,23,753,4,3,341,317,10,115,180,11,33,64,217,77,117,165,197,7,6,365,9,141,102,134 }, +{ 19,4,119,40,33,202,27,84,102,56,77,73,504,485,26,494,757,63,862,59,23,300,25,12,128,11,5,13,342,880,469,6 }, +{ 32,20,2,13,5,21,23,6,12,38,43,29,64,7,95,51,61,207,48,147,90,178,17,182,49,0,115,202,52,362,37,22 }, +{ 339,188,11,79,4,94,377,12,99,111,542,102,37,33,474,51,471,40,453,152,77,13,59,403,342,23,117,57,475,134,128,38 }, +{ 34,128,283,176,495,231,318,432,503,275,529,527,161,53,3,202,56,291,585,469,73,17,14,412,57,27,80,245,250,381,402,51 }, +{ 15,515,13,700,1,217,141,120,23,180,753,115,365,51,317,341,77,260,0,291,110,137,202,5,21,269,64,36,349,2,4,10 }, +{ 13,15,961,515,700,753,4,12,2,457,3,11,197,51,37,569,115,23,5,0,99,10,1,134,6,111,165,33,72,40,38,79 }, +{ 15,515,700,753,13,1,0,2,23,33,5,3,10,4,9,115,7,102,6,51,12,217,77,11,40,457,569,341,117,317,14,719 }, +{ 5,76,2,32,292,214,45,1,129,519,123,179,90,710,17,29,460,72,14,207,21,249,58,205,464,263,618,48,6,245,3,257 }, +{ 72,76,32,4,21,12,38,23,99,54,89,3,14,17,51,57,11,90,13,488,179,2,59,148,45,37,5,115,401,1,10,421 }, +{ 98,223,393,1,834,264,284,791,724,293,478,772,697,909,363,682,905,447,541,821,411,51,421,9,807,48,765,31,730,96,386,410 }, +{ 341,13,509,8,23,638,165,901,762,10,569,242,391,197,873,642,506,499,629,961,15,180,116,456,206,546,417,1,338,457,515,867 }, +{ 1,2,5,50,14,38,46,114,0,36,29,22,218,65,86,96,137,21,133,285,12,10,323,181,17,58,51,23,67,7,28,6 }, +{ 
481,878,202,13,5,23,182,32,269,21,1,318,77,142,557,494,141,33,640,137,70,291,2,51,260,415,929,403,120,58,4,259 }, +{ 15,515,700,753,1,4,13,0,2,5,341,3,11,180,134,12,10,317,197,365,33,21,23,165,117,6,77,7,217,37,32,498 }, +{ 25,119,19,6,26,42,27,17,4,790,45,814,2,469,483,84,122,1,0,33,32,128,76,80,611,113,73,56,5,240,202,77 }, +{ 14,2,7,1,24,0,65,6,16,69,67,22,124,28,108,5,18,36,86,10,38,46,66,398,289,168,12,83,21,23,610,13 }, +{ 51,23,128,13,15,202,12,120,33,64,141,82,10,515,0,403,700,3,1,99,117,269,153,165,753,5,318,197,102,260,2,137 }, +{ 16,35,24,0,9,18,7,1,68,69,50,71,103,65,67,189,133,23,28,13,60,537,149,335,75,21,64,5,114,2,12,14 }, +{ 754,803,133,576,880,543,2,1,657,50,14,38,46,5,29,67,218,36,58,171,52,96,24,103,775,0,114,83,181,54,65,45 }, +{ 21,32,5,3,2,17,14,72,76,1,12,23,38,51,4,54,10,0,89,13,99,137,45,36,421,115,543,11,22,128,221,48 }, +{ 434,384,268,144,855,940,617,206,332,116,93,911,959,282,203,137,141,489,44,120,10,110,244,36,98,31,269,253,367,417,160,9 }, +{ 15,2,1,0,13,515,5,700,3,23,180,217,141,10,753,4,117,6,77,33,64,7,11,197,352,317,341,134,165,115,12,9 }, +{ 2,113,6,25,1,0,29,4,7,833,5,45,32,61,128,19,77,151,74,145,64,42,14,210,655,106,59,177,27,17,21,738 }, +{ 116,268,918,203,551,31,8,692,206,791,403,499,417,93,940,421,0,23,22,120,13,523,44,51,299,473,959,1,10,475,202,125 }, +{ 107,126,132,612,362,279,20,146,259,493,199,121,590,43,660,147,35,376,939,60,941,534,683,5,0,953,16,7,49,649,595,470 }, +{ 15,515,700,753,13,1,0,23,2,33,77,4,3,51,5,102,115,10,9,341,6,7,11,342,217,12,120,180,40,317,141,8 }, +{ 53,27,17,161,469,378,73,527,19,136,383,250,495,56,862,26,62,84,80,106,200,4,34,14,440,297,3,128,585,5,129,123 }, +{ 17,45,209,106,5,207,243,454,119,255,2,263,186,290,29,3,21,62,425,61,84,32,58,56,48,408,655,136,306,14,742,227 }, +{ 4,152,59,452,128,79,216,11,339,471,529,188,94,77,202,12,291,33,318,377,99,51,23,5,402,349,32,474,102,13,205,111 }, +{ 15,515,700,753,1,0,2,13,3,5,23,4,180,51,115,9,6,12,7,8,197,33,10,961,901,77,141,752,110,22,120,341 }, 
+{ 951,752,638,811,351,642,180,253,10,341,197,901,110,873,8,244,15,352,165,898,143,515,564,762,499,55,365,700,82,753,141,854 }, +{ 6,262,197,350,74,26,115,509,841,583,165,38,21,13,47,50,235,19,33,324,453,4,308,196,138,99,64,903,675,1,223,130 }, +{ 125,165,391,23,386,221,21,13,558,457,51,867,197,115,401,758,77,97,308,791,7,180,48,120,963,451,743,89,603,134,403,450 }, +{ 1,14,2,5,16,46,7,38,58,24,50,0,69,48,35,67,54,18,12,75,21,45,513,155,430,37,270,9,61,163,223,32 }, +{ 23,13,51,0,12,15,4,1,115,2,515,453,10,457,5,3,202,21,165,700,403,11,37,64,77,401,9,197,753,59,475,99 }, +{ 129,84,17,56,27,495,19,548,80,123,162,378,3,504,161,469,618,73,40,53,4,26,205,184,106,183,62,6,257,128,862,12 }, +{ 28,9,22,49,109,1,67,0,39,55,168,158,83,36,35,86,420,194,185,159,95,105,69,208,272,103,50,114,2,254,169,30 }, +{ 242,391,8,456,116,13,23,492,341,165,867,51,499,457,479,638,338,509,719,10,1,642,417,762,401,93,206,268,901,569,22,197 }, +{ 211,162,248,130,57,4,41,556,507,266,183,152,305,361,11,129,62,229,38,471,514,313,157,300,377,3,440,128,123,328,339,59 }, +{ 7,92,97,16,298,140,60,126,14,35,279,314,232,246,43,230,508,173,71,107,423,24,150,779,20,189,66,18,607,21,0,653 }, +{ 15,515,700,753,1,0,2,13,23,5,3,180,51,901,6,4,7,12,9,115,8,457,165,82,120,197,10,64,141,341,22,117 }, +{ 0,18,403,25,523,74,6,24,42,91,22,102,13,51,49,193,475,681,95,85,730,64,899,397,273,750,247,673,32,805,757,288 }, +{ 56,0,18,65,33,554,84,343,64,6,90,561,22,19,899,108,27,63,289,475,240,467,370,32,233,214,24,123,95,287,28,194 }, +{ 31,98,127,9,0,44,293,105,395,299,49,242,28,22,599,116,1,284,276,125,456,685,763,159,272,623,23,935,393,144,201,137 }, +{ 1,5,2,14,38,46,50,48,21,7,58,45,270,61,155,171,0,290,69,32,29,54,67,16,24,666,663,17,37,75,502,52 }, +{ 23,51,13,453,457,12,719,4,15,99,401,2,961,3,11,730,475,515,0,1,165,115,629,700,14,17,403,40,5,33,37,64 }, +{ 968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937 }, +{ 
2,1,14,29,67,103,6,46,52,75,24,133,38,218,83,309,36,108,70,114,96,5,238,74,25,26,220,236,65,50,69,87 }, +{ 7,71,16,92,24,60,14,97,150,140,35,189,149,298,18,230,43,508,2,423,69,0,38,314,66,279,399,517,251,20,232,273 }, +{ 23,1,120,51,13,202,77,141,260,21,15,5,128,82,2,450,269,165,102,318,48,32,137,515,125,64,12,115,351,180,33,7 }, +{ 77,13,33,23,64,51,4,102,141,128,40,1,2,202,0,6,177,115,137,15,59,10,11,7,269,22,515,180,318,3,700,95 }, +{ 101,9,18,363,264,520,411,604,676,682,905,271,16,821,167,0,621,364,39,100,121,118,166,781,647,252,1,848,447,265,404,60 }, +{ 144,203,326,382,166,418,93,88,96,822,1,141,859,77,744,438,110,269,921,367,521,274,100,39,494,120,403,473,217,576,13,291 }, +{ 13,21,180,125,5,23,191,32,18,16,146,199,115,24,165,118,0,225,22,1,60,197,64,901,375,241,48,12,408,71,522,818 }, +{ 15,515,700,753,13,0,23,8,1,51,82,102,2,33,4,9,180,165,5,77,10,110,12,197,120,260,18,326,351,403,22,457 }, +{ 33,77,102,64,13,23,128,51,141,202,1,40,0,2,117,10,15,4,6,318,269,134,22,515,180,115,177,153,137,196,3,700 }, +{ 174,544,104,525,74,0,151,25,6,624,29,66,2,636,81,45,204,177,64,416,7,644,5,138,222,319,355,77,22,122,789,216 }, +{ 141,304,372,352,291,947,177,269,128,954,77,349,217,202,64,318,498,437,102,864,86,13,115,180,137,5,210,197,32,950,678,7 }, +{ 161,200,53,17,714,27,34,73,472,62,585,56,440,383,136,78,527,19,4,3,106,361,14,250,80,514,377,84,322,390,862,548 }, +{ 32,76,72,21,38,14,89,54,12,37,2,241,5,428,17,1,181,221,350,45,3,4,449,90,148,179,99,292,794,770,477,46 }, +{ 33,23,128,64,141,13,77,51,102,202,2,15,1,3,40,10,5,153,269,515,165,0,117,196,180,318,6,700,137,134,120,22 }, +{ 96,137,30,0,9,39,840,202,669,406,141,530,613,1,180,88,22,160,679,576,28,403,31,219,49,228,829,100,36,15,10,856 }, +{ 180,141,352,1,15,752,115,0,217,365,2,515,13,901,341,317,23,4,197,700,269,5,3,31,753,244,21,165,253,202,51,44 }, +{ 1,2,67,0,28,50,83,65,14,46,103,114,24,38,36,9,69,5,18,7,22,133,55,218,16,124,29,54,96,160,12,480 }, +{ 
180,115,352,317,365,217,752,901,141,15,341,1,515,253,700,0,753,873,2,197,31,137,165,244,4,120,160,44,98,5,202,3 }, +{ 5,32,347,49,13,21,95,713,23,1,77,33,60,64,107,4,126,928,296,850,0,241,197,102,652,195,180,534,165,153,379,10 }, +{ 341,180,365,901,317,115,15,752,515,700,217,873,753,82,0,110,197,141,951,165,1,564,13,351,253,12,10,3,2,4,308,244 }, +{ 17,45,21,3,106,5,155,38,227,32,2,209,62,54,12,243,14,181,552,587,46,540,207,794,37,48,430,119,255,221,770,29 }, +{ 16,24,35,18,7,0,50,1,9,14,75,69,2,5,12,21,60,13,67,71,23,48,10,108,223,181,189,103,46,64,92,51 }, +{ 127,13,98,165,308,23,286,293,258,51,219,395,197,115,301,401,31,391,22,105,457,170,239,276,55,338,629,116,180,479,509,569 }, +{ 539,213,748,840,957,669,30,466,88,217,144,251,863,190,137,93,230,228,679,352,317,203,617,321,258,530,160,219,96,831,816,689 }, +{ 5,48,1,21,2,14,0,36,12,38,32,54,430,181,50,270,72,99,281,45,17,10,46,22,37,218,67,3,290,76,23,51 }, +{ 13,23,0,4,33,51,2,115,141,1,77,217,180,10,9,317,3,102,11,5,15,197,7,202,22,165,40,64,515,6,341,31 }, +{ 13,15,117,515,23,12,37,134,165,700,38,54,457,753,51,64,153,197,14,10,33,82,961,0,99,89,115,719,141,3,4,1 }, +{ 5,21,2,3,1,32,14,12,48,17,0,10,51,23,38,22,4,72,13,54,36,45,137,76,99,114,86,37,11,64,540,430 }, +{ 202,128,77,318,291,33,269,102,275,141,494,342,40,678,0,177,20,210,402,7,4,5,137,6,13,450,403,32,49,120,23,22 }, +{ 1,2,24,14,67,46,69,50,38,103,16,18,75,35,83,29,52,96,5,108,0,7,54,71,149,394,236,309,70,133,220,58 }, +{ 15,515,1,700,0,2,753,13,23,5,180,3,51,4,165,457,12,197,115,6,7,21,9,141,8,901,33,82,120,77,10,110 }, +{ 0,28,65,14,67,2,124,24,1,9,7,69,55,154,36,16,46,114,175,35,83,22,429,18,109,149,68,189,108,336,251,133 }, +{ 56,162,403,3,129,775,99,161,17,40,527,33,880,4,14,128,475,12,548,23,102,202,361,117,34,184,383,200,183,196,64,53 }, +{ 151,2,29,58,112,45,186,113,5,70,52,1,311,6,315,66,61,7,74,27,631,17,80,87,287,243,209,227,14,491,19,869 }, +{ 
6,1,74,2,75,29,25,66,26,70,52,138,67,324,357,42,19,220,14,85,87,108,38,451,309,103,24,69,380,135,114,65 }, +{ 15,515,700,13,23,0,1,120,753,51,180,2,260,202,5,141,77,102,9,450,115,21,197,165,7,137,110,33,12,269,901,4 }, +{ 5,45,17,2,14,46,48,38,181,50,155,3,186,54,61,29,21,227,281,80,540,106,12,400,52,1,58,32,328,171,209,487 }, +{ 16,18,265,121,158,35,60,9,39,7,329,105,252,68,24,1,132,167,159,22,0,49,286,101,21,146,23,327,120,709,5,14 }, +{ 108,467,283,56,389,650,123,412,33,177,899,475,216,453,269,349,619,65,51,730,403,670,23,196,523,128,84,13,401,789,503,543 }, +{ 514,3,11,377,328,4,361,507,57,403,14,880,130,485,176,215,236,38,152,102,211,56,62,757,54,585,300,556,34,555,40,229 }, +{ 3,555,62,266,130,99,507,139,514,12,152,229,215,305,57,40,440,33,403,471,38,56,475,14,361,313,775,328,196,548,123,23 }, +{ 120,202,318,15,77,13,1,450,33,269,515,260,5,128,494,51,23,700,102,141,40,753,326,403,817,137,523,21,177,922,342,7 }, +{ 15,1,515,23,0,13,700,2,51,753,180,5,165,21,197,12,3,120,115,4,141,6,9,7,457,33,386,202,82,8,31,341 }, +{ 15,180,515,82,351,700,10,317,753,115,217,365,141,898,33,901,13,23,110,854,752,77,1,197,4,341,143,36,64,352,102,9 }, +{ 104,289,66,707,214,90,712,64,97,173,20,0,414,194,874,43,32,7,568,560,65,38,426,312,715,192,376,74,835,5,324,147 }, +{ 84,56,0,554,63,65,453,249,123,643,18,26,847,475,511,403,416,561,524,289,370,73,9,19,45,42,719,194,27,467,33,730 }, +{ 21,346,13,350,308,826,197,101,352,68,570,0,165,23,9,841,115,100,509,694,221,230,35,217,569,88,124,749,1,777,212,154 }, +{ 16,92,7,24,60,18,35,140,126,14,50,71,46,330,2,75,246,5,121,267,571,1,230,309,220,0,9,64,146,236,54,108 }, +{ 82,15,515,898,365,700,180,33,341,753,77,901,10,115,55,351,21,5,1,4,13,102,36,217,2,165,752,120,197,117,11,317 }, +{ 16,24,35,18,69,71,140,1,103,7,189,68,0,50,9,108,2,133,60,267,230,46,149,67,167,118,92,14,75,21,191,38 }, +{ 60,71,16,18,7,20,43,118,35,68,375,28,608,0,175,566,154,92,14,149,628,33,22,13,2,10,279,23,107,356,55,117 }, +{ 
187,258,871,295,201,434,219,224,489,384,268,110,261,839,44,699,93,116,36,131,141,228,144,160,940,567,244,406,137,574,98,253 }, +{ 66,7,97,172,192,712,232,324,204,74,43,448,387,426,568,20,526,107,104,135,356,729,173,0,22,5,32,95,2,64,500,560 }, +{ 15,515,700,753,1,4,0,341,13,3,134,2,5,33,11,77,12,10,23,197,365,901,7,40,217,32,21,6,51,180,961,37 }, +{ 0,28,24,9,35,65,16,124,68,55,109,154,7,39,22,149,158,14,175,1,49,252,18,71,2,168,289,419,108,420,67,101 }, +{ 7,16,14,92,2,46,140,24,220,35,38,60,75,1,50,18,87,54,5,126,29,52,278,262,314,107,71,21,172,135,330,394 }, +{ 7,92,16,14,172,126,2,60,140,35,135,314,278,46,24,38,232,107,330,66,5,18,150,246,230,97,52,1,121,563,279,21 }, +{ 6,26,235,53,297,436,27,19,25,73,113,445,90,214,65,42,64,289,250,611,624,32,45,648,614,17,85,491,34,122,200,416 }, +{ 352,141,1,217,854,752,351,180,244,36,110,661,82,258,816,160,295,219,567,224,230,269,922,144,260,268,93,201,137,116,489,202 }, +{ 16,60,35,18,126,107,68,191,92,121,7,14,598,20,493,279,167,446,118,0,28,43,463,55,24,212,375,566,9,150,575,21 }, +{ 15,1,515,2,4,13,0,700,3,5,23,753,341,77,51,115,33,11,180,10,197,141,6,165,7,901,102,40,9,202,217,12 }, +{ 23,51,13,202,21,5,1,120,15,137,128,125,32,2,12,141,33,165,64,515,403,318,700,48,180,7,6,450,115,523,475,260 }, +{ 131,716,224,371,219,187,737,616,385,254,9,98,105,924,31,258,836,39,127,578,49,916,44,761,272,137,944,159,0,242,442,22 }, +{ 15,515,700,1,753,2,5,0,4,13,3,180,11,141,197,10,341,217,33,134,165,6,77,7,317,12,352,64,365,32,102,40 }, +{ 66,74,7,173,174,29,192,2,222,20,226,43,353,52,712,6,0,138,500,204,97,145,64,104,426,673,355,90,25,5,65,87 }, +{ 5,259,786,534,590,493,279,49,13,581,465,21,929,35,941,132,147,32,23,612,362,626,107,121,178,0,146,61,48,939,10,18 }, +{ 2,14,16,7,278,69,135,140,46,24,267,35,92,38,1,189,29,52,309,60,66,75,71,172,74,357,18,87,67,6,230,5 }, +{ 165,13,308,197,391,23,401,15,51,457,180,509,115,569,3,629,961,719,34,758,317,734,14,29,46,2,17,901,38,453,5,217 }, +{ 
1,22,2,14,0,28,7,168,67,49,65,24,36,95,5,105,55,35,12,46,69,16,114,159,194,50,10,9,158,83,164,109 }, +{ 34,453,3,196,130,14,322,11,47,51,377,236,361,4,730,153,514,711,57,440,62,17,161,108,176,59,485,56,162,412,202,117 }, +{ 18,16,21,23,48,13,24,35,121,5,156,60,51,1,7,132,141,221,163,115,0,271,447,340,363,202,125,71,2,781,22,698 }, +{ 165,13,457,23,197,961,629,569,341,41,12,38,401,901,54,51,115,17,15,509,421,37,62,45,719,57,32,328,117,758,157,99 }, +{ 2,1,77,141,33,64,3,102,0,23,13,5,128,10,6,15,180,202,269,40,51,515,7,165,137,117,318,4,700,153,197,352 }, +{ 68,212,0,124,101,9,154,16,562,191,21,149,65,24,35,1,118,167,818,350,520,100,722,841,264,71,13,302,478,23,375,346 }, +{ 98,23,48,598,13,293,541,21,125,121,51,807,0,31,35,259,126,7,386,1,223,783,10,107,199,20,221,144,342,963,49,64 }, +{ 21,13,5,586,1,23,167,48,33,781,647,49,165,18,51,271,77,32,761,118,0,82,391,22,146,141,459,31,197,156,115,4 }, +{ 2,1,5,61,29,7,58,45,14,6,425,32,70,52,290,738,207,21,72,112,66,76,655,17,186,46,64,263,38,0,128,87 }, +{ 39,265,9,100,1,333,363,101,18,411,447,254,166,310,31,98,264,30,639,404,156,286,16,93,593,203,272,682,0,905,44,821 }, +{ 6,2,1,19,29,51,26,108,25,74,5,23,14,114,13,386,133,103,42,66,453,70,309,138,719,324,65,38,64,96,52,75 }, +{ 20,43,356,107,49,858,595,7,414,359,0,5,392,319,97,612,422,819,14,376,173,246,22,470,147,427,230,92,197,33,683,95 }, +{ 0,9,68,35,65,67,114,101,28,1,124,175,336,69,154,103,83,24,189,133,39,16,50,7,2,149,55,251,18,345,230,36 }, +{ 23,13,51,15,0,1,515,115,165,2,5,12,700,202,4,21,141,457,753,197,10,3,180,120,32,9,318,11,453,64,6,269 }, +{ 121,195,60,16,126,107,98,271,146,407,132,35,1,167,199,223,493,191,279,20,18,5,43,7,21,92,48,393,0,362,212,467 }, +{ 31,44,299,116,393,144,492,456,268,22,105,0,367,918,384,434,127,489,98,9,963,125,242,948,1,28,206,49,36,51,93,293 }, +{ 23,13,457,51,165,401,719,758,197,453,961,629,308,14,15,12,730,3,386,569,391,29,739,515,34,828,832,901,115,514,670,341 }, +{ 
105,36,131,22,180,115,341,127,169,1,9,31,64,98,44,365,317,141,272,143,160,55,219,86,197,776,239,187,0,535,13,752 }, +{ 1,6,2,14,66,25,29,5,108,67,65,114,19,38,26,52,74,7,24,18,69,86,36,388,64,51,17,83,23,46,42,75 }, +{ 51,386,23,453,719,13,730,6,457,670,758,19,401,165,2,475,47,26,899,14,108,17,1,5,197,29,894,754,236,74,27,285 }, +{ 252,18,9,101,121,16,132,0,419,167,364,60,604,35,265,363,146,271,39,158,68,109,28,329,848,24,647,907,682,159,212,55 }, +{ 283,503,128,432,26,193,63,269,789,529,102,122,389,275,678,6,25,318,445,4,342,27,573,605,177,862,643,291,216,57,235,59 }, +{ 2,1,29,75,69,52,14,6,46,74,87,7,220,226,278,38,135,66,267,70,16,262,25,24,380,324,357,140,67,394,97,222 }, +{ 97,298,69,7,66,140,189,24,16,267,172,423,60,150,14,314,92,71,81,501,43,35,74,6,517,232,149,607,83,330,18,2 }, +{ 475,421,403,899,51,805,523,958,453,817,23,615,401,801,120,326,202,670,494,730,450,386,115,629,260,576,77,365,569,0,165,13 }, +{ 7,20,14,128,77,97,112,202,2,177,16,415,269,318,275,66,107,43,141,414,135,38,307,10,58,0,6,291,32,5,4,40 }, +{ 24,14,7,0,2,1,22,28,16,65,168,124,35,67,108,109,18,49,10,149,69,158,5,95,289,12,55,6,36,71,46,21 }, +{ 26,80,27,73,122,25,19,17,6,42,684,209,445,573,667,106,45,690,4,611,255,680,297,495,65,59,128,119,483,113,64,53 }, +{ 107,259,362,376,465,20,470,147,595,534,612,683,660,43,5,49,581,0,858,35,427,246,97,786,178,356,14,21,142,878,7,279 }, +{ 131,30,228,190,856,406,224,88,219,530,863,613,778,274,944,816,187,39,100,160,258,31,44,93,1,321,539,36,871,137,435,531 }, +{ 113,6,311,25,45,491,80,611,27,26,209,667,17,73,122,42,684,396,19,85,106,5,614,4,2,255,151,29,1,64,648,61 }, +{ 15,515,700,753,0,1,23,51,120,2,13,82,5,260,9,4,341,77,180,115,141,10,7,12,450,8,202,901,197,351,165,93 }, +{ 219,127,98,258,395,421,924,293,242,201,697,105,276,51,308,23,453,272,401,944,512,137,13,31,284,567,386,365,116,131,964,125 }, +{ 15,180,352,141,515,752,217,82,1,317,854,700,351,753,115,341,110,13,260,120,21,36,33,898,23,10,5,365,4,160,901,137 }, +{ 
129,123,17,257,162,184,205,249,183,769,5,80,3,4,229,130,119,45,90,99,618,106,57,497,12,128,2,84,59,152,27,40 }, +{ 33,102,23,77,64,128,51,13,0,202,10,141,40,15,1,22,117,137,2,86,4,403,269,153,515,196,65,11,700,115,99,5 }, +{ 7,14,2,16,172,107,46,92,5,135,35,202,294,87,38,232,29,97,20,21,24,1,60,220,66,43,12,0,126,52,54,70 }, +{ 403,576,615,523,475,326,805,817,494,421,51,202,120,450,137,453,23,859,260,401,402,77,33,670,0,958,15,197,386,515,165,480 }, +{ 141,352,217,137,0,180,202,349,9,269,23,51,115,291,77,372,13,317,120,752,365,351,93,22,2,341,64,10,82,854,28,18 }, +{ 1,23,13,51,202,141,5,165,21,15,120,180,64,2,197,125,33,102,12,7,137,515,48,128,269,318,93,700,0,403,9,4 }, +{ 25,151,6,145,122,29,174,45,113,74,4,665,42,138,2,614,416,287,19,348,746,0,66,26,1,7,64,243,311,396,81,624 }, +{ 30,190,254,166,100,382,731,829,88,131,264,795,9,93,625,274,438,1,578,613,716,31,44,39,530,36,616,921,265,203,160,77 }, +{ 132,5,21,13,1,23,32,195,379,687,156,121,626,296,48,70,850,146,51,82,883,771,35,49,652,407,60,4,260,0,845,33 }, +{ 9,254,0,49,272,131,39,159,688,101,105,578,518,158,286,28,327,333,68,224,252,219,344,16,22,1,716,31,30,228,24,890 }, +{ 16,7,35,60,18,20,14,68,9,0,28,118,43,92,126,55,107,2,101,154,24,71,5,202,121,109,22,252,21,97,1,621 }, +{ 15,515,700,753,13,1,341,2,0,4,3,5,11,23,10,33,117,12,901,197,6,134,77,8,165,317,21,365,217,7,17,40 }, +{ 78,19,444,47,26,390,27,453,130,813,108,730,711,65,412,122,51,680,113,235,690,196,630,283,128,236,14,64,73,53,200,445 }, +{ 2,7,29,5,61,6,45,1,66,113,112,14,52,315,738,128,32,151,74,16,20,64,70,21,592,0,25,4,425,43,491,222 }, +{ 145,112,74,66,6,29,26,70,19,396,25,87,2,287,135,151,138,222,5,226,42,122,7,307,1,644,45,58,113,651,635,632 }, +{ 92,16,7,60,126,24,140,35,14,232,18,121,246,71,46,267,172,150,107,314,132,146,230,2,278,108,330,199,236,5,38,572 }, +{ 13,115,197,538,569,341,98,55,165,127,365,762,219,286,844,23,170,206,734,638,535,901,169,253,629,0,873,509,180,10,332,258 }, +{ 
58,151,74,53,287,27,29,396,6,70,2,73,5,52,112,26,651,1,297,113,17,75,19,45,334,445,145,34,315,549,436,331 }, +{ 214,289,90,874,104,751,64,65,312,835,204,249,750,194,74,81,875,32,519,288,348,0,174,247,636,715,138,192,784,6,524,280 }, +{ 9,39,28,35,30,0,166,49,1,175,439,158,64,346,36,101,67,364,86,88,274,100,168,55,23,10,420,22,190,141,505,180 }, +{ 341,901,15,515,700,753,1,365,10,0,569,180,2,197,115,31,165,3,5,4,44,22,317,13,9,951,23,253,116,143,762,93 }, +{ 120,202,77,450,260,15,128,318,102,515,494,13,817,700,269,5,403,51,1,33,23,753,82,326,141,342,291,137,21,523,351,32 }, +{ 13,115,241,64,180,32,125,197,165,4,118,22,21,23,16,247,237,28,225,191,95,141,167,5,0,341,288,35,459,18,177,24 }, +{ 16,24,35,14,1,2,7,69,18,46,60,50,267,140,71,189,108,38,75,92,0,5,9,230,67,21,309,335,54,236,394,220 }, +{ 15,515,700,753,898,180,901,341,197,638,10,165,33,1,115,4,77,365,317,13,102,217,117,0,5,2,253,3,82,569,21,752 }, +{ 193,523,18,84,56,730,233,65,4,817,90,33,643,403,91,511,453,240,59,11,214,51,719,196,153,475,32,123,64,847,102,561 }, +{ 112,29,151,2,74,6,66,7,222,145,287,45,5,624,52,25,113,416,58,122,19,70,186,204,4,87,644,549,337,884,32,0 }, +{ 13,0,23,2,1,15,33,3,77,515,141,5,4,217,10,51,64,180,700,115,6,117,11,7,753,40,102,165,197,22,317,153 }, +{ 28,0,1,67,65,9,2,114,83,69,103,50,36,22,55,24,46,14,124,109,35,7,16,38,133,160,389,323,18,12,154,5 }, +{ 121,132,18,167,271,146,101,363,621,9,411,647,16,354,520,60,212,932,1,806,55,0,195,446,68,35,31,364,777,252,407,118 }, +{ 26,6,85,396,122,624,25,19,42,445,64,648,573,416,174,680,665,214,45,348,90,65,194,145,113,881,138,289,112,436,297,544 }, +{ 16,146,18,92,24,199,60,71,121,126,35,108,156,953,271,674,132,7,32,640,360,246,649,118,21,95,5,517,14,9,1,314 }, +{ 51,13,23,453,475,730,719,15,457,403,64,115,33,95,4,523,3,12,21,6,899,102,5,128,401,202,11,141,308,515,22,125 }, +{ 151,396,6,53,27,113,58,26,73,112,74,287,45,29,297,19,145,70,138,445,315,436,34,2,17,573,5,61,549,491,1,80 }, +{ 
223,1,888,774,260,98,269,385,349,202,96,141,421,622,730,863,318,697,87,453,393,418,922,834,751,5,163,335,120,291,352,30 }, +{ 16,60,92,35,126,121,7,150,246,18,107,1,598,24,167,195,14,97,71,279,98,441,191,199,517,146,356,223,298,271,230,0 }, +{ 22,1,105,28,239,170,0,55,95,31,36,301,2,320,98,127,9,49,44,64,35,67,10,86,5,12,109,23,168,13,21,312 }, +{ 2,6,5,207,292,76,1,119,45,32,17,29,61,306,790,58,240,106,14,64,214,151,476,710,7,72,84,128,4,179,70,25 }, +{ 51,23,221,254,115,13,438,530,125,48,21,39,541,960,386,49,1,613,15,840,228,308,627,131,688,401,5,326,421,158,165,83 }, +{ 1,5,2,0,12,22,21,36,10,14,48,86,23,13,32,54,3,4,28,65,51,50,137,37,208,114,9,38,17,7,281,202 }, +{ 363,23,447,182,296,340,1,93,698,478,379,156,284,144,18,269,21,98,141,70,668,411,664,658,110,914,67,937,180,691,335,291 }, +{ 17,32,45,498,41,115,180,197,106,62,54,38,546,165,13,155,468,509,341,243,241,217,542,15,57,536,428,51,117,721,292,129 }, +{ 32,95,64,246,22,92,180,13,5,652,125,241,638,237,7,49,4,126,21,115,197,296,888,316,0,165,774,23,16,392,1,534 }, +{ 15,515,700,753,33,341,13,217,4,141,77,23,180,317,1,10,102,351,82,115,40,5,854,21,137,11,352,901,365,117,197,0 }, +{ 15,120,1,82,93,217,515,260,77,141,13,110,700,351,352,23,180,753,21,854,202,317,64,349,269,51,165,137,5,128,291,36 }, +{ 13,23,51,141,77,0,33,4,115,64,2,10,102,202,217,128,1,177,269,11,7,22,6,21,32,9,180,40,15,3,165,318 }, +{ 478,264,1,520,98,724,9,682,223,664,21,759,13,772,604,100,23,363,411,48,821,5,0,905,909,447,31,265,88,101,166,39 }, +{ 20,29,7,2,77,416,6,128,33,5,0,113,104,32,43,13,491,66,23,21,102,51,74,210,202,525,64,318,10,81,174,14 }, +{ 2,1,5,14,7,58,61,29,45,290,46,38,52,21,32,270,6,592,425,0,75,155,16,48,17,50,72,70,207,24,263,663 }, +{ 80,6,17,209,106,26,483,113,19,469,255,25,378,27,495,833,45,64,161,2,61,667,76,742,32,90,445,5,814,65,887,119 }, +{ 98,223,393,1,354,834,195,791,447,697,284,293,360,541,781,156,51,807,18,664,421,411,163,668,48,31,591,765,883,386,948,23 }, +{ 
679,141,816,36,93,406,876,144,228,137,1,180,669,21,332,251,5,269,116,187,96,351,202,752,317,64,203,831,574,466,855,345 }, +{ 15,515,700,13,1,753,2,0,23,341,3,5,4,10,51,11,33,165,6,7,115,197,12,64,180,153,217,77,9,569,901,317 }, +{ 13,23,202,51,5,21,403,15,120,64,1,450,128,141,12,523,33,165,494,125,2,515,269,7,48,102,318,95,260,180,453,197 }, +{ 16,18,24,60,71,92,146,246,199,35,140,7,9,118,121,108,167,230,126,132,0,640,156,14,68,133,267,360,649,271,64,55 }, +{ 269,141,678,177,202,77,128,318,33,947,40,120,291,349,102,137,64,352,210,864,461,498,13,342,196,23,275,450,954,0,205,111 }, +{ 16,24,92,18,71,60,35,7,108,191,167,246,140,14,126,21,1,68,150,118,149,388,399,9,273,0,121,796,230,48,212,517 }, +{ 2,14,1,29,46,75,52,70,69,171,38,7,58,163,16,5,24,220,67,112,223,54,50,409,155,35,267,186,151,334,394,140 }, +{ 9,252,100,265,166,39,88,404,329,0,1,520,382,812,101,593,264,274,604,676,30,118,68,553,18,664,363,23,639,865,21,411 }, +{ 16,18,35,24,0,60,158,7,22,68,14,49,109,159,55,9,28,71,2,10,5,105,1,118,329,13,344,23,92,20,21,126 }, +{ 15,13,515,700,23,0,753,1,51,2,4,10,77,5,3,197,115,165,961,202,9,457,180,12,141,22,33,120,6,11,318,31 }, +{ 160,93,251,137,317,1,180,36,120,217,345,752,617,352,332,10,96,531,498,318,365,202,141,269,816,341,901,679,143,35,83,968 }, +{ 6,25,42,128,19,59,122,4,85,26,611,27,269,233,45,0,343,91,318,80,11,177,283,73,33,614,2,77,64,138,445,216 }, +{ 95,64,74,7,32,81,51,204,0,20,237,65,56,38,91,23,207,180,347,343,29,6,511,52,49,10,25,18,554,370,14,312 }, +{ 202,120,326,260,450,817,494,318,137,403,128,77,523,553,859,5,704,1,15,23,13,576,7,16,615,51,682,291,515,0,21,234 }, +{ 20,43,107,356,362,126,595,92,359,7,422,319,493,16,858,5,392,246,414,683,60,0,35,945,441,21,259,819,49,97,279,173 }, +{ 25,42,6,77,33,102,0,122,4,690,29,483,210,27,21,19,2,300,18,648,680,119,117,59,1,10,342,12,26,153,91,684 }, +{ 31,44,299,116,125,242,456,599,22,393,0,144,492,28,268,1,9,963,301,105,367,36,127,170,384,434,206,98,918,10,13,93 }, +{ 
410,521,686,367,662,88,335,321,201,96,98,772,144,1,934,921,443,435,284,274,264,551,120,897,44,100,33,225,744,418,909,960 }, +{ 142,178,878,234,132,786,195,202,77,416,147,929,146,522,167,259,687,639,450,271,626,481,590,5,198,212,771,49,0,465,315,427 }, +{ 254,39,131,9,272,0,578,716,310,224,30,49,105,827,518,829,166,333,616,228,613,846,101,219,1,31,890,98,159,938,252,100 }, +{ 230,699,854,473,450,351,831,137,855,217,352,704,800,202,251,498,160,144,206,203,317,201,253,752,418,141,1,332,82,180,443,36 }, +{ 403,202,475,453,494,23,51,77,318,402,13,33,128,102,137,141,120,342,269,0,450,4,899,576,40,421,275,117,217,177,196,64 }, +{ 23,44,98,182,291,144,116,39,110,141,96,82,905,70,367,264,125,93,77,411,120,1,658,202,100,415,107,363,197,30,447,105 }, +{ 15,515,1,13,700,23,77,120,0,753,51,180,202,141,260,5,21,115,2,137,128,9,450,197,365,269,12,326,110,102,318,7 }, +{ 0,32,18,95,207,577,193,29,61,104,64,784,715,102,693,887,81,91,583,671,403,5,52,474,397,180,138,49,37,344,38,263 }, +{ 0,101,9,68,252,16,100,39,166,364,124,24,154,265,212,88,18,35,329,419,28,118,71,30,65,158,191,55,1,694,21,676 }, +{ 16,24,191,18,35,71,167,118,149,68,212,9,0,1,21,108,101,92,60,375,302,7,589,755,124,674,350,48,562,246,13,363 }, +{ 2,14,69,24,1,67,46,16,38,103,29,267,7,35,189,135,278,71,108,18,83,309,52,6,149,388,75,236,60,0,150,66 }, +{ 13,1,23,0,4,2,51,15,180,33,3,115,5,515,141,10,77,700,11,9,197,341,202,165,217,102,22,7,753,317,365,6 }, +{ 447,1,698,411,31,363,98,5,919,4,156,125,759,691,13,64,459,354,44,21,48,293,30,914,478,225,82,120,2,922,848,839 }, +{ 854,82,351,217,141,180,352,15,515,752,1,700,317,898,753,244,10,21,922,115,77,36,4,260,64,110,372,13,5,365,120,11 }, +{ 0,1,4,13,5,2,82,33,3,120,10,23,9,11,77,260,21,102,8,31,40,6,351,51,64,450,22,117,93,110,7,457 }, +{ 51,13,403,23,12,475,1,2,21,5,453,523,115,202,817,7,0,99,3,6,450,120,494,64,22,95,49,899,10,37,32,141 }, +{ 180,752,352,141,498,864,317,217,9,0,115,237,230,39,30,197,83,1,930,64,35,365,372,13,579,88,702,36,101,901,482,21 }, +{ 
16,24,0,7,22,18,28,35,14,158,71,2,109,60,1,168,49,154,124,68,10,55,92,118,159,9,5,747,95,105,65,6 }, +{ 15,515,700,753,0,23,1,13,341,51,120,77,141,4,137,33,260,82,202,9,180,5,351,2,291,10,11,901,21,115,40,128 }, +{ 24,68,35,149,18,16,0,7,9,14,189,108,69,65,67,1,71,2,118,28,140,101,114,336,230,124,175,133,46,55,251,154 }, +{ 13,308,197,115,125,9,165,237,391,23,509,569,546,28,49,629,22,338,317,254,749,180,468,159,903,386,217,352,558,39,36,734 }, +{ 64,90,32,217,77,4,141,216,172,312,128,13,86,33,597,147,352,95,5,115,875,22,59,11,102,14,182,437,97,177,874,707 }, +{ 1,22,36,0,105,28,2,67,95,49,55,5,239,12,86,9,83,170,312,64,31,21,23,10,164,50,114,159,208,13,7,320 }, +{ 9,18,16,0,159,105,101,252,49,310,24,68,35,39,265,272,7,1,60,28,455,890,329,557,118,286,55,137,327,167,5,13 }, +{ 129,123,214,249,618,17,5,257,205,184,460,76,2,162,769,245,90,106,128,45,119,1,183,4,3,12,179,64,6,229,99,209 }, +{ 51,453,719,457,23,13,730,899,475,386,4,15,11,12,670,196,515,523,961,401,153,3,700,99,753,117,403,32,120,165,57,0 }, +{ 173,66,192,204,20,74,104,636,7,43,289,426,825,712,560,214,81,750,65,97,707,0,90,414,64,348,32,500,22,861,95,6 }, +{ 13,23,1,51,5,21,141,120,202,15,165,2,515,180,12,125,0,64,82,700,197,269,32,48,260,128,115,93,9,137,33,753 }, +{ 200,34,322,78,472,390,27,714,19,14,136,161,453,176,236,444,59,3,62,128,108,57,283,862,73,53,47,17,412,813,4,56 }, +{ 33,347,66,204,426,498,56,172,97,95,5,681,546,22,10,0,135,180,4,241,19,174,6,353,263,21,7,370,42,197,27,808 }, +{ 131,224,219,187,385,371,258,442,254,737,31,98,836,127,924,944,44,871,908,716,39,827,201,574,116,137,36,1,276,242,578,616 }, +{ 1,15,2,180,4,141,13,515,0,5,3,115,700,901,341,23,217,352,753,51,197,77,317,33,365,752,165,21,6,7,269,93 }, +{ 370,91,718,74,81,510,397,66,636,240,355,84,138,511,18,278,6,681,701,289,90,5,214,582,64,104,0,643,192,65,750,32 }, +{ 15,515,700,753,13,120,1,0,165,2,197,23,260,180,4,82,51,386,8,5,12,10,3,141,351,341,326,9,450,7,64,6 }, +{ 
32,2,76,5,1,292,72,45,476,214,21,241,29,14,17,48,129,90,179,460,464,123,290,148,519,205,3,263,249,38,710,89 }, +{ 13,165,115,17,197,569,23,509,457,45,32,41,106,180,62,38,659,734,155,536,341,629,961,873,587,54,431,37,391,99,405,428 }, +{ 68,24,35,16,0,101,9,124,154,71,149,65,18,175,28,118,7,55,302,108,92,14,22,346,1,39,429,252,375,364,10,67 }, +{ 0,9,1,68,230,65,35,69,83,23,101,13,141,67,217,352,21,39,16,88,28,124,212,100,115,154,51,64,30,36,10,317 }, +{ 100,265,88,909,410,382,812,593,1,213,321,30,252,230,352,264,9,166,689,39,676,98,21,466,724,639,478,217,13,48,553,101 }, +{ 113,61,198,904,43,0,5,37,899,325,20,59,33,523,204,725,817,389,470,329,222,40,174,58,22,453,690,848,122,104,788,105 }, +{ 16,24,141,18,7,0,71,140,35,269,75,352,12,9,108,217,5,330,60,64,199,70,22,13,486,246,318,133,65,50,23,498 }, +{ 271,167,121,60,18,191,146,199,16,1,446,132,575,212,463,354,126,35,598,566,727,98,107,21,608,955,640,407,5,24,223,68 }, +{ 15,515,115,217,700,13,317,753,141,180,33,23,110,120,4,341,82,10,1,260,365,36,64,854,351,21,51,352,137,77,40,0 }, +{ 173,7,97,356,43,107,20,387,729,104,426,232,560,595,359,392,414,707,885,81,5,0,66,858,612,49,861,14,22,32,819,230 }, +{ 15,515,700,753,13,23,51,82,0,33,165,120,196,4,1,2,197,453,260,351,180,12,40,8,386,110,5,326,9,141,217,457 }, +{ 9,100,120,30,77,795,137,82,202,39,264,827,578,127,0,166,373,318,18,326,141,260,1,450,731,31,33,395,217,291,341,254 }, +{ 14,2,67,1,24,69,0,28,65,7,46,18,114,108,36,83,38,398,9,16,124,133,103,154,50,55,22,267,29,160,35,547 }, +{ 14,7,69,24,66,16,2,267,189,67,71,150,140,97,18,60,172,35,6,1,38,149,388,92,83,135,108,74,462,380,29,36 }, +{ 15,515,700,753,1,13,2,0,4,341,5,3,23,365,11,117,180,10,12,33,134,115,77,197,217,165,6,7,317,102,21,9 }, +{ 1,22,0,12,5,2,36,28,21,10,86,13,23,49,128,9,95,51,55,96,208,141,48,202,4,137,37,64,105,3,50,7 }, +{ 605,630,63,123,736,650,65,108,444,368,561,389,19,25,42,619,122,194,183,27,53,33,84,26,297,813,114,73,256,235,249,216 }, +{ 
127,39,9,0,31,371,98,254,1,385,395,44,30,836,187,131,100,116,284,578,299,166,28,21,737,16,276,272,23,49,137,935 }, +{ 2,61,6,29,45,151,1,655,7,207,32,5,112,425,17,76,833,4,14,64,58,106,119,25,113,128,72,52,70,21,292,790 }, +{ 2,5,17,14,3,29,23,27,13,401,46,6,51,58,1,453,45,53,34,52,133,19,236,26,181,114,99,366,151,108,218,38 }, +{ 31,22,170,1,301,44,127,98,36,28,55,105,239,0,338,116,512,299,293,125,86,10,242,395,2,13,9,64,841,23,95,685 }, +{ 1,21,2,14,5,32,48,50,38,270,46,76,290,72,45,54,17,0,155,221,263,207,37,281,430,3,89,12,181,408,36,67 }, +{ 17,106,119,378,84,240,62,80,383,136,306,3,56,790,742,5,207,504,64,440,32,128,45,2,123,209,14,4,61,57,297,667 }, +{ 13,15,1,515,23,2,0,700,4,115,3,51,5,10,753,180,33,341,217,11,165,317,365,197,6,77,40,64,22,9,7,117 }, +{ 772,335,96,744,1,367,662,686,652,897,303,264,521,31,225,410,141,520,260,116,64,44,321,98,144,88,919,966,340,269,349,284 }, +{ 1,0,5,28,36,2,12,22,83,67,65,50,24,14,9,96,21,218,18,114,48,281,54,10,7,160,181,103,37,23,133,99 }, +{ 25,6,145,42,138,81,174,348,525,544,26,74,85,280,287,648,746,91,66,0,29,396,204,64,636,90,122,194,355,104,65,233 }, +{ 16,24,0,18,28,158,7,35,49,22,68,159,55,1,14,109,105,2,9,71,65,154,124,95,424,344,60,239,118,577,21,10 }, +{ 352,854,699,230,93,689,137,144,217,160,251,36,669,202,351,120,617,855,752,203,332,82,450,180,141,748,831,30,258,201,1,816 }, +{ 15,13,515,700,1,2,0,753,23,5,4,3,51,33,10,115,11,317,217,77,180,341,117,165,6,134,197,153,64,9,102,7 }, +{ 104,20,43,173,66,319,0,77,202,7,198,5,97,580,355,74,2,204,174,52,712,234,426,155,102,192,32,4,500,337,226,904 }, +{ 13,23,15,51,1,515,0,2,5,700,141,4,753,165,115,12,3,21,457,10,180,269,32,6,197,202,9,7,120,11,77,33 }, +{ 15,515,700,753,0,1,13,2,23,115,4,317,8,3,5,51,9,341,10,217,22,365,33,457,6,180,77,901,197,120,18,7 }, +{ 15,515,700,753,1,0,2,3,4,13,5,141,23,16,82,217,457,10,365,180,9,317,51,21,269,898,64,202,11,12,318,341 }, +{ 2,5,1,14,50,38,29,17,114,46,133,3,45,21,58,171,181,36,218,12,6,52,0,48,137,65,361,23,155,4,285,51 }, 
+{ 0,68,9,65,101,124,35,212,16,149,154,100,24,1,114,336,67,589,252,39,71,189,69,562,18,13,30,398,118,88,265,264 }, +{ 1,2,14,22,0,7,67,65,28,36,24,46,168,5,86,69,38,16,49,12,289,10,194,50,83,114,95,6,18,23,55,158 }, +{ 13,23,15,51,515,0,700,4,1,753,2,10,115,961,457,12,33,11,3,5,197,9,165,77,102,403,453,40,64,22,37,59 }, +{ 15,515,700,753,0,1,2,13,23,5,51,901,8,9,180,3,7,82,4,120,12,10,719,341,6,31,141,457,197,22,115,93 }, +{ 100,252,88,101,0,265,9,724,48,1,21,352,213,676,410,382,321,230,30,329,593,909,39,812,553,217,23,689,520,264,166,419 }, +{ 2,5,1,58,171,14,46,50,29,52,45,38,186,155,67,54,151,281,334,61,48,96,17,181,103,400,502,227,21,223,12,69 }, +{ 23,120,13,1,202,141,51,21,165,128,260,15,5,269,137,64,33,180,82,318,93,197,77,326,515,125,110,700,450,2,32,48 }, +{ 341,197,10,901,13,15,8,638,569,515,479,23,180,873,700,165,143,642,0,961,753,951,1,115,509,499,116,12,498,242,82,206 }, +{ 15,1,23,13,515,21,120,51,2,141,202,700,5,180,165,0,753,197,12,7,33,260,352,137,269,4,82,128,48,9,110,6 }, +{ 2,29,50,58,1,6,5,52,14,262,17,46,27,53,151,34,171,74,324,26,38,309,45,113,19,96,287,396,223,67,73,583 }, +{ 13,23,141,51,4,202,0,115,77,2,33,217,5,317,180,64,10,269,3,9,15,21,1,128,102,137,318,11,352,515,22,31 }, +{ 1,67,0,24,50,5,14,18,16,69,2,9,103,35,83,12,96,28,54,7,58,223,21,46,281,48,65,181,22,38,36,108 }, +{ 13,23,141,51,77,64,202,115,33,102,128,4,0,269,10,21,217,32,180,318,9,137,2,11,22,291,7,177,16,31,165,197 }, +{ 317,115,180,365,873,498,217,341,13,752,482,197,569,352,1,901,36,23,457,468,165,346,546,143,509,134,579,876,868,2,332,21 }, +{ 184,257,205,229,152,17,57,497,266,432,452,524,5,619,381,32,4,90,2,12,313,128,45,59,245,106,3,471,129,769,339,214 }, +{ 13,23,1,0,15,2,4,515,51,3,10,33,5,700,115,180,753,77,11,365,341,217,9,6,197,7,102,165,317,40,22,64 }, +{ 626,70,771,687,379,846,767,761,518,878,82,481,31,786,49,591,178,163,407,44,87,13,845,125,590,371,195,120,98,557,937,351 }, +{ 
264,1,410,909,772,897,686,521,335,478,98,96,691,639,100,44,284,382,31,321,744,88,914,724,662,765,223,9,682,363,0,367 }, +{ 13,23,1,2,0,15,51,515,5,10,4,33,115,77,180,700,3,141,217,40,6,753,317,197,64,165,7,11,102,9,341,22 }, +{ 141,77,13,64,269,23,115,21,318,217,5,202,102,33,137,2,15,291,177,51,48,180,32,4,515,352,128,7,0,10,96,11 }, +{ 13,15,23,515,51,0,700,753,1,2,4,10,33,11,961,453,115,40,457,14,12,3,9,5,165,401,197,77,22,21,64,102 }, +{ 1,22,0,36,2,31,5,12,13,105,28,9,49,86,141,21,23,95,128,55,44,115,170,10,164,98,180,4,137,239,83,51 }, +{ 100,101,88,0,252,9,265,30,21,39,759,724,213,329,321,13,419,68,562,382,676,352,694,35,553,410,1,166,909,593,230,23 }, +{ 539,88,30,190,321,530,840,144,669,435,957,748,778,100,96,418,203,213,1,131,410,228,466,274,36,382,219,863,613,83,822,352 }, +{ 7,97,92,173,298,107,43,314,232,140,16,356,20,387,729,362,126,359,246,14,230,501,426,441,0,5,560,66,104,779,35,60 }, +{ 2,14,7,1,58,5,46,16,38,70,75,45,24,155,29,0,21,52,61,163,220,50,69,270,35,48,32,171,18,6,64,54 }, +{ 447,411,363,664,647,98,621,1,354,271,223,478,18,777,781,936,360,759,167,132,121,48,21,156,9,195,118,293,23,691,13,264 }, +{ 2,1,14,65,36,67,0,7,46,22,69,5,38,24,28,6,83,29,86,114,168,50,124,208,12,18,108,10,194,484,103,16 }, +{ 421,386,51,791,730,958,165,801,23,453,697,403,615,13,221,523,24,899,401,326,551,670,576,102,18,33,125,77,566,115,203,197 }, +{ 104,319,422,945,0,81,20,43,715,32,784,693,879,7,397,74,306,207,52,681,671,2,61,173,6,636,904,95,887,5,18,192 }, +{ 2,29,1,46,14,52,70,262,6,26,50,67,75,96,309,38,103,112,58,19,5,163,145,83,74,220,223,357,24,69,331,25 }, +{ 786,929,590,771,687,626,941,178,465,259,70,5,13,21,35,534,107,518,132,49,878,48,146,121,379,279,31,767,147,195,108,125 }, +{ 5,2,186,45,17,29,48,50,14,61,46,155,400,1,227,171,52,58,38,54,430,209,80,281,3,106,536,311,181,243,21,502 }, +{ 0,32,64,95,817,494,342,403,207,202,194,389,453,365,312,180,316,5,690,237,848,577,450,61,102,523,475,289,49,241,65,482 }, +{ 
23,13,51,15,202,515,1,0,2,12,115,4,700,5,165,753,77,457,21,141,128,10,3,64,403,32,197,318,9,11,33,117 }, +{ 119,2,6,76,5,17,45,292,306,240,32,1,19,84,64,61,4,209,710,80,26,0,106,27,214,25,128,129,29,179,3,113 }, +{ 15,515,700,1,753,0,13,23,180,120,51,2,5,33,165,197,9,450,7,260,115,523,4,12,202,141,82,77,21,102,8,6 }, +{ 6,138,74,280,222,85,66,226,25,42,87,204,64,337,29,135,95,174,235,26,145,65,19,32,792,294,112,52,256,2,5,22 }, +{ 39,9,0,101,333,158,49,252,310,254,272,68,16,18,159,286,344,455,30,109,627,327,24,105,419,100,364,22,35,1,329,709 }, +{ 21,5,32,14,2,1,38,72,76,54,17,3,48,221,270,0,45,46,12,181,37,89,36,50,540,290,430,10,4,741,99,23 }, +{ 120,137,202,269,141,260,318,450,922,494,77,291,82,15,5,351,128,1,515,326,64,854,700,352,342,21,753,678,349,32,523,90 }, +{ 16,24,68,35,71,18,149,118,191,167,9,0,212,124,65,246,7,67,140,189,399,101,133,60,1,108,267,114,69,92,695,154 }, +{ 28,0,9,67,1,22,109,36,55,65,194,114,39,83,49,69,2,35,103,50,158,208,86,420,168,289,505,24,7,185,5,323 }, +{ 147,325,198,427,142,178,202,798,5,376,20,318,259,43,120,450,77,234,534,904,470,465,878,725,329,14,315,0,260,858,70,61 }, +{ 28,0,65,9,109,1,55,67,35,22,24,39,289,7,175,14,114,2,158,124,420,194,68,16,336,36,49,69,168,570,154,505 }, +{ 141,180,13,115,1,23,4,269,2,202,0,317,217,51,15,5,21,352,77,318,3,752,197,10,165,365,137,341,9,515,33,64 }, +{ 15,515,700,0,753,1,2,13,23,9,51,5,4,901,33,7,8,3,12,93,180,120,197,6,82,341,10,141,22,260,457,115 }, +{ 6,74,66,1,25,75,324,380,278,26,138,85,135,500,87,42,220,841,97,350,29,19,70,226,38,21,52,606,235,889,2,14 }, +{ 13,23,51,12,4,15,453,0,457,1,403,165,115,3,11,2,64,5,401,10,515,37,202,33,40,32,99,475,197,700,308,17 }, +{ 788,180,5,83,693,319,314,4,32,21,17,11,817,3,510,498,33,12,24,104,814,120,64,117,306,804,523,450,288,160,102,43 }, +{ 15,1,515,13,2,700,23,0,4,753,3,341,5,51,33,11,10,6,77,7,115,102,180,165,141,9,197,217,901,40,12,64 }, +{ 
101,18,9,167,520,16,0,118,60,212,604,364,694,24,55,252,68,917,264,35,1,121,146,363,39,100,806,5,21,166,191,28 }, +{ 13,23,0,33,51,141,77,4,64,2,115,217,9,102,7,202,21,10,180,3,15,128,5,269,6,32,11,16,165,352,22,317 }, +{ 66,135,97,74,172,6,278,7,204,324,138,174,29,85,2,87,25,140,92,192,52,38,802,69,448,500,808,620,22,1,280,232 }, +{ 2,24,69,6,97,7,1,0,14,298,423,66,67,29,150,25,189,267,124,74,607,18,36,81,172,33,83,38,52,273,71,809 }, +{ 0,9,158,39,68,49,109,16,24,333,35,344,101,22,159,254,272,30,124,65,28,18,793,154,310,252,327,105,627,419,286,55 }, +{ 15,515,700,10,753,33,77,180,4,341,1,197,13,115,365,23,901,317,5,102,11,217,165,117,141,40,2,3,253,21,134,55 }, +{ 100,166,382,478,265,264,88,39,98,1,404,274,9,593,724,921,639,438,363,682,411,31,30,812,96,447,821,905,252,0,223,435 }, +{ 19,283,436,53,297,26,813,432,27,128,42,25,390,503,122,736,73,123,605,63,389,529,630,250,690,65,381,444,6,269,108,216 }, +{ 33,202,13,128,494,0,51,141,269,1,4,2,102,180,15,137,65,95,6,450,77,40,117,59,457,36,196,817,134,86,49,515 }, +{ 18,16,60,68,101,167,191,118,35,121,9,212,55,0,126,1,24,647,199,146,520,107,628,621,363,71,21,28,346,92,806,727 }, +{ 352,230,217,531,160,93,36,669,748,854,689,258,137,871,728,699,752,251,574,202,373,351,228,120,717,260,144,219,268,82,816,1 }, +{ 15,515,700,753,13,23,0,51,8,1,4,82,165,77,110,33,10,180,5,202,11,22,120,12,9,197,115,93,403,141,40,351 }, +{ 0,9,101,217,35,88,352,100,39,175,30,68,562,752,13,317,252,115,180,197,64,1,83,141,65,213,165,230,194,36,28,265 }, +{ 32,64,5,470,288,90,21,147,0,95,356,22,20,519,835,312,819,18,247,182,11,97,13,4,387,49,43,298,316,48,107,7 }, +{ 16,14,24,1,7,2,35,0,5,50,18,69,46,12,58,75,9,67,70,163,21,54,38,48,223,502,281,37,140,60,28,10 }, +{ 18,265,9,252,39,195,354,411,1,16,132,101,121,682,167,203,5,363,146,593,35,333,21,271,60,13,100,0,156,327,7,520 }, +{ 4,13,1,115,141,23,2,180,5,0,51,3,217,202,77,15,33,269,341,318,317,21,165,515,11,10,197,365,9,137,64,352 }, +{ 
7,24,16,14,71,35,18,92,140,189,108,149,68,60,69,150,2,230,97,66,0,458,67,1,65,251,38,314,388,267,36,46 }, +{ 1,22,31,36,0,2,44,5,141,105,180,170,12,64,13,98,86,55,23,21,28,164,115,127,10,125,128,4,9,239,352,197 }, +{ 66,7,97,2,192,20,52,43,135,74,560,107,104,0,750,147,414,29,580,173,324,376,226,194,77,174,204,38,356,64,16,470 }, +{ 0,101,9,68,35,124,24,65,39,16,252,100,154,166,28,364,149,694,30,88,55,346,1,419,71,439,265,289,22,21,175,158 }, +{ 39,9,100,30,127,0,737,856,31,836,827,254,98,931,166,88,93,1,44,190,131,228,120,395,625,385,863,264,219,373,110,28 }, +{ 5,17,2,3,21,45,14,155,48,32,38,1,328,181,186,46,23,51,12,61,227,29,106,54,99,133,62,832,13,37,514,543 }, +{ 131,613,30,224,228,716,274,100,827,406,219,856,39,190,31,88,1,166,9,44,829,863,931,93,0,187,625,924,127,98,137,254 }, +{ 352,217,64,141,752,269,180,864,437,372,954,115,498,177,77,349,317,318,579,291,947,197,247,0,23,717,237,304,128,457,776,678 }, +{ 141,217,180,317,352,115,15,341,1,23,13,365,515,752,2,64,0,5,498,700,372,165,51,237,753,77,244,197,137,4,21,253 }, +{ 5,2,17,14,1,45,3,38,21,29,181,58,46,48,50,133,114,171,61,155,32,6,186,281,361,12,36,54,4,13,52,514 }, +{ 6,2,119,25,790,4,45,483,655,113,1,29,76,26,32,19,887,17,128,0,292,833,59,61,106,64,77,814,14,151,84,42 }, +{ 13,23,2,0,51,4,1,115,141,3,5,180,33,217,77,9,202,11,7,15,10,6,317,64,21,197,515,165,102,128,22,269 }, +{ 13,23,51,202,21,5,1,15,141,165,120,2,115,12,32,0,515,128,318,64,125,700,4,403,197,453,180,457,3,7,10,6 }, +{ 9,39,31,30,0,127,1,44,100,131,98,187,385,276,88,442,219,908,254,116,49,166,935,28,201,36,141,827,137,299,284,21 }, +{ 22,49,28,109,9,185,105,95,1,131,159,272,36,67,86,254,39,55,35,0,505,31,83,169,208,327,286,98,168,535,312,708 }, +{ 5,1,2,21,0,12,48,22,10,14,36,3,32,17,23,54,86,38,4,51,13,37,137,50,65,281,114,45,28,99,58,202 }, +{ 141,269,352,82,217,351,180,854,372,922,752,1,15,260,317,318,515,202,64,700,120,349,954,753,77,35,67,717,898,137,365,115 }, +{ 
144,203,613,418,326,406,96,669,137,679,1,228,494,822,840,317,36,83,855,160,817,859,856,816,217,831,345,93,876,77,44,251 }, +{ 15,13,515,23,700,753,51,1,33,0,202,21,2,5,180,141,120,165,217,82,12,117,4,352,269,197,115,32,3,9,134,260 }, +{ 13,23,115,1,0,51,4,77,2,33,15,141,10,5,341,180,515,3,217,202,9,365,317,64,700,102,11,165,197,22,753,7 }, +{ 531,943,373,160,728,93,206,260,261,559,964,269,717,535,332,384,365,295,110,533,141,10,180,352,244,137,120,55,959,564,36,253 }, +{ 1,31,36,170,22,55,44,10,86,64,127,0,2,98,301,164,740,338,237,143,5,125,116,13,242,141,299,180,23,169,105,12 }, +{ 13,23,141,77,51,4,64,32,33,202,115,269,102,128,21,0,177,180,318,90,40,10,7,5,137,15,217,352,9,291,59,22 }, +{ 202,120,260,318,77,15,450,269,1,82,33,23,141,13,51,515,351,128,700,5,64,326,137,21,102,110,753,494,93,523,817,165 }, +{ 523,899,102,33,730,15,23,403,719,117,153,13,515,51,475,4,700,5,453,817,196,753,494,40,202,120,1,2,450,457,17,421 }, +{ 202,403,494,450,120,817,523,475,318,453,33,402,128,77,13,51,260,576,342,102,15,23,515,4,700,5,82,753,326,210,137,615 }, +{ 2,29,112,66,7,52,70,151,58,87,135,5,74,226,307,6,14,186,1,45,549,172,644,25,113,287,46,155,334,64,294,97 }, +{ 1,77,349,291,260,120,652,102,5,39,64,269,9,33,340,342,13,98,888,698,23,296,100,318,51,202,87,137,638,128,50,850 }, +{ 1,13,15,2,0,4,23,515,5,141,180,3,700,341,115,51,753,269,77,901,197,352,217,33,21,11,365,6,165,202,7,317 }, +{ 7,14,16,2,46,5,70,58,1,38,24,35,92,163,0,75,21,18,50,54,140,12,87,220,155,69,171,23,60,9,13,307 }, +{ 2,29,66,226,135,7,87,74,52,278,6,75,222,220,294,70,97,1,145,25,172,262,324,38,69,112,331,92,5,14,140,26 }, +{ 15,515,700,753,33,77,117,4,1,102,134,40,153,11,13,196,217,21,5,51,23,115,32,3,2,202,141,137,128,291,48,177 }, +{ 15,217,82,515,351,141,317,1,13,700,260,77,110,120,115,854,23,753,180,51,21,36,137,922,5,64,365,352,291,202,93,341 }, +{ 31,190,30,373,120,110,863,88,44,127,908,856,260,318,82,98,93,187,836,717,935,39,442,131,141,254,228,219,1,968,77,116 }, +{ 
23,13,308,9,165,115,51,21,401,125,49,39,197,391,159,254,217,743,28,438,773,629,558,386,341,95,32,317,876,679,109,166 }, +{ 49,5,43,165,7,0,21,104,125,22,173,422,64,13,623,102,20,18,314,95,91,141,23,31,193,51,391,900,779,558,92,232 }, +{ 1,4,13,2,15,0,23,515,77,3,341,33,5,700,115,51,202,753,141,180,11,10,102,217,6,901,40,7,197,318,317,365 }, +{ 93,843,295,120,36,160,206,261,10,137,567,110,384,141,943,268,201,332,258,55,1,180,64,116,44,144,699,203,282,31,260,373 }, +{ 81,7,192,426,43,173,172,104,879,91,5,712,715,526,6,97,568,95,448,66,33,861,560,32,49,20,0,636,232,825,2,22 }, +{ 475,403,51,453,33,102,13,23,494,202,0,196,15,77,153,18,4,117,515,450,318,22,730,128,700,421,65,753,269,402,134,817 }, +{ 141,269,260,318,202,120,352,349,82,351,1,5,854,137,64,291,15,922,180,851,32,77,515,372,21,700,7,217,13,947,33,753 }, +{ 15,515,700,753,1,13,0,2,4,23,3,5,180,115,197,12,51,165,217,10,961,9,6,141,352,21,8,7,33,77,457,120 }, +{ 39,166,9,30,0,101,274,404,252,333,190,100,158,438,310,88,68,265,656,21,1,530,329,344,49,539,625,254,13,131,48,419 }, +{ 0,1,28,9,22,12,65,83,67,36,5,2,50,55,96,109,16,13,24,23,21,238,49,18,285,160,128,39,69,114,7,323 }, +{ 15,515,700,1,0,753,23,2,13,51,5,180,115,6,3,9,197,12,457,120,7,165,901,82,4,21,8,141,31,33,719,341 }, +{ 9,39,30,0,28,166,22,49,180,1,352,35,317,158,88,141,498,131,115,345,752,128,228,217,100,83,219,930,13,251,365,36 }, +{ 5,61,45,2,80,29,311,209,6,17,58,1,151,106,454,667,243,70,52,496,287,592,255,738,64,74,483,14,27,32,112,19 }, +{ 31,125,22,44,299,456,685,242,599,116,170,28,0,1,492,393,506,144,558,10,268,301,239,23,13,36,963,367,55,206,105,95 }, +{ 187,258,926,574,839,93,228,860,406,219,871,160,137,531,224,116,120,902,669,201,36,131,44,144,843,533,318,384,442,1,434,268 }, +{ 7,107,75,16,87,9,64,177,24,18,291,77,349,141,60,232,23,0,51,269,132,14,5,21,70,32,678,112,126,121,71,947 }, +{ 15,515,700,753,1,0,2,13,5,3,23,180,4,115,901,51,6,8,961,9,7,10,12,82,197,22,141,341,33,120,365,457 }, +{ 
13,23,51,1,5,202,2,12,15,21,165,141,0,115,3,4,32,515,197,10,180,318,128,120,64,700,6,7,403,269,457,137 }, +{ 1,2,0,77,64,3,141,13,33,15,23,10,6,102,5,515,180,4,117,7,700,165,11,217,269,40,753,115,128,17,197,134 }, +{ 345,531,332,269,260,317,717,752,373,351,180,352,728,82,10,365,160,533,217,143,498,251,244,93,341,901,36,1,141,898,55,864 }, +{ 16,7,33,189,92,77,388,60,140,35,102,24,14,1,230,21,150,117,733,314,18,915,71,13,108,134,5,64,69,2,98,22 }, +{ 142,202,234,178,5,786,77,49,70,0,416,450,639,878,1,48,21,929,147,259,315,455,198,120,12,481,163,113,846,329,318,22 }, +{ 81,715,192,0,173,712,681,104,636,91,74,20,750,370,7,718,95,879,22,43,825,560,422,64,207,49,172,18,397,10,426,319 }, +{ 13,23,15,51,515,0,700,753,4,1,961,2,10,115,457,11,33,453,3,5,9,40,12,197,165,77,401,475,64,102,22,569 }, +{ 64,297,5,445,95,61,250,311,80,34,17,312,45,2,86,472,58,14,180,53,22,151,869,738,247,237,29,1,128,165,21,288 }, +{ 16,24,18,71,7,35,118,92,14,154,60,68,0,149,28,302,124,150,55,175,2,9,97,1,429,20,108,273,22,65,43,126 }, +{ 195,360,156,771,132,163,626,687,591,371,883,146,121,846,70,586,379,13,293,98,407,48,761,296,354,18,31,1,55,49,21,105 }, +{ 202,13,77,23,318,33,51,0,4,141,5,21,217,32,291,102,64,128,15,10,9,494,269,137,515,403,1,31,117,700,120,317 }, +{ 7,192,97,81,172,66,426,173,43,715,712,232,861,879,104,330,568,298,74,893,885,526,387,825,92,140,91,14,636,6,5,448 }, +{ 2,1,14,6,67,7,65,69,24,36,66,124,108,83,38,29,22,86,0,18,484,5,28,46,12,10,25,302,150,16,650,74 }, +{ 33,77,13,202,102,4,0,23,128,51,141,64,318,22,403,269,137,10,15,40,494,117,32,59,11,153,1,21,177,196,515,115 }, +{ 269,141,318,77,349,291,217,202,33,15,372,304,515,22,102,177,351,700,352,120,5,137,10,317,260,753,64,851,854,403,49,21 }, +{ 6,74,66,85,138,25,87,42,135,26,226,222,280,29,75,500,220,278,792,70,19,2,1,294,204,64,32,145,853,112,52,174 }, +{ 9,0,105,39,16,18,1,101,272,31,127,98,24,518,333,252,310,28,68,737,846,371,158,916,938,49,30,7,286,35,301,455 }, +{ 
2,1,14,67,24,46,83,108,69,29,38,103,114,36,6,133,18,0,28,7,65,52,236,75,50,398,5,309,135,16,278,160 }, +{ 22,1,28,105,49,95,0,2,67,55,36,239,168,159,65,35,14,170,320,164,9,7,10,5,114,12,83,64,194,109,24,301 }, +{ 7,66,172,97,92,140,232,568,298,14,192,314,16,380,135,324,2,330,74,38,357,448,126,69,35,5,107,6,387,60,204,572 }, +{ 2,29,1,14,5,6,46,133,114,50,52,26,218,108,19,13,366,236,27,45,70,17,58,23,86,51,137,65,112,38,25,12 }, +{ 6,1,74,25,2,26,29,66,42,19,75,14,388,67,108,70,52,85,103,65,38,138,357,133,114,594,324,516,603,96,309,69 }, +{ 22,9,28,1,36,49,109,105,86,95,131,31,169,39,0,141,272,159,44,55,98,180,13,30,185,115,83,128,352,137,64,208 }, +{ 203,822,326,23,77,859,403,494,576,39,473,182,33,1,691,100,18,217,13,817,411,447,363,102,93,966,96,478,291,704,310,120 }, +{ 15,515,700,753,33,77,117,4,102,134,115,153,13,1,40,217,11,196,341,2,5,3,23,317,365,0,21,291,32,51,12,569 }, +{ 15,515,700,753,13,0,23,1,8,82,51,165,197,120,180,2,9,33,4,110,5,12,10,260,351,386,141,7,457,475,93,901 }, +{ 1,23,13,15,51,0,21,2,515,5,141,180,120,165,700,202,197,4,753,12,33,9,7,82,115,93,3,352,260,6,110,48 }, +{ 15,515,700,753,0,1,2,5,3,4,8,13,180,341,10,23,7,6,9,51,77,197,961,115,165,82,120,31,22,202,457,217 }, +{ 559,661,922,564,141,533,10,317,373,110,143,269,244,260,332,261,93,642,752,295,351,876,531,843,180,206,728,384,352,1,434,120 }, +{ 15,515,1,0,700,2,13,23,753,5,51,180,3,165,12,6,197,115,4,9,7,21,719,8,457,82,141,120,33,22,901,10 }, +{ 198,234,0,325,5,77,202,416,20,147,32,43,639,315,49,61,450,455,142,21,113,230,22,318,725,342,207,13,95,904,494,10 }, +{ 447,264,363,9,411,676,682,1,156,664,821,478,166,354,812,39,100,905,382,897,98,18,759,404,31,101,724,5,265,223,88,13 }, +{ 24,14,69,16,35,18,2,7,108,189,71,67,267,149,1,46,68,83,38,140,0,236,251,9,388,60,133,103,65,28,29,50 }, +{ 16,7,35,20,14,18,109,2,43,120,107,60,1,121,326,907,553,77,13,147,23,82,68,260,0,403,5,24,202,126,265,199 }, +{ 
30,131,187,276,31,44,613,442,39,9,190,228,1,839,116,935,908,219,127,88,244,224,110,137,93,201,98,141,36,567,0,856 }, +{ 98,223,1,393,812,265,100,421,593,834,697,48,51,410,791,382,21,88,31,284,9,125,96,293,230,23,213,217,656,689,541,5 }, +{ 98,51,127,219,616,258,105,293,395,421,924,512,31,308,23,201,116,44,301,272,763,276,125,13,453,170,401,295,261,944,115,567 }, +{ 253,110,951,352,811,206,332,180,141,244,282,10,854,417,642,638,559,752,143,911,260,55,93,533,499,498,661,120,351,959,564,341 }, +{ 49,9,159,254,272,158,0,131,28,39,627,105,327,286,22,518,688,578,68,347,374,101,224,424,95,35,219,24,16,364,65,344 }, +{ 105,22,131,272,98,286,327,109,374,239,28,95,320,219,9,224,55,127,187,36,578,169,64,185,538,1,159,10,371,634,49,616 }, +{ 691,478,340,1,658,914,724,363,744,698,156,772,411,296,682,447,9,284,335,98,264,303,909,21,354,410,225,13,664,686,88,919 }, +{ 16,24,35,18,71,7,140,108,189,267,92,60,14,230,68,69,9,1,149,46,246,191,388,167,2,0,118,236,133,21,674,5 }, +{ 9,0,127,31,98,371,395,39,737,49,1,44,385,272,512,28,293,242,836,761,254,299,101,16,187,22,116,158,159,131,18,21 }, +{ 16,18,68,0,60,35,9,101,252,28,118,24,419,55,7,109,604,71,39,121,22,364,14,158,191,167,925,126,329,21,92,49 }, +{ 116,268,203,93,206,692,551,31,417,940,499,8,473,44,202,523,959,0,120,137,559,22,450,403,576,10,728,299,13,326,51,1 }, +{ 225,459,744,1,919,914,691,330,622,21,141,223,5,284,934,335,88,538,340,82,385,839,363,120,478,98,48,30,64,32,686,166 }, +{ 5,1,2,0,14,36,21,281,12,48,50,67,22,28,54,83,24,218,38,10,181,9,32,18,65,58,45,114,430,17,99,37 }, +{ 137,450,202,704,120,260,326,318,968,269,851,403,291,77,23,141,182,310,494,373,351,457,82,890,349,110,60,128,817,678,105,96 }, +{ 15,515,700,753,4,33,13,23,77,5,40,11,102,93,1,21,110,51,82,117,141,2,10,8,32,64,120,31,202,3,217,115 }, +{ 15,13,1,23,515,0,51,2,700,5,753,21,180,141,165,3,12,115,197,4,7,6,457,9,352,202,33,8,719,120,77,341 }, +{ 
219,127,258,98,276,201,131,395,944,293,116,284,567,31,242,105,137,935,295,44,403,860,51,224,576,456,9,371,578,475,202,512 }, +{ 16,7,18,35,60,0,14,20,118,28,68,22,2,24,1,92,158,107,5,49,154,126,109,12,43,10,55,6,677,71,21,168 }, +{ 93,728,531,160,559,373,574,120,295,860,533,269,717,260,926,902,258,318,36,201,261,434,851,137,617,141,187,352,843,384,332,251 }, +{ 28,0,1,9,22,109,83,39,49,12,36,67,55,5,96,2,128,30,158,69,21,23,160,208,35,13,65,323,50,141,194,238 }, +{ 23,13,51,1,5,15,141,21,0,2,165,515,202,700,12,197,180,120,32,115,4,753,64,9,7,269,6,3,125,386,48,453 }, +{ 5,21,13,49,14,20,7,23,43,32,1,0,652,48,713,22,38,2,16,132,955,107,12,279,24,888,197,640,70,303,18,638 }, +{ 9,0,28,39,1,30,35,101,22,67,83,141,49,175,36,68,55,88,13,251,10,69,23,158,180,115,64,100,217,65,345,166 }, +{ 260,120,82,269,5,450,351,1,202,141,854,13,77,922,32,33,137,4,23,125,291,21,15,515,165,349,177,700,318,326,180,753 }, +{ 121,16,18,35,363,101,60,20,107,14,68,259,621,55,604,43,7,252,9,364,126,0,167,191,5,407,132,28,199,419,146,10 }, +{ 13,23,51,1,15,0,2,141,5,515,12,21,700,115,165,180,4,753,3,197,202,32,9,120,7,8,6,11,37,10,457,269 }, +{ 201,144,206,443,418,203,435,96,335,459,187,1,88,332,330,321,269,934,30,372,822,521,268,326,44,523,382,141,410,264,494,473 }, +{ 31,44,276,201,116,131,284,662,567,144,9,489,98,295,268,434,0,30,137,39,93,1,187,22,219,918,110,299,141,36,224,384 }, +{ 520,478,664,1,264,604,9,167,777,759,411,0,806,724,48,21,101,68,647,936,363,223,118,682,410,18,100,16,252,98,265,13 }, +{ 23,70,21,87,60,75,120,182,163,379,92,18,7,937,71,121,446,132,24,98,931,126,107,77,795,195,115,44,411,146,51,850 }, +{ 39,9,0,737,127,31,846,98,1,827,105,310,371,30,254,100,44,18,395,242,272,101,385,916,836,16,265,131,938,93,166,557 }, +{ 279,20,43,126,107,7,92,16,356,362,60,595,246,359,598,35,0,683,939,653,121,97,125,441,399,392,150,199,48,230,14,649 }, +{ 206,417,93,959,499,728,8,559,120,473,137,141,10,564,31,260,44,450,203,341,253,244,373,116,143,638,268,180,352,110,318,940 }, +{ 
259,465,147,132,590,687,534,199,581,146,941,427,107,640,279,178,121,5,195,150,522,955,198,35,786,929,798,142,1,21,325,626 }, +{ 523,15,120,450,202,515,403,51,817,700,13,753,23,457,33,899,128,64,730,102,494,342,115,719,453,196,49,99,318,421,308,5 }, +{ 141,559,10,244,365,564,661,180,253,143,752,110,55,317,533,341,901,93,373,206,535,160,82,922,260,36,531,964,352,332,261,197 }, +{ 219,258,98,127,276,964,943,137,843,535,201,935,131,860,261,295,284,567,206,44,116,31,253,492,203,332,160,615,36,93,55,692 }, +{ 0,319,422,207,945,693,577,887,32,804,95,344,104,904,61,20,5,43,7,725,113,510,306,102,49,263,153,426,33,83,22,9 }, +{ 1,22,0,5,12,2,36,21,28,86,49,105,9,10,23,13,141,95,31,55,128,37,51,4,83,202,3,64,96,7,32,44 }, +{ 15,515,13,700,1,0,753,2,23,3,4,5,51,10,115,197,6,33,12,9,165,7,8,77,11,961,180,269,141,22,120,457 }, +{ 15,1,23,120,77,13,515,51,141,202,700,180,110,137,260,753,326,5,128,102,0,21,2,165,269,33,197,450,318,217,93,115 }, +{ 15,515,1,700,0,753,13,2,23,180,51,5,120,4,9,115,197,12,7,165,21,33,6,82,3,8,523,901,31,141,457,260 }, +{ 16,18,24,7,92,35,60,75,9,13,71,14,0,108,50,21,126,121,1,140,23,5,132,146,2,12,128,10,64,141,70,87 }, +{ 180,341,901,15,515,1,365,700,0,2,197,753,115,4,10,13,752,5,3,8,165,317,141,23,143,873,44,31,569,55,93,6 }, +{ 9,0,175,35,101,28,39,67,68,1,65,83,30,69,364,336,22,114,55,124,194,158,100,289,252,166,64,345,103,36,50,88 }, +{ 64,165,180,197,115,247,217,237,21,13,32,316,22,141,352,72,288,304,95,225,76,391,386,16,468,90,49,35,365,640,372,23 }, +{ 15,515,700,13,753,1,0,2,23,4,5,3,115,51,141,197,12,10,180,961,7,9,21,33,217,6,8,165,457,11,77,341 }, +{ 132,121,199,146,60,279,493,640,407,598,126,195,534,581,955,590,107,5,150,35,522,49,259,16,18,360,156,0,147,362,21,167 }, +{ 0,1,28,9,22,5,36,12,65,24,67,96,2,83,18,50,114,55,21,16,7,10,23,14,13,160,137,51,48,218,103,69 }, +{ 93,120,957,77,30,968,459,110,137,160,613,102,202,352,373,141,31,372,217,330,190,318,269,260,203,44,28,473,228,177,863,704 }, +{ 
15,515,700,0,753,1,13,23,2,51,5,9,120,82,4,7,901,197,10,8,260,180,341,12,33,6,3,523,165,102,115,141 }, +{ 206,417,8,141,499,44,244,93,31,10,137,253,559,116,728,144,120,564,269,638,203,352,143,260,341,752,268,717,951,180,160,110 }, +{ 530,254,228,1,96,21,406,39,827,31,669,840,613,829,137,679,166,98,23,51,960,438,131,93,48,224,219,317,310,36,876,190 }, +{ 15,515,700,753,13,457,0,197,719,1,165,82,23,8,120,730,2,10,12,180,134,5,9,141,260,4,351,51,115,3,341,899 }, +{ 0,16,68,9,24,28,18,35,252,109,39,419,124,158,154,55,101,71,22,118,60,7,49,65,333,14,1,10,329,364,677,346 }, +{ 1,15,13,23,515,51,120,0,700,180,2,165,5,753,141,197,21,33,202,102,260,4,9,12,7,326,137,450,115,6,82,110 }, +{ 535,253,352,564,110,365,82,180,341,10,854,533,55,898,244,901,873,141,752,143,642,559,498,317,36,951,115,964,638,282,661,197 }, +{ 31,44,125,338,116,64,242,36,1,10,55,22,456,237,180,13,299,164,506,86,23,165,558,143,0,762,492,479,844,546,93,8 }, +{ 13,23,4,1,202,2,0,51,115,77,141,180,5,15,217,3,33,11,515,317,9,10,102,21,700,341,365,318,269,64,32,128 }, +{ 9,39,0,166,68,101,28,364,30,158,562,35,175,65,333,154,49,404,706,124,21,252,274,168,190,289,100,570,16,1,310,346 }, +{ 15,515,700,753,341,13,0,23,1,33,141,4,260,82,77,51,351,180,9,5,115,137,10,217,11,120,102,40,349,269,202,854 } diff --git a/libkram/bc7enc/utils.cpp b/libkram/bc7enc/utils.cpp new file mode 100644 index 0000000..2b3b04d --- /dev/null +++ b/libkram/bc7enc/utils.cpp @@ -0,0 +1,908 @@ +// File: utils.cpp +#include "utils.h" + +// Don't need the impl yet +#if 0 + +#include "lodepng.h" +#include "miniz.h" + + +namespace utils +{ + +#define FLOOD_PUSH(y, xl, xr, dy) if (((y + (dy)) >= 0) && ((y + (dy)) < (int)m_height)) { stack.push_back(fill_segment(y, xl, xr, dy)); } + +// See http://www.realtimerendering.com/resources/GraphicsGems/gems/SeedFill.c +uint32_t image_u8::flood_fill(int x, int y, const color_quad_u8& c, const color_quad_u8& b, std::vector* pSet_pixels) +{ + uint32_t total_set = 0; + + if (!flood_fill_is_inside(x, y, 
b)) + return 0; + + std::vector stack; + stack.reserve(64); + + FLOOD_PUSH(y, x, x, 1); + FLOOD_PUSH(y + 1, x, x, -1); + + while (stack.size()) + { + fill_segment s = stack.back(); + stack.pop_back(); + + int x1 = s.m_xl, x2 = s.m_xr, dy = s.m_dy; + y = s.m_y + s.m_dy; + + for (x = x1; (x >= 0) && flood_fill_is_inside(x, y, b); x--) + { + (*this)(x, y) = c; + total_set++; + if (pSet_pixels) + pSet_pixels->push_back(pixel_coord(x, y)); + } + + int l; + + if (x >= x1) + goto skip; + + l = x + 1; + if (l < x1) + FLOOD_PUSH(y, l, x1 - 1, -dy); + + x = x1 + 1; + + do + { + for (; x <= ((int)m_width - 1) && flood_fill_is_inside(x, y, b); x++) + { + (*this)(x, y) = c; + total_set++; + if (pSet_pixels) + pSet_pixels->push_back(pixel_coord(x, y)); + } + FLOOD_PUSH(y, l, x - 1, dy); + + if (x > (x2 + 1)) + FLOOD_PUSH(y, x2 + 1, x - 1, -dy); + + skip: + for (x++; x <= x2 && !flood_fill_is_inside(x, y, b); x++) + ; + + l = x; + } while (x <= x2); + } + + return total_set; +} + +void image_u8::draw_line(int xs, int ys, int xe, int ye, const color_quad_u8& color) +{ + if (xs > xe) + { + std::swap(xs, xe); + std::swap(ys, ye); + } + + int dx = xe - xs, dy = ye - ys; + if (!dx) + { + if (ys > ye) + std::swap(ys, ye); + for (int i = ys; i <= ye; i++) + set_pixel_clipped(xs, i, color); + } + else if (!dy) + { + for (int i = xs; i < xe; i++) + set_pixel_clipped(i, ys, color); + } + else if (dy > 0) + { + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); + rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); + } + } + else + { + dy = -dy; + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); + rasterize_line(xe, ye, 
xs, ys, 1, -1, e, e_inc, e_no_inc, color); + } + } +} + +void image_u8::rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_quad_u8& color) +{ + int start, end, var; + + if (pred) + { + start = ys; + end = ye; + var = xs; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(var, i, color); + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } + else + { + start = xs; + end = xe; + var = ys; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(i, var, color); + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } +} + +bool load_png(const char* pFilename, image_u8& img) +{ + img.clear(); + + std::vector pixels; + unsigned int w = 0, h = 0; + unsigned int e = lodepng::decode(pixels, w, h, pFilename); + if (e != 0) + { + fprintf(stderr, "Failed loading PNG file %s\n", pFilename); + return false; + } + + img.init(w, h); + memcpy(&img.get_pixels()[0], &pixels[0], w * h * sizeof(uint32_t)); + + return true; +} + +bool save_png(const char* pFilename, const image_u8& img, bool save_alpha) +{ + const uint32_t w = img.width(); + const uint32_t h = img.height(); + + std::vector pixels; + if (save_alpha) + { + pixels.resize(w * h * sizeof(color_quad_u8)); + memcpy(&pixels[0], &img.get_pixels()[0], w * h * sizeof(color_quad_u8)); + } + else + { + pixels.resize(w * h * 3); + unsigned char* pDst = &pixels[0]; + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++, pDst += 3) + pDst[0] = img(x, y)[0], pDst[1] = img(x, y)[1], pDst[2] = img(x, y)[2]; + } + + return lodepng::encode(pFilename, pixels, w, h, save_alpha ? 
LCT_RGBA : LCT_RGB) == 0; +} + +static float gauss(int x, int y, float sigma_sqr) +{ + float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr))); + float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; + return g; +} + +// size_x/y should be odd +void compute_gaussian_kernel(float* pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) +{ + assert(size_x & size_y & 1); + + if (!(size_x | size_y)) + return; + + int mid_x = size_x / 2; + int mid_y = size_y / 2; + + double sum = 0; + for (int x = 0; x < size_x; x++) + { + for (int y = 0; y < size_y; y++) + { + float g; + if ((x > mid_x) && (y < mid_y)) + g = pDst[(size_x - x - 1) + y * size_x]; + else if ((x < mid_x) && (y > mid_y)) + g = pDst[x + (size_y - y - 1) * size_x]; + else if ((x > mid_x) && (y > mid_y)) + g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x]; + else + g = gauss(x - mid_x, y - mid_y, sigma_sqr); + + pDst[x + y * size_x] = g; + sum += g; + } + } + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + { + sum = pDst[mid_x + mid_y * size_x]; + } + + if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize)) + { + double one_over_sum = 1.0f / sum; + for (int i = 0; i < size_x * size_y; i++) + pDst[i] = static_cast(pDst[i] * one_over_sum); + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + pDst[mid_x + mid_y * size_x] = 1.0f; + } + + if (flags & cComputeGaussianFlagPrint) + { + printf("{\n"); + for (int y = 0; y < size_y; y++) + { + printf(" "); + for (int x = 0; x < size_x; x++) + { + printf("%f, ", pDst[x + y * size_x]); + } + printf("\n"); + } + printf("}"); + } +} + +void gaussian_filter(imagef& dst, const imagef& orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor) +{ + assert(odd_filter_width && (odd_filter_width & 1)); + odd_filter_width |= 1; + + std::vector kernel(odd_filter_width * odd_filter_width); + compute_gaussian_kernel(&kernel[0], odd_filter_width, 
odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize); + + const int dst_width = orig_img.get_width() / width_divisor; + const int dst_height = orig_img.get_height() / height_divisor; + + const int H = odd_filter_width / 2; + const int L = -H; + + dst.crop(dst_width, dst_height); + +#pragma omp parallel for + for (int oy = 0; oy < dst_height; oy++) + { + for (int ox = 0; ox < dst_width; ox++) + { + vec4F c(0.0f); + + for (int yd = L; yd <= H; yd++) + { + int y = oy * height_divisor + (height_divisor >> 1) + yd; + + for (int xd = L; xd <= H; xd++) + { + int x = ox * width_divisor + (width_divisor >> 1) + xd; + + const vec4F& p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping); + + float w = kernel[(xd + H) + (yd + H) * odd_filter_width]; + c[0] += p[0] * w; + c[1] += p[1] * w; + c[2] += p[2] * w; + c[3] += p[3] * w; + } + } + + dst(ox, oy).set(c[0], c[1], c[2], c[3]); + } + } +} + +static void pow_image(const imagef& src, imagef& dst, const vec4F& power) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f)) + dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]); + else + dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3])); + } + } +} + +#if 0 +static void mul_image(const imagef& src, imagef& dst, const vec4F& mul) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]); + } + } +} +#endif + +static void scale_image(const imagef& src, imagef& dst, const vec4F& scale, const vec4F& shift) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < 
(int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + d[c] = scale[c] * p[c] + shift[c]; + + dst(x, y).set(d[0], d[1], d[2], d[3]); + } + } +} + +static void add_weighted_image(const imagef& src1, const vec4F& alpha, const imagef& src2, const vec4F& beta, const vec4F& gamma, imagef& dst) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + dst(x, y).set( + s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0], + s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1], + s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2], + s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]); + } + } +} + +static void add_image(const imagef& src1, const imagef& src2, imagef& dst) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]); + } + } +} + +static void adds_image(const imagef& src, const vec4F& value, imagef& dst) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]); + } + } +} + +static void mul_image(const imagef& src1, const imagef& src2, imagef& dst, const vec4F& scale) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v1 = s1[c]; + 
float v2 = s2[c]; + d[c] = v1 * v2 * scale[c]; + } + + dst(x, y) = d; + } + } +} + +static void div_image(const imagef& src1, const imagef& src2, imagef& dst, const vec4F& scale) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v = s2[c]; + if (v == 0.0f) + d[c] = 0.0f; + else + d[c] = (s1[c] * scale[c]) / v; + } + + dst(x, y) = d; + } + } +} + +static vec4F avg_image(const imagef& src) +{ + vec4F avg(0.0f); + + for (uint32_t y = 0; y < src.get_height(); y++) + { + for (uint32_t x = 0; x < src.get_width(); x++) + { + const vec4F& s = src(x, y); + + avg += vec4F(s[0], s[1], s[2], s[3]); + } + } + + avg /= static_cast(src.get_total_pixels()); + + return avg; +} + +// Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html +vec4F compute_ssim(const imagef& a, const imagef& b) +{ + imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; + + const float C1 = 6.50250f, C2 = 58.52250f; + + pow_image(a, a_sq, vec4F(2)); + pow_image(b, b_sq, vec4F(2)); + mul_image(a, b, axb, vec4F(1.0f)); + + gaussian_filter(mu1, a, 11, 1.5f * 1.5f); + gaussian_filter(mu2, b, 11, 1.5f * 1.5f); + + pow_image(mu1, mu1_sq, vec4F(2)); + pow_image(mu2, mu2_sq, vec4F(2)); + mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f)); + + gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f); + add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq); + + gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f); + add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq); + + gaussian_filter(s12, axb, 11, 1.5f * 1.5f); + add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12); + + scale_image(mu1_mu2, t1, vec4F(2), vec4F(0)); + adds_image(t1, vec4F(C1), t1); + + scale_image(s12, t2, vec4F(2), vec4F(0)); + 
adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t3, vec4F(1)); + + add_image(mu1_sq, mu2_sq, t1); + adds_image(t1, vec4F(C1), t1); + + add_image(s1_sq, s2_sq, t2); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t1, vec4F(1)); + + div_image(t3, t1, smap, vec4F(1)); + + return avg_image(smap); +} + +vec4F compute_ssim(const image_u8& a, const image_u8& b, bool luma) +{ + image_u8 ta(a), tb(b); + + if ((ta.width() != tb.width()) || (ta.height() != tb.height())) + { + fprintf(stderr, "compute_ssim: Cropping input images to equal dimensions\n"); + + const uint32_t w = std::min(a.width(), b.width()); + const uint32_t h = std::min(a.height(), b.height()); + ta.crop(w, h); + tb.crop(w, h); + } + + if (!ta.width() || !ta.height()) + { + assert(0); + return vec4F(0); + } + + if (luma) + { + for (uint32_t y = 0; y < ta.height(); y++) + { + for (uint32_t x = 0; x < ta.width(); x++) + { + ta(x, y).set((uint8_t)ta(x, y).get_luma(), ta(x, y).a); + tb(x, y).set((uint8_t)tb(x, y).get_luma(), tb(x, y).a); + } + } + } + + imagef fta, ftb; + + fta.set(ta); + ftb.set(tb); + + return compute_ssim(fta, ftb); +} + +bool save_dds(const char* pFilename, uint32_t width, uint32_t height, const void* pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header) +{ + (void)srgb; + + FILE* pFile = NULL; +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + if (!pFile) + { + fprintf(stderr, "Failed creating file %s!\n", pFilename); + return false; + } + + fwrite("DDS ", 4, 1, pFile); + + DDSURFACEDESC2 desc; + memset(&desc, 0, sizeof(desc)); + + desc.dwSize = sizeof(desc); + desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS; + + desc.dwWidth = width; + desc.dwHeight = height; + + desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + desc.ddpfPixelFormat.dwSize = sizeof(desc.ddpfPixelFormat); + + desc.ddpfPixelFormat.dwFlags |= DDPF_FOURCC; + + desc.lPitch = (((desc.dwWidth + 3) & ~3) * 
((desc.dwHeight + 3) & ~3) * pixel_format_bpp) >> 3; + desc.dwFlags |= DDSD_LINEARSIZE; + + desc.ddpfPixelFormat.dwRGBBitCount = 0; + + if ((!force_dx10_header) && + ((dxgi_format == DXGI_FORMAT_BC1_UNORM) || + (dxgi_format == DXGI_FORMAT_BC3_UNORM) || + (dxgi_format == DXGI_FORMAT_BC4_UNORM) || + (dxgi_format == DXGI_FORMAT_BC5_UNORM))) + { + if (dxgi_format == DXGI_FORMAT_BC1_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '1'); + else if (dxgi_format == DXGI_FORMAT_BC3_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '5'); + else if (dxgi_format == DXGI_FORMAT_BC4_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '1'); + else if (dxgi_format == DXGI_FORMAT_BC5_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '2'); + + fwrite(&desc, sizeof(desc), 1, pFile); + } + else + { + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', '1', '0'); + + fwrite(&desc, sizeof(desc), 1, pFile); + + DDS_HEADER_DXT10 hdr10; + memset(&hdr10, 0, sizeof(hdr10)); + + // Not all tools support DXGI_FORMAT_BC7_UNORM_SRGB (like NVTT), but ddsview in DirectXTex pays attention to it. So not sure what to do here. + // For best compatibility just write DXGI_FORMAT_BC7_UNORM. + //hdr10.dxgiFormat = srgb ? 
DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM; + hdr10.dxgiFormat = dxgi_format; // DXGI_FORMAT_BC7_UNORM; + hdr10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + hdr10.arraySize = 1; + + fwrite(&hdr10, sizeof(hdr10), 1, pFile); + } + + fwrite(pBlocks, desc.lPitch, 1, pFile); + + if (fclose(pFile) == EOF) + { + fprintf(stderr, "Failed writing to DDS file %s!\n", pFilename); + return false; + } + + return true; +} + +void strip_extension(std::string& s) +{ + for (int32_t i = (int32_t)s.size() - 1; i >= 0; i--) + { + if (s[i] == '.') + { + s.resize(i); + break; + } + } +} + +void strip_path(std::string& s) +{ + for (int32_t i = (int32_t)s.size() - 1; i >= 0; i--) + { + if ((s[i] == '/') || (s[i] == ':') || (s[i] == '\\')) + { + s.erase(0, i + 1); + break; + } + } +} + +uint32_t hash_hsieh(const uint8_t* pBuf, size_t len) +{ + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len); + + const uint32_t bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t* pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; +} + +float compute_block_max_std_dev(const color_quad_u8* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps) +{ + tracked_stat comp_stats[4]; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_quad_u8* pPixel = pPixels + x + y * block_width; + + for (uint32_t c = 0; c 
< num_comps; c++) + comp_stats[c].update(pPixel->m_c[c]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + return max_std_dev; +} + +const uint32_t ASTC_SIG = 0x5CA1AB13; + +#pragma pack(push, 1) +struct astc_header +{ + uint32_t m_sig; + uint8_t m_block_x; + uint8_t m_block_y; + uint8_t m_block_z; + uint8_t m_width[3]; + uint8_t m_height[3]; + uint8_t m_depth[3]; +}; +#pragma pack(pop) + +bool save_astc_file(const char* pFilename, block16_vec& blocks, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height) +{ + FILE* pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + + if (!pFile) + return false; + + astc_header hdr; + memset(&hdr, 0, sizeof(hdr)); + + hdr.m_sig = ASTC_SIG; + hdr.m_block_x = (uint8_t)block_width; + hdr.m_block_y = (uint8_t)block_height; + hdr.m_block_z = 1; + hdr.m_width[0] = (uint8_t)(width); + hdr.m_width[1] = (uint8_t)(width >> 8); + hdr.m_width[2] = (uint8_t)(width >> 16); + hdr.m_height[0] = (uint8_t)(height); + hdr.m_height[1] = (uint8_t)(height >> 8); + hdr.m_height[2] = (uint8_t)(height >> 16); + hdr.m_depth[0] = 1; + fwrite(&hdr, sizeof(hdr), 1, pFile); + + fwrite(blocks.data(), 16, blocks.size(), pFile); + if (fclose(pFile) == EOF) + return false; + + return true; +} + +bool load_astc_file(const char* pFilename, block16_vec& blocks, uint32_t& width, uint32_t& height, uint32_t& block_width, uint32_t& block_height) +{ + FILE* pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + + if (!pFile) + return false; + + astc_header hdr; + if (fread(&hdr, sizeof(hdr), 1, pFile) != 1) + { + fclose(pFile); + return false; + } + + if (hdr.m_sig != ASTC_SIG) + { + fclose(pFile); + return false; + } + + width = hdr.m_width[0] + (hdr.m_width[1] << 8) + (hdr.m_width[2] << 16); + height = 
hdr.m_height[0] + (hdr.m_height[1] << 8) + (hdr.m_height[2] << 16); + uint32_t depth = hdr.m_depth[0] + (hdr.m_depth[1] << 8) + (hdr.m_depth[2] << 16); + + if ((width < 1) || (width > 32768) || (height < 1) || (height > 32768)) + return false; + if ((hdr.m_block_z != 1) || (depth != 1)) + return false; + + block_width = hdr.m_block_x; + block_height = hdr.m_block_y; + + if ((block_width < 4) || (block_width > 12) || (block_height < 4) || (block_height > 12)) + return false; + + uint32_t blocks_x = (width + block_width - 1) / block_width; + uint32_t blocks_y = (height + block_height - 1) / block_height; + uint32_t total_blocks = blocks_x * blocks_y; + + blocks.resize(total_blocks); + + if (fread(blocks.data(), 16, total_blocks, pFile) != total_blocks) + { + fclose(pFile); + return false; + } + + fclose(pFile); + return true; +} + +uint32_t get_deflate_size(const void* pData, size_t data_size) +{ + size_t comp_size = 0; + void* pPre_RDO_Comp_data = tdefl_compress_mem_to_heap(pData, data_size, &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); + mz_free(pPre_RDO_Comp_data); + + if (comp_size > UINT32_MAX) + return UINT32_MAX; + + return (uint32_t)comp_size; +} + +} // namespace utils + +#endif diff --git a/libkram/bc7enc/utils.h b/libkram/bc7enc/utils.h new file mode 100644 index 0000000..841710c --- /dev/null +++ b/libkram/bc7enc/utils.h @@ -0,0 +1,2617 @@ +// File: utils.h +#pragma once +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4127) // conditional expression is constant +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include "dds_defs.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define ASSUME(c) static_assert(c, #c) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +#define VECTOR_TEXT_LINE_SIZE (30.0f) +#define VECTOR_TEXT_CORE_LINE_SIZE (21.0f) + +#define UNUSED(x) (void)x + +namespace utils +{ +extern const uint32_t 
g_pretty_colors[]; +extern const uint32_t g_num_pretty_colors; + +const float cDegToRad = 0.01745329252f; +const float cRadToDeg = 57.29577951f; + +enum eClear { cClear }; +enum eZero { cZero }; +enum eInitExpand { cInitExpand }; + +inline int iabs(int i) { if (i < 0) i = -i; return i; } +inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } +template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } +template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } +template inline F square(F a) { return a * a; } + +template +inline T prev_wrap(T i, T n) +{ + T temp = i - 1; + if (temp < 0) + temp = n - 1; + return temp; +} + +template +inline T next_wrap(T i, T n) +{ + T temp = i + 1; + if (temp >= n) + temp = 0; + return temp; +} + +inline int posmod(int x, int y) +{ + if (x >= 0) + return (x < y) ? x : (x % y); + int m = (-x) % y; + return (m != 0) ? (y - m) : m; +} + +inline float deg_to_rad(float f) +{ + return f * cDegToRad; +}; + +inline float rad_to_deg(float f) +{ + return f * cRadToDeg; +}; + +template +struct rel_ops +{ + friend bool operator!=(const T& x, const T& y) + { + return (!(x == y)); + } + friend bool operator>(const T& x, const T& y) + { + return (y < x); + } + friend bool operator<=(const T& x, const T& y) + { + return (!(y < x)); + } + friend bool operator>=(const T& x, const T& y) + { + return (!(x < y)); + } +}; + +template +class vec : public rel_ops > +{ +public: + typedef T scalar_type; + enum + { + num_elements = N + }; + + inline vec() + { + } + + inline vec(eClear) + { + clear(); + } + + inline vec(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = other.m_s[i]; + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; + m_s[N - 1] = w; + } + + explicit inline vec(T val) + { + set(val); + } + + inline vec(T val0, T val1) + { + set(val0, 
val1); + } + + inline vec(T val0, T val1, T val2) + { + set(val0, val1, val2); + } + + inline vec(T val0, T val1, T val2, T val3) + { + set(val0, val1, val2, val3); + } + + inline vec(T val0, T val1, T val2, T val3, T val4, T val5) + { + set(val0, val1, val2, val3, val4, val5); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15, + val16, val17, val18, val19); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19, + T val20, T val21, T val22, T val23, + T val24) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15, + val16, val17, val18, val19, + val20, val21, val22, val23, + val24); + } + + inline void clear() + { + if (N > 4) + memset(m_s, 0, sizeof(m_s)); + else + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = 0; + } + } + + template + inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + const uint32_t m = std::min(N, ON); + uint32_t i; + for (i = 0; i < m; i++) + m_s[i] = static_cast(other[i]); + for (; i < N; i++) + m_s[i] = 0; + return *this; + } + + inline vec& set_component(uint32_t index, T val) + { + assert(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set(T val) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = val; + return *this; + } + + inline vec& 
set(T val0, T val1) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + for (uint32_t i = 2; i < N; i++) + m_s[i] = 0; + } + return *this; + } + + inline vec& set(T val0, T val1, T val2) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + for (uint32_t i = 3; i < N; i++) + m_s[i] = 0; + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + for (uint32_t i = 4; i < N; i++) + m_s[i] = 0; + } + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3, T val4, T val5) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + if (N >= 5) + { + m_s[4] = val4; + + if (N >= 6) + { + m_s[5] = val5; + + for (uint32_t i = 6; i < N; i++) + m_s[i] = 0; + } + } + } + } + } + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + for (uint32_t i = 16; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + 
if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + if (N >= 17) + m_s[16] = val16; + if (N >= 18) + m_s[17] = val17; + if (N >= 19) + m_s[18] = val18; + if (N >= 20) + m_s[19] = val19; + + for (uint32_t i = 20; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19, + T val20, T val21, T val22, T val23, + T val24) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + if (N >= 17) + m_s[16] = val16; + if (N >= 18) + m_s[17] = val17; + if (N >= 19) + m_s[18] = val18; + if (N >= 20) + m_s[19] = val19; + + if (N >= 21) + m_s[20] = val20; + if (N >= 22) + m_s[21] = val21; + if (N >= 23) + m_s[22] = val22; + if (N >= 24) + m_s[23] = val23; + + if (N >= 25) + m_s[24] = val24; + + for (uint32_t i = 25; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = pValues[i]; + return *this; + } + + template + inline vec& swizzle_set(const vec& other, 
uint32_t i) + { + return set(static_cast(other[i])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k, uint32_t l) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator=(const vec& rhs) + { + if (this != &rhs) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = rhs.m_s[i]; + } + return *this; + } + + template + inline vec& operator=(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + + uint32_t s = std::min(N, O); + + uint32_t i; + for (i = 0; i < s; i++) + m_s[i] = static_cast(other[i]); + + for (; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline bool operator==(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + if (!(m_s[i] == rhs.m_s[i])) + return false; + return true; + } + + inline bool operator<(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + return true; + else if (!(m_s[i] == rhs.m_s[i])) + return false; + } + + return false; + } + + inline T operator[](uint32_t i) const + { + assert(i < N); + return m_s[i]; + } + + inline T& operator[](uint32_t i) + { + assert(i < N); + return m_s[i]; + } + + template + inline uint64_t get_component_as_uint() const + { + ASSUME(index < N); + if (sizeof(T) == sizeof(float)) + return *reinterpret_cast(&m_s[index]); + else + return *reinterpret_cast(&m_s[index]); + } + + inline T get_x(void) const + { + return m_s[0]; + } + inline T get_y(void) const + { + ASSUME(N >= 2); + return m_s[1]; + } + inline T get_z(void) const + { + ASSUME(N >= 3); + return m_s[2]; + } + inline T 
get_w(void) const + { + ASSUME(N >= 4); + return m_s[3]; + } + + inline vec get_x_vector() const + { + return broadcast<0>(); + } + inline vec get_y_vector() const + { + return broadcast<1>(); + } + inline vec get_z_vector() const + { + return broadcast<2>(); + } + inline vec get_w_vector() const + { + return broadcast<3>(); + } + + inline T get_component(uint32_t i) const + { + return (*this)[i]; + } + + inline vec& set_x(T v) + { + m_s[0] = v; + return *this; + } + inline vec& set_y(T v) + { + ASSUME(N >= 2); + m_s[1] = v; + return *this; + } + inline vec& set_z(T v) + { + ASSUME(N >= 3); + m_s[2] = v; + return *this; + } + inline vec& set_w(T v) + { + ASSUME(N >= 4); + m_s[3] = v; + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_s[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_s[0]); + } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + inline vec<2, T> select2(uint32_t i, uint32_t j) const + { + assert((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint32_t i, uint32_t j, uint32_t k) const + { + assert((i < N) && (j < N) && (k < N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + assert((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const + { + return m_s[N - 1] == 0; + } + inline bool is_vector() const + { + return is_dir(); + } + inline bool is_point() const + { + return m_s[N - 1] == 1; + } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + result /= result[N - 1]; + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + template + inline vec broadcast() const + { + return 
vec((*this)[i]); + } + + inline vec swizzle(uint32_t i, uint32_t j) const + { + return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k) const + { + return vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator-() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = -m_s[i]; + return result; + } + + inline vec operator+() const + { + return *this; + } + + inline vec& operator+=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] += other.m_s[i]; + return *this; + } + + inline vec& operator-=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] -= other.m_s[i]; + return *this; + } + + inline vec& operator*=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= other.m_s[i]; + return *this; + } + + inline vec& operator/=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= other.m_s[i]; + return *this; + } + + inline vec& operator*=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= s; + return *this; + } + + inline vec& operator/=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= s; + return *this; + } + + // component-wise multiply (not a dot product like in previous versions) + // just remarking it out because it's too ambiguous, use dot() or mul_components() instead +#if 0 + friend inline vec operator*(const vec& lhs, const vec& rhs) + { + return vec::mul_components(lhs, rhs); + } +#endif + + friend inline vec operator*(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator*(T val, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = val * rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, const vec& 
rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / val; + return result; + } + + friend inline vec operator+(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + return result; + } + + friend inline vec operator-(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + ASSUME(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross2(const vec& b) const + { + return cross2(*this, b); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + ASSUME(N >= 3); + return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross3(const vec& b) const + { + return cross3(*this, b); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + ASSUME(N >= 2); + + if (N == 2) + return cross2(a, b); + else + return cross3(a, b); + } + + inline vec<3, T> cross(const vec& b) const + { + ASSUME(N >= 2); + return cross(*this, b); + } + + inline T dot(const vec& rhs) const + { + return dot(*this, rhs); + } + + inline vec dot_vector(const vec& rhs) const + { + return vec(dot(*this, rhs)); + } + + static inline T dot(const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint32_t i = 1; i < N; i++) + result += lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + inline T dot2(const vec& rhs) const + { + ASSUME(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + ASSUME(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * 
rhs.m_s[2]; + } + + inline T dot4(const vec& rhs) const + { + ASSUME(N >= 4); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2] + m_s[3] * rhs.m_s[3]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + sum += m_s[i] * m_s[i]; + return sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + break; + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return sqrt(dist2); + } + + inline vec inverse() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = m_s[i] ? 
(1.0f / m_s[i]) : 0; + return result; + } + + // returns squared length (norm) + inline double normalize(const vec* pDefaultVec = NULL) + { + double n = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + n += m_s[i] * m_s[i]; + + if (n != 0) + *this *= static_cast(1.0f / sqrt(n)); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + ASSUME(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(clamp(m_s[i], l, h)); + return *this; + } + + inline vec& saturate() + { + return clamp(0.0f, 1.0f); + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(clamp(m_s[i], l[i], h[i])); + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + return false; + + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l) || (m_s[i] > h)) + return false; + + return true; + } + + inline uint32_t get_major_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = 
fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint32_t get_minor_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline void get_projection_axes(uint32_t& u, uint32_t& v) const + { + const int axis = get_major_axis(); + if (m_s[axis] < 0.0f) + { + v = next_wrap(axis, N); + u = next_wrap(v, N); + } + else + { + u = next_wrap(axis, N); + v = next_wrap(u, N); + } + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = std::min(result, fabs(m_s[i])); + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = std::max(result, fabs(m_s[i])); + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = std::min(result, m_s[i]); + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = std::max(result, m_s[i]); + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + *this -= (dot(dir) * dir); + return *this; + } + + inline vec get_remove_unit_direction(const vec& dir) const + { + return *this - (dot(dir) * dir); + } + + inline bool all_less(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] >= b.m_s[i]) + return false; + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] > b.m_s[i]) + return false; + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] <= b.m_s[i]) + return false; + return true; + } + + inline bool all_greater_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] < b.m_s[i]) + return false; 
+ return true; + } + + inline vec negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) + ret[1] = -m_s[1]; + if (N >= 3) + ret[2] = -m_s[2]; + + for (uint32_t i = 3; i < N; i++) + ret[i] = m_s[i]; + + return ret; + } + + inline vec& invert() + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] != 0.0f) + m_s[i] = 1.0f / m_s[i]; + return *this; + } + + inline scalar_type perp_dot(const vec& b) const + { + ASSUME(N == 2); + return m_s[0] * b.m_s[1] - m_s[1] * b.m_s[0]; + } + + inline vec perp() const + { + ASSUME(N == 2); + return vec(-m_s[1], m_s[0]); + } + + inline vec get_floor() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = floor(m_s[i]); + return result; + } + + inline vec get_ceil() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = ceil(m_s[i]); + return result; + } + + // static helper methods + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + static inline vec mul_add_components(const vec& a, const vec& b, const vec& c) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = a.m_s[i] * b.m_s[i] + c.m_s[i]; + return result; + } + + static inline vec make_axis(uint32_t i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] == b[i]); + return ret; + } + + static inline vec not_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] != b[i]); + return ret; + } + + static inline vec less_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] < b[i]); + return ret; + } + + static inline vec less_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] <= b[i]); + 
return ret; + } + + static inline vec greater_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] >= b[i]); + return ret; + } + + static inline vec greater_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] > b[i]); + return ret; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = std::max(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = std::min(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + return ret; + } + + static inline bool equal_tol(const vec& a, const vec& b, float t) + { + for (uint32_t i = 0; i < N; i++) + if (!equal_tol(a.m_s[i], b.m_s[i], t)) + return false; + return true; + } + + inline bool equal_tol(const vec& b, float t) const + { + return equal_tol(*this, b, t); + } + +protected: + T m_s[N]; +}; + +typedef vec<1, double> vec1D; +typedef vec<2, double> vec2D; +typedef vec<3, double> vec3D; +typedef vec<4, double> vec4D; + +typedef vec<1, float> vec1F; + +typedef vec<2, float> vec2F; +typedef std::vector vec2F_array; + +typedef vec<3, float> vec3F; +typedef std::vector vec3F_array; + +typedef vec<4, float> vec4F; +typedef std::vector vec4F_array; + +typedef vec<2, uint32_t> vec2U; +typedef vec<3, uint32_t> vec3U; +typedef vec<2, int> vec2I; +typedef vec<3, int> vec3I; +typedef vec<4, int> vec4I; + +typedef vec<2, int16_t> vec2I16; +typedef vec<3, int16_t> vec3I16; + +inline vec2F rotate_point(const vec2F& p, float rad) +{ + float c = cos(rad); + float s = sin(rad); + + float x = p[0]; + float y = p[1]; + + return vec2F(x * c - y * s, x * s + y * c); +} + +class rect +{ +public: + inline rect() 
+ { + } + + inline rect(eClear) + { + clear(); + } + + inline rect(eInitExpand) + { + init_expand(); + } + + // up to, but not including right/bottom + inline rect(int left, int top, int right, int bottom) + { + set(left, top, right, bottom); + } + + inline rect(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline rect(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline bool operator==(const rect& r) const + { + return (m_corner[0] == r.m_corner[0]) && (m_corner[1] == r.m_corner[1]); + } + + inline bool operator<(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_corner[i] < r.m_corner[i]) + return true; + else if (!(m_corner[i] == r.m_corner[i])) + return false; + } + + return false; + } + + inline void clear() + { + m_corner[0].clear(); + m_corner[1].clear(); + } + + inline void set(int left, int top, int right, int bottom) + { + m_corner[0].set(left, top); + m_corner[1].set(right, bottom); + } + + inline void set(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline void set(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline uint32_t get_width() const + { + return m_corner[1][0] - m_corner[0][0]; + } + inline uint32_t get_height() const + { + return m_corner[1][1] - m_corner[0][1]; + } + + inline int get_left() const + { + return m_corner[0][0]; + } + inline int get_top() const + { + return m_corner[0][1]; + } + inline int get_right() const + { + return m_corner[1][0]; + } + inline int get_bottom() const + { + return m_corner[1][1]; + } + + inline bool is_empty() const + { + return (m_corner[1][0] <= m_corner[0][0]) || (m_corner[1][1] <= m_corner[0][1]); + } + + inline uint32_t get_dimension(uint32_t axis) const + { + return m_corner[1][axis] - m_corner[0][axis]; + } + inline uint32_t get_area() const + { + return get_dimension(0) * get_dimension(1); 
+ } + + inline const vec2I& operator[](uint32_t i) const + { + assert(i < 2); + return m_corner[i]; + } + inline vec2I& operator[](uint32_t i) + { + assert(i < 2); + return m_corner[i]; + } + + inline rect& translate(int x_ofs, int y_ofs) + { + m_corner[0][0] += x_ofs; + m_corner[0][1] += y_ofs; + m_corner[1][0] += x_ofs; + m_corner[1][1] += y_ofs; + return *this; + } + + inline rect& init_expand() + { + m_corner[0].set(INT_MAX); + m_corner[1].set(INT_MIN); + return *this; + } + + inline rect& expand(int x, int y) + { + m_corner[0][0] = std::min(m_corner[0][0], x); + m_corner[0][1] = std::min(m_corner[0][1], y); + m_corner[1][0] = std::max(m_corner[1][0], x + 1); + m_corner[1][1] = std::max(m_corner[1][1], y + 1); + return *this; + } + + inline rect& expand(const rect& r) + { + m_corner[0][0] = std::min(m_corner[0][0], r[0][0]); + m_corner[0][1] = std::min(m_corner[0][1], r[0][1]); + m_corner[1][0] = std::max(m_corner[1][0], r[1][0]); + m_corner[1][1] = std::max(m_corner[1][1], r[1][1]); + return *this; + } + + inline bool touches(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (r[1][i] <= m_corner[0][i]) + return false; + else if (r[0][i] >= m_corner[1][i]) + return false; + } + + return true; + } + + inline bool fully_within(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_corner[0][i] < r[0][i]) + return false; + else if (m_corner[1][i] > r[1][i]) + return false; + } + + return true; + } + + inline bool intersect(const rect& r) + { + if (!touches(r)) + { + clear(); + return false; + } + + for (uint32_t i = 0; i < 2; i++) + { + m_corner[0][i] = std::max(m_corner[0][i], r[0][i]); + m_corner[1][i] = std::min(m_corner[1][i], r[1][i]); + } + + return true; + } + + inline bool contains(int x, int y) const + { + return (x >= m_corner[0][0]) && (x < m_corner[1][0]) && + (y >= m_corner[0][1]) && (y < m_corner[1][1]); + } + + inline bool contains(const vec2I& p) const + { + return contains(p[0], p[1]); + } + +private: + vec2I 
m_corner[2]; +}; + +inline rect make_rect(uint32_t width, uint32_t height) +{ + return rect(0, 0, width, height); +} + +struct color_quad_u8 +{ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) +#endif + union + { + uint8_t m_c[4]; + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + inline color_quad_u8(eClear) : color_quad_u8(0, 0, 0, 0) { } + + inline color_quad_u8(uint8_t cr, uint8_t cg, uint8_t cb, uint8_t ca) + { + set(cr, cg, cb, ca); + } + + inline color_quad_u8(uint8_t cy = 0, uint8_t ca = 255) + { + set(cy, ca); + } + + inline void clear() + { + set(0, 0, 0, 0); + } + + inline color_quad_u8& set(uint8_t cy, uint8_t ca = 255) + { + m_c[0] = cy; + m_c[1] = cy; + m_c[2] = cy; + m_c[3] = ca; + return *this; + } + + inline color_quad_u8& set(uint8_t cr, uint8_t cg, uint8_t cb, uint8_t ca) + { + m_c[0] = cr; + m_c[1] = cg; + m_c[2] = cb; + m_c[3] = ca; + return *this; + } + + inline color_quad_u8& set_clamped(int cr, int cg, int cb, int ca) + { + m_c[0] = (uint8_t)clamp(cr, 0, 255); + m_c[1] = (uint8_t)clamp(cg, 0, 255); + m_c[2] = (uint8_t)clamp(cb, 0, 255); + m_c[3] = (uint8_t)clamp(ca, 0, 255); + return *this; + } + + color_quad_u8& set_alpha(int ca) { a = (uint8_t)clamp(ca, 0, 255); return *this; } + + inline uint8_t& operator[] (uint32_t i) { assert(i < 4); return m_c[i]; } + inline uint8_t operator[] (uint32_t i) const { assert(i < 4); return m_c[i]; } + + inline int get_luma() const { return (13938U * m_c[0] + 46869U * m_c[1] + 4729U * m_c[2] + 32768U) >> 16U; } // REC709 weightings + + inline bool operator== (const color_quad_u8& other) const + { + return (m_c[0] == other.m_c[0]) && (m_c[1] == other.m_c[1]) && (m_c[2] == other.m_c[2]) && (m_c[3] == other.m_c[3]); + } + + inline bool operator!= (const color_quad_u8& other) const + { + return !(*this == other); + } + + inline uint32_t squared_distance(const color_quad_u8& c, bool alpha = true) const + { + return 
square(r - c.r) + square(g - c.g) + square(b - c.b) + (alpha ? square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad_u8& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } +}; +typedef std::vector color_quad_u8_vec; + +inline uint32_t color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) +{ + if (perceptual) + { + const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; + const float cr1 = e1.r - l1; + const float cb1 = e1.b - l1; + + const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; + const float cr2 = e2.r - l2; + const float cb2 = e2.b - l2; + + const float dl = l1 - l2; + const float dcr = cr1 - cr2; + const float dcb = cb1 - cb2; + + uint32_t d = static_cast( + 32.0f * 4.0f * dl * dl + + 32.0f * 2.0f * (.5f / (1.0f - .2126f)) * (.5f / (1.0f - .2126f)) * dcr * dcr + + 32.0f * .25f * (.5f / (1.0f - .0722f)) * (.5f / (1.0f - .0722f)) * dcb * dcb); + + if (alpha) + { + int da = (int)e1.a - (int)e2.a; + + d += static_cast(128.0f * da * da); + } + + return d; + } + else + return e1.squared_distance(e2, alpha); +} + +extern color_quad_u8 g_white_color_u8, g_black_color_u8, g_red_color_u8, g_green_color_u8, g_blue_color_u8, g_yellow_color_u8, g_purple_color_u8, g_magenta_color_u8, g_cyan_color_u8; + +class image_u8 +{ +public: + image_u8() : + m_width(0), m_height(0), + m_clip_rect(cClear) + { + } + + image_u8(uint32_t width, uint32_t height) : + m_width(width), m_height(height), + m_clip_rect(0, 0, width, height) + { + m_pixels.resize(width * height); + } + + inline const color_quad_u8_vec& get_pixels() const { return m_pixels; } + inline color_quad_u8_vec& get_pixels() { return m_pixels; } + + inline uint32_t width() const { return m_width; } + inline uint32_t height() const { return m_height; } + inline uint32_t total_pixels() const { return m_width * m_height; } + + inline const rect& get_clip_rect() const { return m_clip_rect; } + + inline void set_clip_rect(const 
rect& r) + { + assert((r.get_left() >= 0) && (r.get_top() >= 0) && (r.get_right() <= (int)m_width) && (r.get_bottom() <= (int)m_height)); + + m_clip_rect = r; + } + + inline void clear_clip_rect() { m_clip_rect.set(0, 0, m_width, m_height); } + + inline bool is_clipped(int x, int y) const { return !m_clip_rect.contains(x, y); } + + inline rect get_bounds() const { return rect(0, 0, m_width, m_height); } + + inline color_quad_u8& operator()(uint32_t x, uint32_t y) { assert((x < m_width) && (y < m_height)); return m_pixels[x + m_width * y]; } + inline const color_quad_u8& operator()(uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); return m_pixels[x + m_width * y]; } + + image_u8& clear() + { + m_width = m_height = 0; + m_clip_rect.clear(); + m_pixels.clear(); + return *this; + } + + image_u8& init(uint32_t width, uint32_t height) + { + clear(); + + m_width = width; + m_height = height; + m_clip_rect.set(0, 0, width, height); + m_pixels.resize(width * height); + return *this; + } + + image_u8& set_all(const color_quad_u8& p) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = p; + return *this; + } + + inline const color_quad_u8& get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline color_quad_u8& get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline image_u8& set_pixel_clipped(int x, int y, const color_quad_u8& c) + { + if (!is_clipped(x, y)) + (*this)(x, y) = c; + return *this; + } + + inline image_u8& fill_box(int x, int y, int w, int h, const color_quad_u8& c) + { + for (int y_ofs = 0; y_ofs < h; y_ofs++) + for (int x_ofs = 0; x_ofs < w; x_ofs++) + set_pixel_clipped(x + x_ofs, y + y_ofs, c); + return *this; + } + + void invert_box(int inX, int inY, int inW, int inH) + { + for (int y = 0; y < inH; y++) + { + const uint32_t yy = inY + y; + + for (int x = 0; x < inW; x++) + { + const uint32_t xx = inX + x; + + if 
(is_clipped(xx, yy)) + continue; + + color_quad_u8 c((*this)(xx, yy)); + + c.r = 255 - c.r; + c.g = 255 - c.g; + c.b = 255 - c.b; + + set_pixel_clipped(xx, yy, c); + } + } + } + + image_u8& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_pixel_clipped(x, y, get_clamped(std::min(x, orig_w - 1U), std::min(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_pixel_clipped(x, y, get_clamped(std::min(x, orig_w - 1U), std::min(y, orig_h - 1U))); + } + } + return *this; + } + + image_u8& crop(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return *this; + + image_u8 new_image(new_width, new_height); + + const uint32_t w = std::min(m_width, new_width); + const uint32_t h = std::min(m_height, new_height); + + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + new_image(x, y) = (*this)(x, y); + + return swap(new_image); + } + + image_u8& swap(image_u8& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pixels, other.m_pixels); + std::swap(m_clip_rect, other.m_clip_rect); + return *this; + } + + // No clipping + inline void get_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, color_quad_u8* pPixels) const + { + assert((bx * width + width) <= m_width); + assert((by * height + height) <= m_height); + + for (uint32_t y = 0; y < height; y++) + memcpy(pPixels + y * width, &(*this)(bx * width, by * height + y), width * sizeof(color_quad_u8)); + } + + inline void get_block_clamped(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, color_quad_u8* pPixels) const + { + for (uint32_t y = 0; y < height; y++) + for (uint32_t x = 0; x 
< width; x++) + pPixels[x + y * width] = get_clamped(bx * width + x, by * height + y); + } + + // No clipping + inline void set_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, const color_quad_u8* pPixels) + { + assert((bx * width + width) <= m_width); + assert((by * height + height) <= m_height); + + for (uint32_t y = 0; y < height; y++) + memcpy(&(*this)(bx * width, by * height + y), pPixels + y * width, width * sizeof(color_quad_u8)); + } + + image_u8& swizzle(uint32_t r, uint32_t g, uint32_t b, uint32_t a) + { + assert((r | g | b | a) <= 3); + for (uint32_t y = 0; y < m_height; y++) + { + for (uint32_t x = 0; x < m_width; x++) + { + color_quad_u8 tmp((*this)(x, y)); + (*this)(x, y).set(tmp[r], tmp[g], tmp[b], tmp[a]); + } + } + + return *this; + } + + struct pixel_coord + { + uint16_t m_x, m_y; + pixel_coord() { } + pixel_coord(uint32_t x, uint32_t y) : m_x((uint16_t)x), m_y((uint16_t)y) { } + }; + + uint32_t flood_fill(int x, int y, const color_quad_u8& c, const color_quad_u8& b, std::vector* pSet_pixels = nullptr); + + void draw_line(int xs, int ys, int xe, int ye, const color_quad_u8& color); + + inline void set_pixel_clipped_alphablend(int x, int y, const color_quad_u8& c) + { + if (is_clipped(x, y)) + return; + + color_quad_u8 ct(m_pixels[x + y * m_width]); + + ct.r = static_cast(ct.r + ((c.r - ct.r) * c.a) / 255); + ct.g = static_cast(ct.g + ((c.g - ct.g) * c.a) / 255); + ct.b = static_cast(ct.b + ((c.b - ct.b) * c.a) / 255); + + m_pixels[x + y * m_width] = ct; + } + +private: + color_quad_u8_vec m_pixels; + uint32_t m_width, m_height; + rect m_clip_rect; + + struct fill_segment + { + int16_t m_y, m_xl, m_xr, m_dy; + + fill_segment(int y, int xl, int xr, int dy) : + m_y((int16_t)y), m_xl((int16_t)xl), m_xr((int16_t)xr), m_dy((int16_t)dy) + { + } + }; + + inline bool flood_fill_is_inside(int x, int y, const color_quad_u8& b) const + { + if (is_clipped(x, y)) + return false; + + return (*this)(x, y) == b; + } + + void rasterize_line(int xs, 
int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_quad_u8& color); + + void draw_aaline_pixel(int x, int y, int a, color_quad_u8 color) + { + color.a = static_cast(255 - a); + set_pixel_clipped_alphablend(x, y, color); + } +}; + +bool load_png(const char* pFilename, image_u8& img); + +bool save_png(const char* pFilename, const image_u8& img, bool save_alpha); + +class image_metrics +{ +public: + double m_max, m_mean, m_mean_squared, m_root_mean_squared, m_peak_snr; + + image_metrics() + { + clear(); + } + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void compute(const image_u8& a, const image_u8& b, uint32_t first_channel, uint32_t num_channels) + { + const bool average_component_error = true; + + const uint32_t width = std::min(a.width(), b.width()); + const uint32_t height = std::min(a.height(), b.height()); + + assert((first_channel < 4U) && (first_channel + num_channels <= 4U)); + + // Histogram approach originally due to Charles Bloom. 
+ double hist[256]; + memset(hist, 0, sizeof(hist)); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + if (!num_channels) + hist[iabs(ca.get_luma() - cb.get_luma())]++; + else + { + for (uint32_t c = 0; c < num_channels; c++) + hist[iabs(ca[first_channel + c] - cb[first_channel + c])]++; + } + } + } + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 256; i++) + { + if (!hist[i]) + continue; + + m_max = std::max(m_max, i); + + double x = i * hist[i]; + + sum += x; + sum2 += i * x; + } + + // See http://richg42.blogspot.com/2016/09/how-to-compute-psnr-from-old-berkeley.html + double total_values = width * height; + + if (average_component_error) + total_values *= clamp(num_channels, 1, 4); + + m_mean = clamp(sum / total_values, 0.0f, 255.0f); + m_mean_squared = clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); + + m_root_mean_squared = sqrt(m_mean_squared); + + if (!m_root_mean_squared) + m_peak_snr = 100.0f; + else + m_peak_snr = clamp(log10(255.0f / m_root_mean_squared) * 20.0f, 0.0f, 100.0f); + } +}; + +class imagef +{ +public: + imagef() : + m_width(0), m_height(0), m_pitch(0) + { + } + + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + imagef(const imagef& other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + imagef& swap(imagef& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + imagef& operator= (const imagef& rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + imagef& clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + m_pixels.resize(0); + return *this; + } + + imagef& 
set(const image_u8& src, const vec4F& scale = vec4F(1), const vec4F& bias = vec4F(0)) + { + const uint32_t width = src.width(); + const uint32_t height = src.height(); + + resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_quad_u8& src_pixel = src(x, y); + (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); + } + } + + return *this; + } + + imagef& resize(const imagef& other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + return resize(other.get_width(), other.get_height(), p, background); + } + + imagef& resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + return crop(w, h, p, background); + } + + imagef& set_all(const vec4F& c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + imagef& fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_pixel_clipped(x + ix, y + iy, c); + return *this; + } + + imagef& crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + vec4F_array cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert(x < m_width&& y 
< m_height); return m_pixels[x + y * m_pitch]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert(x < m_width&& y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const vec4F& get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline vec4F& get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const vec4F& get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline vec4F& get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline imagef& set_pixel_clipped(int x, int y, const vec4F& c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. 
+ imagef& blit(const imagef& src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_height()) + break; + + set_pixel_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const imagef& extract_block_clamped(vec4F* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + imagef& set_block_clipped(const vec4F* pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_pixel_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const vec4F_array& get_pixels() const { return m_pixels; } + inline vec4F_array& get_pixels() { return m_pixels; } + + inline const vec4F* get_ptr() const { return &m_pixels[0]; } + inline vec4F* get_ptr() { return &m_pixels[0]; } + +private: + uint32_t m_width, m_height, m_pitch; // all in pixels + vec4F_array m_pixels; +}; + +enum +{ + cComputeGaussianFlagNormalize = 1, + cComputeGaussianFlagPrint = 2, + cComputeGaussianFlagNormalizeCenterToOne = 4 +}; + 
+// size_x/y should be odd +void compute_gaussian_kernel(float* pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags); + +void gaussian_filter(imagef& dst, const imagef& orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping = false, uint32_t width_divisor = 1, uint32_t height_divisor = 1); + +vec4F compute_ssim(const imagef& a, const imagef& b); + +vec4F compute_ssim(const image_u8& a, const image_u8& b, bool luma); + +struct block8 +{ + uint64_t m_vals[1]; +}; + +typedef std::vector block8_vec; + +struct block16 +{ + uint64_t m_vals[2]; +}; + +typedef std::vector block16_vec; + +//bool save_dds(const char* pFilename, uint32_t width, uint32_t height, const void* pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header); + +void strip_extension(std::string& s); +void strip_path(std::string& s); + +uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); + +// https://www.johndcook.com/blog/standard_deviation/ +// This class is for small numbers of integers, so precision shouldn't be an issue. +class tracked_stat +{ +public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() const { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_mean() const { return m_num ? (float)m_total / m_num : 0.0f; }; + + float get_variance() const { return m_num ? ((float)(m_num * m_total2 - m_total * m_total)) / (m_num * m_num) : 0.0f; } + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + + float get_sample_variance() const { return (m_num > 1) ? 
((float)(m_num * m_total2 - m_total * m_total)) / (m_num * (m_num - 1)) : 0.0f; } + float get_sample_std_dev() const { return (m_num > 1) ? sqrtf(get_sample_variance()) : 0.0f; } + +private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; +}; + +inline float compute_covariance(const float* pA, const float* pB, const tracked_stat& a, const tracked_stat& b, bool sample) +{ + const uint32_t n = a.get_number_of_values(); + assert(n == b.get_number_of_values()); + + if (!n) + { + assert(0); + return 0.0f; + } + if ((sample) && (n == 1)) + { + assert(0); + return 0; + } + + const float mean_a = a.get_mean(); + const float mean_b = b.get_mean(); + + float total = 0.0f; + for (uint32_t i = 0; i < n; i++) + total += (pA[i] - mean_a) * (pB[i] - mean_b); + + return total / (sample ? (n - 1) : n); +} + +inline float compute_correlation_coefficient(const float* pA, const float* pB, const tracked_stat& a, const tracked_stat& b, float c, bool sample) +{ + if (!a.get_number_of_values()) + return 1.0f; + + float covar = compute_covariance(pA, pB, a, b, sample); + float std_dev_a = sample ? a.get_sample_std_dev() : a.get_std_dev(); + float std_dev_b = sample ? 
b.get_sample_std_dev() : b.get_std_dev(); + float denom = std_dev_a * std_dev_b + c; + + if (denom < .0000125f) + return 1.0f; + + float result = (covar + c) / denom; + + return clamp(result, -1.0f, 1.0f); +} + +float compute_block_max_std_dev(const color_quad_u8* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps); + +class rand +{ + std::mt19937 m_mt; + +public: + rand() { } + + rand(uint32_t s) { seed(s); } + void seed(uint32_t s) { m_mt.seed(s); } + + // between [l,h] + int irand(int l, int h) { std::uniform_int_distribution d(l, h); return d(m_mt); } + + uint32_t urand32() { return static_cast(irand(INT32_MIN, INT32_MAX)); } + + bool bit() { return irand(0, 1) == 1; } + + uint8_t byte() { return static_cast(urand32()); } + + // between [l,h) + float frand(float l, float h) { std::uniform_real_distribution d(l, h); return d(m_mt); } + + float gaussian(float mean, float stddev) { std::normal_distribution d(mean, stddev); return d(m_mt); } +}; + +bool save_astc_file(const char* pFilename, block16_vec& blocks, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height); +bool load_astc_file(const char* pFilename, block16_vec& blocks, uint32_t& width, uint32_t& height, uint32_t& block_width, uint32_t& block_height); + +class value_stats +{ +public: + value_stats() + { + clear(); + } + + void clear() + { + m_sum = 0; + m_sum2 = 0; + m_num = 0; + m_min = 1e+39; + m_max = -1e+39; + m_vals.clear(); + } + + void add(double val) + { + m_sum += val; + m_sum2 += val * val; + + m_num++; + + m_min = std::min(m_min, val); + m_max = std::max(m_max, val); + + m_vals.push_back(val); + } + + void add(int val) + { + add(static_cast(val)); + } + + void add(uint32_t val) + { + add(static_cast(val)); + } + + void add(int64_t val) + { + add(static_cast(val)); + } + + void add(uint64_t val) + { + add(static_cast(val)); + } + + void print(const char* pPrefix = "") + { + if (!m_vals.size()) + printf("%s: Empty\n", pPrefix); + else + printf("%s: 
Samples: %llu, Total: %f, Avg: %f, Std Dev: %f, Min: %f, Max: %f, Mean: %f\n", + pPrefix, (unsigned long long)get_num(), get_total(), get_average(), get_std_dev(), get_min(), get_max(), get_mean()); + } + + double get_total() const + { + return m_sum; + } + + double get_average() const + { + return m_num ? (m_sum / m_num) : 0.0f; + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + uint64_t get_num() const + { + return m_num; + } + + double get_val(uint32_t index) const + { + return m_vals[index]; + } + + // Returns population standard deviation + double get_std_dev() const + { + if (!m_num) + return 0.0f; + + // TODO: FP precision + return sqrt((m_sum2 - ((m_sum * m_sum) / m_num)) / m_num); + } + + double get_mean() const + { + if (!m_num) + return 0.0f; + + std::vector sorted_vals(m_vals); + std::sort(sorted_vals.begin(), sorted_vals.end()); + + return sorted_vals[sorted_vals.size() / 2]; + } + +private: + double m_sum; + double m_sum2; + + uint64_t m_num; + + double m_min; + double m_max; + + mutable std::vector m_vals; +}; + +uint32_t get_deflate_size(const void* pData, size_t data_size); + +} // namespace utils + +#ifdef _MSC_VER +#pragma warning (pop) +#endif diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 80568fd..82c35f4 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -2524,29 +2524,29 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, uberLevel = 0; maxPartitions = 0; bc7params.m_try_least_squares = false; - bc7params.m_mode_partition_estimation_filterbank = true; + bc7params.m_mode17_partition_estimation_filterbank = true; } else if (info.quality <= 40) { uberLevel = 0; maxPartitions = 16; bc7params.m_try_least_squares = false; - bc7params.m_mode_partition_estimation_filterbank = true; + bc7params.m_mode17_partition_estimation_filterbank = true; } else if (info.quality <= 90) { uberLevel = 1; maxPartitions = 64; 
bc7params.m_try_least_squares = true; // true = 0.7s on test case - bc7params.m_mode_partition_estimation_filterbank = true; + bc7params.m_mode17_partition_estimation_filterbank = true; } else { uberLevel = 4; maxPartitions = 64; bc7params.m_try_least_squares = true; - bc7params.m_mode_partition_estimation_filterbank = true; + bc7params.m_mode17_partition_estimation_filterbank = true; } bc7params.m_uber_level = std::min(uberLevel, (uint32_t)BC7ENC_MAX_UBER_LEVEL); - bc7params.m_max_partitions_mode = std::min(maxPartitions, (uint32_t)BC7ENC_MAX_PARTITIONS1); + bc7params.m_max_partitions = std::min(maxPartitions, (uint32_t)BC7ENC_MAX_PARTITIONS); } else if (info.pixelFormat == MyMTLPixelFormatBC1_RGBA || info.pixelFormat == MyMTLPixelFormatBC1_RGBA_sRGB ||