Skip to content

Commit

Permalink
Extend JSON property perfect hash to 256 bits (#1384)
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
  • Loading branch information
jviotti authored Dec 17, 2024
1 parent 1f68221 commit eac39ce
Show file tree
Hide file tree
Showing 2 changed files with 253 additions and 32 deletions.
48 changes: 18 additions & 30 deletions src/json/include/sourcemeta/jsontoolkit/json_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ template <typename T> struct FastHash {
using type = std::uint64_t;
type a{0};
type b{0};
type c{0};
type d{0};

inline auto operator==(const property_hash_type &other) const noexcept
-> bool {
return this->a == other.a && this->b == other.b;
return this->a == other.a && this->b == other.b && this->c == other.c &&
this->d == other.d;
}

inline auto is_perfect() const noexcept -> bool {
Expand All @@ -34,35 +37,20 @@ template <typename T> struct FastHash {
-> property_hash_type {
const auto size{value.size()};
property_hash_type result;
switch (size) {
case 0:
return result;
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
case 10:
case 11:
case 12:
case 13:
case 14:
case 15:
// Copy starting at byte 2
std::memcpy(reinterpret_cast<char *>(&result) + 1, value.data(), size);
return result;
default:
// This case is specifically designed to be constant with regards to
// string length, and to exploit the fact that most JSON objects don't
// have a lot of entries, so hash collision is not as common
return {(size + static_cast<property_hash_type::type>(value.front()) +
static_cast<property_hash_type::type>(value.back())) %
// Make sure the property hash can never exceed 8 bits
256};
if (size == 0) {
return result;
} else if (size <= 31) {
// Copy starting at byte 2
std::memcpy(reinterpret_cast<char *>(&result) + 1, value.data(), size);
return result;
} else {
// This case is specifically designed to be constant with regards to
// string length, and to exploit the fact that most JSON objects don't
// have a lot of entries, so hash collision is not as common
return {(size + static_cast<property_hash_type::type>(value.front()) +
static_cast<property_hash_type::type>(value.back())) %
// Make sure the property hash can never exceed 8 bits
256};
}
}
};
Expand Down
Loading

4 comments on commit eac39ce

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/llvm)

Benchmark suite Current: eac39ce Previous: 1f68221 Ratio
JSON_Array_Of_Objects_Unique 328.99222777044577 ns/iter 369.571672058009 ns/iter 0.89
JSON_Parse_1 20928.56769042236 ns/iter 21786.254455293 ns/iter 0.96
JSON_Fast_Hash_Helm_Chart_Lock 47.02899211966917 ns/iter 53.410933458744914 ns/iter 0.88
JSON_Equality_Helm_Chart_Lock 130.7395012770654 ns/iter 127.46254246663268 ns/iter 1.03
Regex_Lower_S_Or_Upper_S_Asterisk 1.5951623180930332 ns/iter 1.719914313100771 ns/iter 0.93
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 1.5868845741974344 ns/iter 1.7112034438478128 ns/iter 0.93
Regex_Period_Asterisk 1.5754372550777291 ns/iter 1.7007634468417419 ns/iter 0.93
Regex_Group_Period_Asterisk_Group 1.5856035235753745 ns/iter 1.9227863884594125 ns/iter 0.82
Regex_Period_Plus 1.8933440811271889 ns/iter 2.169283169868119 ns/iter 0.87
Regex_Period 1.8948130565194174 ns/iter 2.056124115889789 ns/iter 0.92
Regex_Caret_Period_Plus_Dollar 1.9074948685762332 ns/iter 2.2837940645852157 ns/iter 0.84
Regex_Caret_Group_Period_Plus_Group_Dollar 1.8796610790515198 ns/iter 2.7351230869045504 ns/iter 0.69
Regex_Caret_Period_Asterisk_Dollar 1.5661841438868387 ns/iter 1.7644744974923465 ns/iter 0.89
Regex_Caret_Group_Period_Asterisk_Group_Dollar 1.5977113670960632 ns/iter 1.7146642003499248 ns/iter 0.93
Regex_Caret_X_Hyphen 6.277528477041983 ns/iter 6.561703164381995 ns/iter 0.96
Regex_Period_Md_Dollar 67.19666354170874 ns/iter 72.8895404335288 ns/iter 0.92
Regex_Caret_Slash_Period_Asterisk 4.788997347898685 ns/iter 6.217491615747055 ns/iter 0.77
Regex_Caret_Period_Range_Dollar 2.0466427032187338 ns/iter 2.181082564328603 ns/iter 0.94
Regex_Nested_Backtrack 697.3788181748639 ns/iter 733.8339915991335 ns/iter 0.95
Pointer_Object_Traverse 25.89021168987009 ns/iter 22.404704365634842 ns/iter 1.16
Pointer_Object_Try_Traverse 32.31510301070527 ns/iter 29.0930996642658 ns/iter 1.11

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/llvm)

Benchmark suite Current: eac39ce Previous: 1f68221 Ratio
JSON_Array_Of_Objects_Unique 441.790382259756 ns/iter 428.76310683787995 ns/iter 1.03
JSON_Parse_1 30371.301773357216 ns/iter 29995.876027014034 ns/iter 1.01
JSON_Fast_Hash_Helm_Chart_Lock 67.81864445065021 ns/iter 67.47528515057881 ns/iter 1.01
JSON_Equality_Helm_Chart_Lock 147.08359167565405 ns/iter 141.57529153091772 ns/iter 1.04
Regex_Lower_S_Or_Upper_S_Asterisk 2.201597832407949 ns/iter 2.191014461256072 ns/iter 1.00
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 2.214365384922537 ns/iter 2.184394732791691 ns/iter 1.01
Regex_Period_Asterisk 2.196519004102141 ns/iter 2.1838603604621314 ns/iter 1.01
Regex_Group_Period_Asterisk_Group 2.205243983308975 ns/iter 2.203542276266959 ns/iter 1.00
Regex_Period_Plus 2.7975444611177394 ns/iter 2.805541871460306 ns/iter 1.00
Regex_Period 3.029946286378688 ns/iter 2.7990715767155567 ns/iter 1.08
Regex_Caret_Period_Plus_Dollar 2.4862561678448767 ns/iter 2.80055967958819 ns/iter 0.89
Regex_Caret_Group_Period_Plus_Group_Dollar 2.7960903374300425 ns/iter 2.799933034341434 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 2.289185095123084 ns/iter 3.421011132627143 ns/iter 0.67
Regex_Caret_Group_Period_Asterisk_Group_Dollar 2.4850054495518727 ns/iter 3.4305609383280653 ns/iter 0.72
Regex_Caret_X_Hyphen 12.66024028073257 ns/iter 12.679874635773682 ns/iter 1.00
Regex_Period_Md_Dollar 73.56274416058307 ns/iter 74.2932689433822 ns/iter 0.99
Regex_Caret_Slash_Period_Asterisk 5.906451083779363 ns/iter 5.709489272902125 ns/iter 1.03
Regex_Caret_Period_Range_Dollar 4.038702256103094 ns/iter 3.1089441140468095 ns/iter 1.30
Regex_Nested_Backtrack 497.86433831607724 ns/iter 496.4402826334702 ns/iter 1.00
Pointer_Object_Traverse 44.44493184743774 ns/iter 34.208199319040766 ns/iter 1.30
Pointer_Object_Try_Traverse 52.653254513148724 ns/iter 47.6839834439575 ns/iter 1.10

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/gcc)

Benchmark suite Current: eac39ce Previous: 1f68221 Ratio
Pointer_Object_Traverse 44.120551688401605 ns/iter 40.61760355024703 ns/iter 1.09
Pointer_Object_Try_Traverse 22.412345313707863 ns/iter 22.39356542059181 ns/iter 1.00
Regex_Lower_S_Or_Upper_S_Asterisk 3.1217909627209632 ns/iter 3.1071710810634854 ns/iter 1.00
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 3.1203334745002893 ns/iter 3.1076433396810303 ns/iter 1.00
Regex_Period_Asterisk 3.1107853486305346 ns/iter 3.1117945952908066 ns/iter 1.00
Regex_Group_Period_Asterisk_Group 3.110425891641492 ns/iter 3.111290009667922 ns/iter 1.00
Regex_Period_Plus 3.122400290460424 ns/iter 3.1066867998136676 ns/iter 1.01
Regex_Period 3.131684909449099 ns/iter 3.107150025897235 ns/iter 1.01
Regex_Caret_Period_Plus_Dollar 3.1435460947350826 ns/iter 3.1731837143262593 ns/iter 0.99
Regex_Caret_Group_Period_Plus_Group_Dollar 3.121697029696118 ns/iter 3.1091394132982155 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 3.1283711395362346 ns/iter 4.040336712593923 ns/iter 0.77
Regex_Caret_Group_Period_Asterisk_Group_Dollar 3.12673197354188 ns/iter 4.0389183629613665 ns/iter 0.77
Regex_Caret_X_Hyphen 12.429402300235688 ns/iter 12.429364527130126 ns/iter 1.00
Regex_Period_Md_Dollar 90.00922239283624 ns/iter 90.56419721676627 ns/iter 0.99
Regex_Caret_Slash_Period_Asterisk 6.215103333205751 ns/iter 7.767057262382037 ns/iter 0.80
Regex_Caret_Period_Range_Dollar 3.168692926758205 ns/iter 4.038639988105274 ns/iter 0.78
Regex_Nested_Backtrack 832.6321773605719 ns/iter 869.2778660859577 ns/iter 0.96
JSON_Array_Of_Objects_Unique 369.6474256523657 ns/iter 392.3917071063564 ns/iter 0.94
JSON_Parse_1 32617.496251873745 ns/iter 32991.83689053278 ns/iter 0.99
JSON_Fast_Hash_Helm_Chart_Lock 65.42151877017203 ns/iter 64.31490853996605 ns/iter 1.02
JSON_Equality_Helm_Chart_Lock 139.4102882791241 ns/iter 150.5570903419436 ns/iter 0.93

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (windows/msvc)

Benchmark suite Current: eac39ce Previous: 1f68221 Ratio
JSON_Array_Of_Objects_Unique 488.0127542883278 ns/iter 516.842946428499 ns/iter 0.94
JSON_Parse_1 77601.62946429514 ns/iter 78388.80580358168 ns/iter 0.99
JSON_Fast_Hash_Helm_Chart_Lock 54.83663392855728 ns/iter 62.94433035714673 ns/iter 0.87
JSON_Equality_Helm_Chart_Lock 243.8131201101652 ns/iter 229.15771996007777 ns/iter 1.06
Regex_Lower_S_Or_Upper_S_Asterisk 6.084942857143005 ns/iter 5.936360714286033 ns/iter 1.03
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 5.697486607143024 ns/iter 5.359570535714754 ns/iter 1.06
Regex_Period_Asterisk 5.802817857142729 ns/iter 5.529531249999918 ns/iter 1.05
Regex_Group_Period_Asterisk_Group 5.747089285713563 ns/iter 5.327892857143825 ns/iter 1.08
Regex_Period_Plus 5.946541294642925 ns/iter 5.956353571428557 ns/iter 1.00
Regex_Period 6.077475892855741 ns/iter 5.913234821428707 ns/iter 1.03
Regex_Caret_Period_Plus_Dollar 6.33989375000153 ns/iter 5.604823214285253 ns/iter 1.13
Regex_Caret_Group_Period_Plus_Group_Dollar 6.4055279017846285 ns/iter 5.857733035714067 ns/iter 1.09
Regex_Caret_Period_Asterisk_Dollar 5.668133035714261 ns/iter 5.511880357142372 ns/iter 1.03
Regex_Caret_Group_Period_Asterisk_Group_Dollar 5.691183000001274 ns/iter 5.284080000000131 ns/iter 1.08
Regex_Caret_X_Hyphen 14.903039665612829 ns/iter 14.795495535714512 ns/iter 1.01
Regex_Period_Md_Dollar 150.57752232142514 ns/iter 151.10200892856085 ns/iter 1.00
Regex_Caret_Slash_Period_Asterisk 9.161607548384382 ns/iter 8.909771875000416 ns/iter 1.03
Regex_Caret_Period_Range_Dollar 6.1549580357141815 ns/iter 6.026391071428756 ns/iter 1.02
Regex_Nested_Backtrack 578.3993749998575 ns/iter 604.2289285714122 ns/iter 0.96
Pointer_Object_Traverse 39.546573660714 ns/iter 31.7705401785722 ns/iter 1.24
Pointer_Object_Try_Traverse 57.00504464286383 ns/iter 50.73775000000751 ns/iter 1.12

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.