Skip to content

Commit

Permalink
use xxhash-wasm
Browse files Browse the repository at this point in the history
  • Loading branch information
shannonwells committed May 11, 2021
1 parent 5547ad4 commit 22829dd
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 55 deletions.
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
nodejs 15.12.0
15 changes: 8 additions & 7 deletions lib/bloom/sbbf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ class SplitBlockBloomFilter {
private numBlocks: number = 0
private numDistinctValues: number = SplitBlockBloomFilter.DEFAULT_DISTINCT_VALUES
private hashStrategy = new parquet_thrift.BloomFilterHash(parquet_thrift.XxHash)
private hasher = new XxHasher()

private isInitialized(): boolean { return this.splitBlockFilter.length > 0 }

Expand Down Expand Up @@ -351,12 +352,12 @@ class SplitBlockBloomFilter {
return this
}

hash(value: any): Long {
async hash(value: any): Promise<Long> {
if (!this.hashStrategy.hasOwnProperty("XXHASH")) {
throw new Error("unsupported hash strategy")
}

return Long.fromString(XxHasher.hash64(value), true, 16)
const hashed = await this.hasher.hash64(value)
return Long.fromString(hashed, true, 16)
}

private insertHash(hashValue: Long): void {
Expand All @@ -372,9 +373,9 @@ class SplitBlockBloomFilter {
* @param value: the value to add, e.g. an unsigned Long. If not a string, it is converted with toString() before hashing
* @return void
*/
insert(value: any): void {
async insert(value: any): Promise<void> {
if (!this.isInitialized()) throw new Error("filter has not been initialized. call init() first")
this.insertHash(this.hash(value))
this.insertHash(await this.hash(value))
}

private checkHash(hashValue: Long): boolean {
Expand All @@ -390,9 +391,9 @@ class SplitBlockBloomFilter {
* @return true if hashed item is found in the data set represented by this filter
* @return false if it is __definitely not__ in the data set.
*/
check(value: any): boolean {
async check(value: any): Promise<boolean> {
if (!this.isInitialized()) throw new Error("filter has not been initialized")
return this.checkHash(this.hash(value))
return this.checkHash(await this.hash(value))
}
}

Expand Down
65 changes: 28 additions & 37 deletions lib/bloom/xxhasher.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
const xxhash = require("xxhash");
import xxhash from "xxhash-wasm";
import Long from "long"

const HASH_SEED = 0x0
type SupportedType = Buffer | Uint8Array | Long | string | number | bigint | boolean
type HasherFunc = (input: string, seedHigh?: number, seedLow?: number) => string

/**
* @class XxHasher
Expand All @@ -15,43 +14,35 @@ type SupportedType = Buffer | Uint8Array | Long | string | number | bigint | boo
* [xxHash spec](https://github.com/Cyan4973/xxHash/blob/v0.7.0/doc/xxhash_spec.md)
*/
class XxHasher {
private static hashWithToString(value: any): string {
return xxhash.hash64(Buffer.from(value.toString()), HASH_SEED, 'hex')
}

private static hash64Buffer(value: Buffer): string {
return xxhash.hash64(value, HASH_SEED, 'hex')
}

private static hash64Bytes(value: string | Uint8Array): string {
return xxhash.hash64(Buffer.from(value), HASH_SEED, 'hex')
}


/**
* @function hash64
* @description attempts to create a hash for certain data types.
* @return the 64 bit XXHash as a string
* @param value one of the supported types; any other type throws an error.
*/
static hash64(value: SupportedType): string {
if (value instanceof Buffer) return this.hash64Buffer(value)

if (value instanceof Uint8Array) return this.hash64Bytes(value)

if (value instanceof Long) return this.hashWithToString(value)
hasher: HasherFunc | undefined

/**
 * Hashes a string with the h64 function obtained from xxhash().
 * The function is fetched on first use and cached in `this.hasher`, so
 * later calls skip the `await xxhash()` step.
 * @param value the string to hash
 * @return whatever h64 returns for the string (declared as string)
 */
private async hashit(value: string): Promise<string> {
  // Narrow through a local so the compiler can prove the function is defined,
  // which removes the need for a @ts-ignore on the call.
  let hashFn = this.hasher
  if (hashFn === undefined) {
    const {h64} = await xxhash()
    hashFn = h64
    this.hasher = hashFn
  }
  return hashFn(value)
}

switch (typeof value) {
case 'string':
return this.hash64Bytes(value)
case 'number': // FLOAT, DOUBLE, INT32?
case 'bigint':
case 'boolean':
return this.hashWithToString(value)
default:
/**
 * @function hash64
 * @description creates a hash for the supported data types. Strings are hashed
 * as they are; every other supported type is converted with toString() first.
 * @param value a string, Buffer, Uint8Array, Long, boolean, number or bigint
 * @return the 64 bit XXHash as a string
 * @throws Error with message "unsupported type: ..." (as a rejected promise)
 * when value is of any other type
 */
async hash64(value: any): Promise<string> {
  if (typeof value === 'string') return this.hashit(value)
  if (value instanceof Buffer ||
    value instanceof Uint8Array ||
    value instanceof Long ||
    typeof value === 'boolean' ||
    typeof value === 'number' ||
    typeof value === 'bigint') {
    // NOTE(review): Buffer#toString() decodes the bytes as UTF-8, while
    // Uint8Array#toString() joins the decimal byte values with commas, so the
    // same bytes hash differently depending on the container - confirm this is
    // intended.
    return this.hashit(value.toString())
  }
  // String() instead of `+` so that a Symbol is reported as an unsupported
  // type rather than raising "Cannot convert a Symbol value to a string".
  throw new Error("unsupported type: " + String(value))
}
}
}

export = XxHasher;
18 changes: 16 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"thrift": "^0.14.1",
"typescript": "^4.2.3",
"varint": "^5.0.0",
"xxhash": "^0.3.0"
"xxhash-wasm": "^0.4.1"
},
"devDependencies": {
"@babel/core": "7.13.10",
Expand All @@ -52,6 +52,7 @@
"test": "mocha -r ts-node/register 'test/**/*.{js,ts}'"
},
"engines": {
"node": ">=14.16.0"
"node": ">=15.12.0",
"npm": ">=7.6.0"
}
}
20 changes: 13 additions & 7 deletions test/sbbf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,10 @@ describe("Split Block Bloom Filters", () => {
]
const filter = new SplitBlockBloomFilter().setOptionNumDistinct(1000).init()
testCases.forEach(tc => {
it(`works for a ${tc.name} type`, () => {
filter.insert(tc.val)
expect(filter.check(tc.val))
// Inserts the value and verifies the filter then reports it as present.
it(`works for a ${tc.name} type`, async () => {
  await filter.insert(tc.val)
  // check() is async: without the await, isPresent would be a Promise (always
  // truthy), and a bare expect() with no matcher asserts nothing.
  const isPresent = await filter.check(tc.val)
  expect(isPresent).to.eq(true)
})
})

Expand All @@ -263,10 +264,15 @@ describe("Split Block Bloom Filters", () => {
{name: "Map", val: new Map() }
]
throwCases.forEach((tc) => {
it(`throws on type ${tc.name}`, () => {
expect(() => {
filter.insert(tc.val)
}).to.throw(/unsupported type/)
// insert() is async, so an unsupported value surfaces as a rejected promise;
// the flag makes the test fail if insert() resolves without rejecting.
it(`throws on type ${tc.name}`, async () => {
  let rejected = false
  try {
    await filter.insert(tc.val)
  } catch (err) {
    expect(err.message).to.match(/unsupported type:/)
    rejected = true
  }
  expect(rejected).to.eq(true)
})
})

Expand Down

0 comments on commit 22829dd

Please sign in to comment.