From b0a0c08fa5f912f4a2270ddd08712fde62e67be9 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 26 Feb 2018 19:04:30 -0800 Subject: [PATCH 1/3] use indices.offset in DictionaryData constructor --- js/src/data.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/js/src/data.ts b/js/src/data.ts index cdd9f29a7a908..3bfb3209b696b 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -152,10 +152,9 @@ export class DictionaryData extends BaseData> public get indices() { return this._indices; } public get dictionary() { return this._dictionary; } constructor(type: Dictionary, dictionary: Vector, indices: Data>) { - super(type, indices.length, (indices as any)._nullCount); + super(type, indices.length, indices.offset, (indices as any)._nullCount); this._indices = indices; this._dictionary = dictionary; - this.length = this._indices.length; } public get nullCount() { return this._indices.nullCount; } public get nullBitmap() { return this._indices.nullBitmap; } From 2888657595dda4d52f1af9581d1d2d15e0150ca7 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Wed, 28 Feb 2018 10:50:28 -0500 Subject: [PATCH 2/3] Add dictionary vector unit tests --- js/test/unit/vector-tests.ts | 61 ++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/js/test/unit/vector-tests.ts b/js/test/unit/vector-tests.ts index e2be229834f8e..3eb3fbe0195b3 100644 --- a/js/test/unit/vector-tests.ts +++ b/js/test/unit/vector-tests.ts @@ -17,14 +17,15 @@ import { TextEncoder } from 'text-encoding-utf-8'; import Arrow from '../Arrow'; -import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow'; +import { type, TypedArray, TypedArrayConstructor, Vector } from '../../src/Arrow'; +import { packBools } from '../../src/util/bit' const utf8Encoder = new TextEncoder('utf-8'); -const { BoolData, FlatData, FlatListData } = Arrow.data; -const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector; +const { BoolData, FlatData, 
FlatListData, DictionaryData } = Arrow.data; +const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow.vector; const { - Utf8, Bool, + Dictionary, Utf8, Bool, Float16, Float32, Float64, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, @@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => { let offset = 0; const offsets = Uint32Array.of(0, ...values.map((d) => { offset += d.length; return offset; })); const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null, offsets, utf8Encoder.encode(values.join('')))); + basicVectorTests(vector, values, ['abc', '123']); + describe(`sliced`, () => { + basicVectorTests(vector.slice(1,3), values.slice(1,3), ['foo', 'abc']); + }); +}); + +describe(`DictionaryVector`, () => { + const dictionary = ['foo', 'bar', 'baz']; + const extras = ['abc', '123']; // values to search for that should NOT be found + let offset = 0; + const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset += d.length; return offset; })); + const dictionary_vec = new Utf8Vector(new FlatListData(new Utf8(), dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join('')))); + + const indices = Array.from({length: 50}, () => Math.random() * 3 | 0); + + describe(`index with nullCount == 0`, () => { + const indices_data = new FlatData(new Int32(), indices.length, new Uint8Array(0), indices); + + const values = Array.from(indices).map((d) => dictionary[d]); + const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, indices_data)); + + basicVectorTests(vector, values, extras); + + describe(`sliced`, () => { + basicVectorTests(vector.slice(10, 20), values.slice(10,20), extras); + }) + }); + + describe(`index with nullCount > 0`, () => { + const validity = Array.from({length: indices.length}, () => Math.random() > 0.2 ? 
true : false); + const indices_data = new FlatData(new Int32(), indices.length, packBools(validity), indices, 0, validity.reduce((acc, d) => acc + (d ? 0 : 1), 0)); + const values = Array.from(indices).map((d, i) => validity[i] ? dictionary[d] : null); + const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, indices_data)); + + basicVectorTests(vector, values, ['abc', '123']); + describe(`sliced`, () => { + basicVectorTests(vector.slice(10, 20), values.slice(10,20), extras); + }); + }); +}); + +// Creates some basic tests for the given vector. +// Verifies that: +// - `get` and the native iterator return the same data as `values` +// - `indexOf` returns the same indices as `values` +function basicVectorTests(vector: Vector, values: any[], extras: any[]) { + const n = values.length; + test(`gets expected values`, () => { let i = -1; while (++i < n) { @@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => { } }); test(`indexOf returns expected values`, () => { - let testValues = values.concat(['abc', '12345']); + let testValues = values.concat(extras); for (const value of testValues) { const expected = values.indexOf(value); expect(vector.indexOf(value)).toEqual(expected); } }); -}); +} function toMap(entries: Record, keys: string[]) { return keys.reduce((map, key) => { From 3fb9a265fe9d613696b0bddb9e0bb81cc60594fe Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Wed, 28 Feb 2018 12:30:01 -0500 Subject: [PATCH 3/3] Fix bug in DictionaryVector with nullable indices --- js/src/vector.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/js/src/vector.ts b/js/src/vector.ts index f36c691e1bd27..6c2bbbb86a7d2 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -399,6 +399,9 @@ export class DictionaryVector extends Vector; constructor(data: Data>, view: View> = new DictionaryView(data.dictionary, new IntVector(data.indices))) { super(data as Data, view); + if (view instanceof ValidityView) { + view 
= (view as any).view; + } if (data instanceof DictionaryData && view instanceof DictionaryView) { this.indices = view.indices; this.dictionary = data.dictionary;