-
Notifications
You must be signed in to change notification settings - Fork 211
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(arrow): Experimental module for Apache Arrow attribute data extr…
…action (#2278)
- Loading branch information
Showing
22 changed files
with
827 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# @luma.gl/arrow | ||
|
||
This module provides Apache Arrow utilities for luma.gl. | ||
|
||
See [luma.gl](http://luma.gl) for documentation. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
{ | ||
"private": true, | ||
"name": "@luma.gl/arrow", | ||
"description": "luma.gl Apache Arrow bindings", | ||
"version": "9.2.0-alpha.0", | ||
"license": "MIT", | ||
"type": "module", | ||
"publishConfig": { | ||
"access": "public" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/visgl/luma.gl" | ||
}, | ||
"keywords": [ | ||
"webgl", | ||
"visualization", | ||
"animation", | ||
"3d" | ||
], | ||
"types": "dist/index.d.ts", | ||
"main": "dist/index.cjs", | ||
"module": "dist/index.js", | ||
"exports": { | ||
".": { | ||
"import": "./dist/index.js", | ||
"require": "./dist/index.cjs", | ||
"types": "./dist/index.d.ts" | ||
} | ||
}, | ||
"files": [ | ||
"src", | ||
"dist", | ||
"dist.min.js", | ||
"README.md" | ||
], | ||
"sideEffects": false, | ||
"scripts": { | ||
"build-minified-bundle": "ocular-bundle ./bundle.ts --output=dist/dist.min.js", | ||
"build-dev-bundle": "ocular-bundle ./bundle.ts --output=dist/dist.dev.js --env=dev", | ||
"prepublishOnly": "npm run build-minified-bundle && npm run build-dev-bundle" | ||
}, | ||
"dependencies": { | ||
"@luma.gl/core": "9.2.0-alpha.0", | ||
"@math.gl/polygon": "^4.1.0", | ||
"apache-arrow": "^17.0.0" | ||
}, | ||
"gitHead": "c636c34b8f1581eed163e94543a8eb1f4382ba8e" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// luma.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import * as arrow from 'apache-arrow'; | ||
import {getArrowPaths} from './arrow-paths'; | ||
import {ArrowColumnInfo} from './arrow-types'; | ||
import {getArrowColumnInfo} from './arrow-column-info'; | ||
|
||
/**
 * Collects GPU column info for every column path found in an Arrow table.
 *
 * @param arrowTable The table to inspect.
 * @returns A map from column path to column info. Paths for which
 *   `getArrowColumnInfo` returns a falsy value are left out of the map.
 */
export function analyzeArrowTable(arrowTable: arrow.Table): Record<string, ArrowColumnInfo> {
  const infoByPath: Record<string, ArrowColumnInfo> = {};

  getArrowPaths(arrowTable).forEach(columnPath => {
    const info = getArrowColumnInfo(arrowTable, columnPath);
    if (!info) {
      return;
    }
    infoByPath[columnPath] = info;
  });

  return infoByPath;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// luma.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
import * as arrow from 'apache-arrow'; | ||
import { | ||
AttributeArrowType, | ||
NumericArrowType, | ||
ArrowColumnInfo, | ||
isNumericArrowType, | ||
isInstanceArrowType, | ||
// isVertexArrowType, | ||
getSignedShaderType | ||
} from './arrow-types'; | ||
import {getArrowVectorByPath} from './arrow-paths'; | ||
|
||
/**
 * Resolves the column at `path` and describes it as a GPU data source.
 *
 * @param arrowTable The table that contains the column.
 * @param path Column path, as understood by `getArrowVectorByPath`.
 * @returns Column info for instance-compatible columns, `null` for any other column type.
 */
export function getArrowColumnInfo(arrowTable: arrow.Table, path: string): ArrowColumnInfo | null {
  const column = getArrowVectorByPath(arrowTable, path);

  // TODO - vertex (list) columns are not handled yet:
  // if (isVertexArrowType(column.type)) {
  //   return getVertexColumnInfo(column);
  // }
  return isInstanceArrowType(column.type) ? getInstanceColumnInfo(column) : null;
}
|
||
/**
 * Extracts info from a column that can be used with GPU instanced attributes.
 *
 * @param vector A numeric column, or a fixed size list column holding 1-4 numeric values per row.
 * @returns The shader data type, the number of components per row and one value array
 *   per Arrow data chunk of the (child) values. `offsets` is left empty.
 * @throws If the fixed list size is outside 1-4, if a fixed size list has no child values,
 *   or if the (child) values are not numeric.
 */
export function getInstanceColumnInfo(vector: arrow.Vector<AttributeArrowType>): ArrowColumnInfo {
  let components: 1 | 2 | 3 | 4 = 1;
  let dataVector = vector as arrow.Vector<NumericArrowType>;

  if (arrow.DataType.isFixedSizeList(vector.type)) {
    const {listSize} = vector.type;
    if (listSize < 1 || listSize > 4) {
      throw new Error('Attribute column fixed list size must be between 1 and 4');
    }
    components = listSize as 1 | 2 | 3 | 4;

    // `getChild()` looks children up by field *name*, so `getChild(0)` never matches and
    // yields null. The single child of a fixed size list has to be fetched by index.
    const childVector = vector.getChildAt<NumericArrowType>(0);
    if (!childVector) {
      throw new Error('Attribute column fixed size list has no child values');
    }
    dataVector = childVector;
  }

  if (!isNumericArrowType(dataVector.type)) {
    throw new Error('Attribute column must be numeric or fixed list of numeric');
  }

  const signedDataType = getSignedShaderType(dataVector.type, components);

  const columnInfo: ArrowColumnInfo = {
    signedDataType,
    components,
    stepMode: 'instance',
    values: [],
    offsets: []
  };

  // One value array per Arrow data chunk of the numeric (child) vector
  for (const data of dataVector.data) {
    columnInfo.values.push(data.values);
  }
  return columnInfo;
}
|
||
/** Extracts info from columns that can be used with GPU vertex attributes * | ||
export function getVertexColumnInfo(vector: arrow.Vector<MeshArrowType>): MeshData[] { | ||
if (!arrow.DataType.isList(vector.type)) { | ||
throw new Error('mesh data must be an Arrow list'); | ||
} | ||
for (const data of vector.data) { | ||
const offsets = data.valueOffsets; | ||
if (arrow.DataType.isFixedSizeList(vector.type)) { | ||
const dataVector = vector.getChild(0)!; | ||
const getArrowColumnInfo | ||
const dataVectorType = dataVector.type; | ||
if (isNumericArrowType(dataVectorType)) { | ||
return { | ||
data: dataVector.data, | ||
values: dataVector.data.values, | ||
size: vector.type.listSize, | ||
type: getAttributeShaderType(dataVectorType) | ||
}; | ||
} | ||
const size = dataVector; | ||
return vector.getChild(0)!.data; | ||
} | ||
return vector.data; | ||
} | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
// luma.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import * as arrow from 'apache-arrow'; | ||
|
||
/**
 * Lists the dot-separated paths of all non-struct columns reachable from an Arrow object.
 * Only the first data chunk of the object is inspected.
 *
 * @param arrowObject Arrow data, table, record batch or vector.
 * @returns The column paths, or an empty array when the object has no data chunks.
 */
export function getArrowPaths(
  arrowObject: arrow.Data | arrow.Table | arrow.RecordBatch | arrow.Vector
): string[] {
  const [data] = getArrowDataArray(arrowObject);
  // Without any data chunk there is nothing to walk: report no columns instead of crashing
  return data ? getArrowPathsRecursive(data, []) : [];
}
|
||
/**
 * Recursively collects the dot-separated paths of all non-struct columns below `arrowData`.
 *
 * @param arrowData The data to inspect. Struct data is descended into, anything else is a leaf.
 * @param currentPath Field names leading from the root to `arrowData`.
 * @returns One path per leaf. A non-struct root yields the single path `''`,
 *   a struct without fields yields no paths.
 */
export function getArrowPathsRecursive(arrowData: arrow.Data, currentPath: string[]): string[] {
  if (!arrow.DataType.isStruct(arrowData.type)) {
    return [currentPath.join('.')];
  }

  const fields = arrowData.type.children;
  const nestedPaths: string[] = [];
  fields.forEach((field, fieldIndex) => {
    // Child data is stored in the same order as the struct's fields
    const fieldData = arrowData.children[fieldIndex];
    nestedPaths.push(...getArrowPathsRecursive(fieldData, [...currentPath, field.name]));
  });

  return nestedPaths;
}
|
||
/**
 * Resolves a dot-separated column path to the Arrow data of that column.
 * Only the first data chunk of the object is searched.
 *
 * @param arrowObject Arrow data, table, record batch or vector to search.
 * @param columnPath Dot-separated field names leading to a non-struct column.
 * @returns The data of the addressed column.
 * @throws If the object has no data, if an intermediate column is not a struct, if a field
 *   name is not found, or if the path ends on a struct.
 */
export function getArrowDataByPath(
  arrowObject: arrow.Data | arrow.Table | arrow.RecordBatch | arrow.Vector,
  columnPath: string
): arrow.Data {
  const [data] = getArrowDataArray(arrowObject);
  if (!data) {
    throw new Error('Arrow object does not contain any data');
  }

  const path = decomposePath(columnPath);
  const fullPath = path.join('.');

  let nestedData = data;
  for (const key of path) {
    if (!arrow.DataType.isStruct(nestedData.type)) {
      throw new Error(`Arrow table nested column is not a struct: '${key}' in '${fullPath}'`);
    }
    const fields = nestedData.type.children;
    const indexByField = fields.findIndex(field => field.name === key);
    if (indexByField === -1) {
      throw new Error(
        `Arrow table schema does not contain nested column '${key}' in '${fullPath}'`
      );
    }

    // Child data is stored in the same order as the struct's fields
    nestedData = nestedData.children[indexByField];
  }

  // Check that we resolved all the intermediate structs
  if (arrow.DataType.isStruct(nestedData.type)) {
    throw new Error(`Arrow table nested column '${fullPath}' is a struct`);
  }

  return nestedData;
}
|
||
/**
 * Resolves a dot-separated column path to the Arrow vector of that column.
 *
 * @param arrowTable The table to search.
 * @param columnPath Dot-separated field names leading to a non-struct column.
 * @returns The vector of the addressed column.
 * @throws If an intermediate column is not a struct, if a field name is not found,
 *   or if the path ends on a struct.
 */
export function getArrowVectorByPath(arrowTable: arrow.Table, columnPath: string): arrow.Vector {
  // Make a temporary vector from the top level struct data.
  const vector = arrow.makeVector(arrowTable.data);

  const path = decomposePath(columnPath);
  const fullPath = path.join('.');

  let nestedVector = vector;
  for (const key of path) {
    if (!arrow.DataType.isStruct(nestedVector.type)) {
      throw new Error(`Arrow table nested column is not a struct: '${key}' in '${fullPath}'`);
    }
    const fields = nestedVector.type.children;
    const indexByField = fields.findIndex(field => field.name === key);
    if (indexByField === -1) {
      throw new Error(
        `Arrow table schema does not contain nested column '${key}' in '${fullPath}'`
      );
    }

    // The index was found in this vector's own field list, so the child lookup is by position
    nestedVector = nestedVector.getChildAt(indexByField)!;
  }

  // Check that we resolved all the intermediate structs
  if (arrow.DataType.isStruct(nestedVector.type)) {
    throw new Error(`Arrow table nested column '${fullPath}' is a struct`);
  }

  return nestedVector;
}
|
||
/**
 * Gets the array of data chunks backing an Arrow object.
 *
 * @param arrowObject Arrow data, table, record batch or vector.
 * @returns The `data` array of a table, a shallow copy of a vector's `data` array,
 *   or a one-element array wrapping a record batch's data or the data object itself.
 */
export function getArrowDataArray(
  arrowObject: arrow.Data | arrow.Table | arrow.RecordBatch | arrow.Vector
): arrow.Data[] {
  if (arrowObject instanceof arrow.Table) {
    return arrowObject.data;
  }
  if (arrowObject instanceof arrow.RecordBatch) {
    return [arrowObject.data];
  }
  if (arrowObject instanceof arrow.Vector) {
    // `Vector.data` is a read-only array. Returning a shallow copy satisfies the mutable
    // return type without suppressing the type error, and keeps callers from modifying
    // the vector's own chunk list.
    return [...arrowObject.data];
  }
  return [arrowObject];
}
|
||
// HELPER FUNCTIONS | ||
|
||
/**
 * Splits a dot-separated column path into its field names.
 *
 * The empty path addresses the root object itself and yields no field names. This mirrors
 * `getArrowPathsRecursive`, which reports a non-struct root as `[].join('.')`, i.e. `''`.
 * A plain `''.split('.')` would instead produce `['']` and trigger a lookup of a field named ''.
 */
function decomposePath(path: string): string[] {
  return path === '' ? [] : path.split('.');
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// luma.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type {SignedDataType, BigTypedArray} from '@luma.gl/core'; | ||
import * as arrow from 'apache-arrow'; | ||
|
||
export type NumericArrowType = arrow.Int | arrow.Float; | ||
|
||
/** An instance attribute-compatible column - has 1-4 (fixed) numeric values per row */ | ||
export type AttributeArrowType = NumericArrowType | arrow.FixedSizeList<NumericArrowType>; | ||
|
||
/** A non-instance attribute compatible column - has a list of 1-4 (fixed) numeric values per row */ | ||
export type MeshArrowType = arrow.List<NumericArrowType | arrow.FixedSizeList<NumericArrowType>>; | ||
|
||
/** Extracted information required to populate a mesh */ | ||
export type ArrowColumnInfo = { | ||
// Attribute step mode; getInstanceColumnInfo always reports 'instance' | ||
stepMode: 'instance' | 'vertex'; | ||
// Shader data type, as returned by getSignedShaderType for the column's numeric values | ||
signedDataType: SignedDataType; | ||
// Numeric values per row: 1 for plain numeric columns, the list size for fixed size lists | ||
components: 1 | 2 | 3 | 4; | ||
// Value arrays; getInstanceColumnInfo adds one entry per Arrow data chunk of the column | ||
values: BigTypedArray[]; | ||
// Left empty by getInstanceColumnInfo; presumably reserved for list (vertex) columns - TODO confirm | ||
offsets: Uint32Array[][]; | ||
}; | ||
|
||
/** Type guard: true when the Arrow type is a floating point or an integer type */
export function isNumericArrowType(type: arrow.DataType): type is arrow.Int | arrow.Float {
  if (arrow.DataType.isFloat(type)) {
    return true;
  }
  return arrow.DataType.isInt(type);
}
|
||
/**
 * Instance = One "vec1-vec4 value" per step.
 * Accepts numeric types and fixed size lists whose first child is numeric.
 */
export function isInstanceArrowType(type: arrow.DataType): type is AttributeArrowType {
  if (isNumericArrowType(type)) {
    return true;
  }
  if (!arrow.DataType.isFixedSizeList(type)) {
    return false;
  }
  // TODO - check listSize?
  const elementType = type.children[0].type;
  return isNumericArrowType(elementType);
}
|
||
/**
 * Vertex = Multiple "vec1-vec4 values" per step.
 * Accepts lists whose first child is an instance-compatible type.
 */
export function isVertexArrowType(type: arrow.DataType): type is MeshArrowType {
  if (!arrow.DataType.isList(type)) {
    return false;
  }
  const elementType = type.children[0].type;
  return isInstanceArrowType(elementType);
}
|
||
/**
 * Maps an Apache Arrow numeric type to the corresponding luma.gl signed shader type.
 *
 * @param arrowType Integer or floating point Arrow type.
 * @param size Number of components per value; currently not used by the mapping.
 * @returns The shader data type for 8/16/32-bit integers and half/single precision floats.
 * @throws For 64-bit integers, double precision floats and any other unmatched type.
 */
export function getSignedShaderType(
  arrowType: NumericArrowType,
  size: 1 | 2 | 3 | 4
): SignedDataType {
  if (arrow.DataType.isInt(arrowType)) {
    const width = arrowType.bitWidth;
    if (width === 8) {
      return arrowType.isSigned ? 'sint8' : 'uint8';
    }
    if (width === 16) {
      return arrowType.isSigned ? 'sint16' : 'uint16';
    }
    if (width === 32) {
      return arrowType.isSigned ? 'sint32' : 'uint32';
    }
    if (width === 64) {
      throw new Error('64-bit integers are not supported in shaders');
    }
    // Any other bit width falls through to the generic error below
  }

  if (arrow.DataType.isFloat(arrowType)) {
    const floatPrecision = arrowType.precision;
    if (floatPrecision === arrow.Precision.HALF) {
      return 'float16';
    }
    if (floatPrecision === arrow.Precision.SINGLE) {
      return 'float32';
    }
    if (floatPrecision === arrow.Precision.DOUBLE) {
      throw new Error('Double precision floats are not supported in shaders');
    }
  }

  throw new Error(`Unsupported arrow type ${arrowType}`);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// luma.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
export type {NumericArrowType, ArrowColumnInfo} from './arrow/arrow-types'; | ||
export { | ||
isNumericArrowType | ||
// isInstanceArrowType, | ||
// isVertexArrowType, | ||
} from './arrow/arrow-types'; | ||
|
||
export {getArrowPaths, getArrowDataByPath, getArrowVectorByPath} from './arrow/arrow-paths'; | ||
|
||
export {getArrowColumnInfo} from './arrow/arrow-column-info'; | ||
|
||
export {analyzeArrowTable} from './arrow/analyze-arrow-table'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// luma.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import test from 'tape-promise/tape'; | ||
import {ARROW_TABLES} from '@luma.gl/arrow/test/data/arrow/make-arrow-tables'; | ||
import {analyzeArrowTable} from '@luma.gl/arrow'; | ||
|
||
// Smoke test: analyzeArrowTable must run on both fixture tables without throwing.
// The test was previously labelled 'getArrowDataByPath', which it does not exercise.
test('analyzeArrowTable', async t => {
  const {simpleTable, nestedTable} = ARROW_TABLES;

  const simpleColumns = analyzeArrowTable(simpleTable);
  t.ok(simpleColumns, 'extracted info from simple table');
  t.comment(JSON.stringify(simpleColumns));

  const nestedColumns = analyzeArrowTable(nestedTable);
  t.ok(nestedColumns, 'extracted info from nested table');
  t.comment(JSON.stringify(nestedColumns));

  t.end();
});
Oops, something went wrong.