-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* [chore] common-utils module Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>
- Loading branch information
1 parent
6fd4f88
commit 13b469d
Showing
56 changed files
with
551 additions
and
376 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright contributors to the kepler.gl project | ||
|
||
const KeplerPackage = require('./package'); | ||
|
||
// Presets applied to every build of this package.
const PRESETS = ['@babel/preset-env', '@babel/preset-react', '@babel/preset-typescript'];

// Plugin entries that carry options are named here so the plugin list below stays flat.
const TYPESCRIPT_PLUGIN = [
  '@babel/plugin-transform-typescript',
  {isTSX: true, allowDeclareFields: true}
];
const RUNTIME_PLUGIN = ['@babel/transform-runtime', {regenerator: true}];
// Rule that swaps the `__PACKAGE_VERSION__` placeholder for the version read from ./package.
const VERSION_REPLACE_PLUGIN = [
  'search-and-replace',
  {
    rules: [{search: '__PACKAGE_VERSION__', replace: KeplerPackage.version}]
  }
];

// Order is kept exactly as configured originally.
const PLUGINS = [
  TYPESCRIPT_PLUGIN,
  '@babel/plugin-transform-modules-commonjs',
  '@babel/plugin-transform-class-properties',
  '@babel/plugin-transform-optional-chaining',
  '@babel/plugin-transform-logical-assignment-operators',
  '@babel/plugin-transform-nullish-coalescing-operator',
  '@babel/plugin-transform-export-namespace-from',
  RUNTIME_PLUGIN,
  VERSION_REPLACE_PLUGIN
];

// Per-environment overrides, returned under the `env` key of the config.
const ENV = {
  // `test` adds the istanbul plugin
  test: {plugins: ['istanbul']},
  // `debug` inlines source maps and keeps output lines aligned with the source
  debug: {sourceMaps: 'inline', retainLines: true}
};
|
||
module.exports = function babel(api) { | ||
api.cache(true); | ||
|
||
return { | ||
presets: PRESETS, | ||
plugins: PLUGINS, | ||
env: ENV | ||
}; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"name": "@kepler.gl/common-utils", | ||
"author": "Shan He <heshan0131@gmail.com>", | ||
"version": "3.0.0", | ||
"description": "kepler.gl common utils", | ||
"license": "MIT", | ||
"main": "dist/index.js", | ||
"types": "dist/index.d.ts", | ||
"keywords": [ | ||
"babel", | ||
"es6", | ||
"react", | ||
"webgl", | ||
"visualization", | ||
"deck.gl" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/keplergl/kepler.gl.git" | ||
}, | ||
"scripts": { | ||
"build": "rm -fr dist && babel src --out-dir dist --source-maps inline --extensions '.ts,.tsx,.js,.jsx' --ignore '**/*.d.ts'", | ||
"build:umd": "NODE_OPTIONS=--openssl-legacy-provider webpack --config ./webpack/umd.js --progress --env.prod", | ||
"build:types": "tsc --project ./tsconfig.production.json", | ||
"prepublish": "babel-node ../../scripts/license-header/bin --license ../../FILE-HEADER && yarn build && yarn build:types", | ||
"stab": "mkdir -p dist && touch dist/index.js" | ||
}, | ||
"files": [ | ||
"dist", | ||
"umd" | ||
], | ||
"dependencies": { | ||
"@kepler.gl/constants": "3.0.0", | ||
"@kepler.gl/types": "3.0.0", | ||
"d3-array": "^2.8.0", | ||
"global": "^4.3.0", | ||
"type-analyzer": "0.4.0" | ||
}, | ||
"nyc": { | ||
"sourceMap": false, | ||
"instrument": false | ||
}, | ||
"maintainers": [ | ||
"Shan He <heshan0131@gmail.com>", | ||
"Igor Dykhta <dikhta.igor@gmail.com>" | ||
], | ||
"engines": { | ||
"node": ">=18" | ||
}, | ||
"volta": { | ||
"node": "18.18.2", | ||
"yarn": "4.4.0" | ||
}, | ||
"packageManager": "yarn@4.4.0" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright contributors to the kepler.gl project | ||
|
||
import {Analyzer, DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer'; | ||
import {RowData, Field} from '@kepler.gl/types'; | ||
import {ALL_FIELD_TYPES} from '@kepler.gl/constants'; | ||
import {console as globalConsole} from 'global/window'; | ||
import {range} from 'd3-array'; | ||
import {isHexWkb, notNullorUndefined} from './data'; | ||
|
||
/**
 * Analyzer data types that are accepted during field type detection.
 * Every other key of the analyzer's `DATA_TYPES` ends up in `IGNORE_DATA_TYPES`.
 */
export const ACCEPTED_ANALYZER_TYPES = [
  AnalyzerDATA_TYPES.DATE,
  AnalyzerDATA_TYPES.TIME,
  AnalyzerDATA_TYPES.DATETIME,
  AnalyzerDATA_TYPES.NUMBER,
  AnalyzerDATA_TYPES.INT,
  AnalyzerDATA_TYPES.FLOAT,
  AnalyzerDATA_TYPES.BOOLEAN,
  AnalyzerDATA_TYPES.STRING,
  AnalyzerDATA_TYPES.GEOMETRY,
  AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING,
  AnalyzerDATA_TYPES.PAIR_GEOMETRY_FROM_STRING,
  AnalyzerDATA_TYPES.ZIPCODE,
  AnalyzerDATA_TYPES.ARRAY,
  AnalyzerDATA_TYPES.OBJECT
];

// True when the given analyzer type name appears in the accepted list.
const isAcceptedAnalyzerType = (typeName: string): boolean =>
  ACCEPTED_ANALYZER_TYPES.includes(typeName);

// Keys of the analyzer's DATA_TYPES that are not accepted; passed to the analyzer
// as `ignoredDataTypes`.
// NOTE(review): this compares DATA_TYPES *keys* against accepted *values*, so it
// presumably relies on each key being equal to its value — confirm against type-analyzer.
const IGNORE_DATA_TYPES = Object.keys(AnalyzerDATA_TYPES).filter(
  typeName => !isAcceptedAnalyzerType(typeName)
);
|
||
/**
 * Build a small sample of rows used for field type analysis.
 *
 * For every field, up to `sampleCount` values that pass `notNullorUndefined` are
 * collected, scanning the rows from the start. Each field is sampled independently,
 * so one sample row may combine values that come from different source rows.
 * String values are trimmed. When a field runs out of rows before the sample is
 * full, the remaining slots are filled with `null`.
 *
 * @param fields field names, in column order
 * @param rows source rows; either arrays (read by field position) or objects
 *   (read by field name) — decided by looking at the first row only
 * @param sampleCount maximum number of sample rows, defaults to 50
 * @returns array of sample row objects keyed by field name; `[]` when `rows` is empty
 */
export function getSampleForTypeAnalyze({
  fields,
  rows,
  sampleCount = 50
}: {
  fields: string[];
  rows: unknown[][] | RowData;
  sampleCount?: number;
}): RowData {
  // nothing to sample from
  if (rows.length < 1) {
    return [];
  }

  const total = Math.min(sampleCount, rows.length);
  const sample = range(0, total, 1).map(() => ({}));
  const readByName = !Array.isArray(rows[0]);

  fields.forEach((field, fieldIdx) => {
    const key = readByName ? field : fieldIdx;
    // position of the next source row to inspect for this field
    let rowCursor = 0;

    for (let slot = 0; slot < total; slot++) {
      // skip over rows that hold no value for this field
      while (rowCursor < rows.length && !notNullorUndefined(rows[rowCursor][key])) {
        rowCursor++;
      }

      if (rowCursor >= rows.length) {
        // data pool is depleted, pad the sample
        sample[slot][field] = null;
      } else {
        const value = rows[rowCursor][key];
        sample[slot][field] = typeof value === 'string' ? value.trim() : value;
        rowCursor++;
      }
    }
  });

  return sample;
}
|
||
/**
 * Map a type reported by type-analyzer to the matching kepler.gl field type.
 *
 * Types with no dedicated mapping fall back to `ALL_FIELD_TYPES.string`; a warning
 * is logged for any type other than NUMBER, STRING and ZIPCODE that takes that fallback.
 *
 * @param aType analyzer type, one of the values in the analyzer's `DATA_TYPES`
 * @returns corresponding type in `ALL_FIELD_TYPES`
 */
/* eslint-disable complexity */
export function analyzerTypeToFieldType(aType: string): string {
  const analyzerTypes = AnalyzerDATA_TYPES;

  if (aType === analyzerTypes.DATE) {
    return ALL_FIELD_TYPES.date;
  }
  if (aType === analyzerTypes.TIME || aType === analyzerTypes.DATETIME) {
    return ALL_FIELD_TYPES.timestamp;
  }
  if (aType === analyzerTypes.FLOAT) {
    return ALL_FIELD_TYPES.real;
  }
  if (aType === analyzerTypes.INT) {
    return ALL_FIELD_TYPES.integer;
  }
  if (aType === analyzerTypes.BOOLEAN) {
    return ALL_FIELD_TYPES.boolean;
  }
  if (
    aType === analyzerTypes.GEOMETRY ||
    aType === analyzerTypes.GEOMETRY_FROM_STRING ||
    aType === analyzerTypes.PAIR_GEOMETRY_FROM_STRING
  ) {
    return ALL_FIELD_TYPES.geojson;
  }
  if (aType === analyzerTypes.ARRAY) {
    return ALL_FIELD_TYPES.array;
  }
  if (aType === analyzerTypes.OBJECT) {
    return ALL_FIELD_TYPES.object;
  }

  // TODO: unrecognized types: CURRENCY PERCENT NONE
  const isKnownStringLike =
    aType === analyzerTypes.NUMBER ||
    aType === analyzerTypes.STRING ||
    aType === analyzerTypes.ZIPCODE;
  if (!isKnownStringLike) {
    globalConsole.warn(`Unsupported analyzer type: ${aType}`);
  }
  return ALL_FIELD_TYPES.string;
}
|
||
/**
 * Analyze field types from data in `string` format, e.g. uploaded csv.
 * Assign `type`, `fieldIdx` and `format` (timestamp only) to each field
 *
 * Duplicated names in `fieldOrder` are renamed (see `renameDuplicateFields`); the
 * renamed value is used for `name`, `id` and `displayName`, while values in `data`
 * are always looked up by the original field name.
 *
 * @param data array of row object, keyed by the names in `fieldOrder`
 * @param fieldOrder array of field names as string
 * @returns formatted fields, one per entry of `fieldOrder`, in the same order
 * @public
 * @example
 *
 * import {getFieldsFromData} from '@kepler.gl/common-utils';
 * const data = [{
 *   time: '2016-09-17 00:09:55',
 *   value: '4',
 *   surge: '1.2',
 *   isTrip: 'true',
 *   zeroOnes: '0'
 * }, {
 *   time: '2016-09-17 00:30:08',
 *   value: '3',
 *   surge: null,
 *   isTrip: 'false',
 *   zeroOnes: '1'
 * }, {
 *   time: null,
 *   value: '2',
 *   surge: '1.3',
 *   isTrip: null,
 *   zeroOnes: '1'
 * }];
 *
 * const fieldOrder = ['time', 'value', 'surge', 'isTrip', 'zeroOnes'];
 * const fields = getFieldsFromData(data, fieldOrder);
 * // fields = [
 * // {name: 'time', format: 'YYYY-M-D H:m:s', fieldIdx: 0, type: 'timestamp'},
 * // {name: 'value', format: '', fieldIdx: 1, type: 'integer'},
 * // {name: 'surge', format: '', fieldIdx: 2, type: 'real'},
 * // {name: 'isTrip', format: '', fieldIdx: 3, type: 'boolean'},
 * // {name: 'zeroOnes', format: '', fieldIdx: 4, type: 'integer'}];
 * // (each field also carries `id`, `displayName`, `analyzerType` and `valueAccessor`)
 *
 */
export function getFieldsFromData(data: RowData, fieldOrder: string[]): Field[] {
  // TODO: add a check for epoch timestamp
  const metadata = Analyzer.computeColMeta(
    data,
    [
      // No `g` flag on purpose: a global regex keeps `lastIndex` between `test()` calls,
      // so reusing one rule across several column names could skip a matching column.
      // NOTE(review): assumes the analyzer only needs a boolean match per column name.
      {regex: /.*geojson|all_points/, dataType: 'GEOMETRY'},
      {regex: /.*census/, dataType: 'STRING'}
    ],
    {ignoredDataTypes: IGNORE_DATA_TYPES}
  );

  const {fieldByIndex} = renameDuplicateFields(fieldOrder);

  return fieldOrder.map((field, index) => {
    // possibly renamed, unique name of the field
    const name = fieldByIndex[index];

    const fieldMeta = metadata.find(m => m.key === field);

    // fieldMeta could be undefined if the field has no data and Analyzer.computeColMeta
    // will ignore the field. In this case, we will simply assign the field type to STRING
    // since dropping the column in the RowData could be expensive
    let type = fieldMeta?.type || 'STRING';
    const format = fieldMeta?.format || '';

    // check if string is hex wkb
    // Rows are read by the original `field` key, the same key used for the metadata
    // lookup above; the de-duplicated `name` is not used to read `data` anywhere here.
    if (type === AnalyzerDATA_TYPES.STRING && data.some(d => isHexWkb(d[field]))) {
      type = AnalyzerDATA_TYPES.GEOMETRY;
    }

    return {
      name,
      id: name,
      displayName: name,
      format,
      fieldIdx: index,
      type: analyzerTypeToFieldType(type),
      analyzerType: type,
      // reads the value of this column (by position) for a given row of the container
      valueAccessor: dc => d => {
        return dc.valueAt(d.index, index);
      }
    };
  });
}
|
||
/**
 * pass in an array of field names, rename duplicated one
 * and return a map from old field index to new name
 *
 * A name that was already emitted gets the suffix `-<counter>`, using the smallest
 * counter (starting at 0) that does not collide with any name emitted so far,
 * e.g. `['a', 'b', 'a', 'a']` becomes `['a', 'b', 'a-0', 'a-1']`.
 *
 * @param fieldOrder
 * @returns `fieldByIndex`: new field name by index; `allNames`: every emitted name in order
 */
export function renameDuplicateFields(fieldOrder: string[]): {
  allNames: string[];
  fieldByIndex: string[];
} {
  const allNames: string[] = [];
  const fieldByIndex: string[] = [];
  // mirror of `allNames` for constant-time membership checks
  const usedNames = new Set<string>();

  fieldOrder.forEach((field, i) => {
    let fieldName = field;

    // add a counter to duplicated names
    if (usedNames.has(field)) {
      let counter = 0;
      while (usedNames.has(`${field}-${counter}`)) {
        counter++;
      }
      fieldName = `${field}-${counter}`;
    }

    fieldByIndex[i] = fieldName;
    allNames.push(fieldName);
    usedNames.add(fieldName);
  });

  return {allNames, fieldByIndex};
}
Oops, something went wrong.