[chore] common-utils module (#2773)

* [chore] common-utils module Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>
keplergl · Nov 20, 2024 · 13b469d · 13b469d
1 parent 6fd4f88
commit 13b469d
Show file tree

Hide file tree

Showing 56 changed files with 551 additions and 376 deletions.
diff --git a/package.json b/package.json
@@ -18,6 +18,7 @@
   "workspaces": [
     "./src/types",
     "./src/constants",
+    "./src/common-utils",
     "./src/utils",
     "./src/styles",
     "./src/localization",

diff --git a/src/common-utils/babel.config.js b/src/common-utils/babel.config.js
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: MIT
+// Copyright contributors to the kepler.gl project
+
+const KeplerPackage = require('./package');
+
+// Presets handed to Babel through the config returned by the exported function below.
+const PRESETS = ['@babel/preset-env', '@babel/preset-react', '@babel/preset-typescript'];
+// Plugins handed to Babel through the same config.
+const PLUGINS = [
+  ['@babel/plugin-transform-typescript', {isTSX: true, allowDeclareFields: true}],
+  '@babel/plugin-transform-modules-commonjs',
+  '@babel/plugin-transform-class-properties',
+  '@babel/plugin-transform-optional-chaining',
+  '@babel/plugin-transform-logical-assignment-operators',
+  '@babel/plugin-transform-nullish-coalescing-operator',
+  '@babel/plugin-transform-export-namespace-from',
+  [
+    '@babel/transform-runtime',
+    {
+      regenerator: true
+    }
+  ],
+  [
+    // swaps the `__PACKAGE_VERSION__` placeholder for the `version` field
+    // of the package file required at the top of this config
+    'search-and-replace',
+    {
+      rules: [
+        {
+          search: '__PACKAGE_VERSION__',
+          replace: KeplerPackage.version
+        }
+      ]
+    }
+  ]
+];
+// Per-environment overrides, keyed by environment name:
+// `test` adds the istanbul plugin, `debug` inlines source maps and retains line numbers.
+const ENV = {
+  test: {
+    plugins: ['istanbul']
+  },
+  debug: {
+    sourceMaps: 'inline',
+    retainLines: true
+  }
+};
+
+/**
+ * Babel configuration entry point for this package.
+ *
+ * @param api Babel config API object; only `api.cache` is used here
+ * @returns config object combining `PRESETS`, `PLUGINS` and `ENV`
+ */
+function babel(api) {
+  // called with `true`, exactly as before, so the caching behavior is unchanged
+  api.cache(true);
+
+  const config = {
+    presets: PRESETS,
+    plugins: PLUGINS,
+    env: ENV
+  };
+
+  return config;
+}
+
+module.exports = babel;
diff --git a/src/common-utils/package.json b/src/common-utils/package.json
@@ -0,0 +1,55 @@
+{
+  "name": "@kepler.gl/common-utils",
+  "author": "Shan He <heshan0131@gmail.com>",
+  "version": "3.0.0",
+  "description": "kepler.gl common utils",
+  "license": "MIT",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "keywords": [
+    "babel",
+    "es6",
+    "react",
+    "webgl",
+    "visualization",
+    "deck.gl"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/keplergl/kepler.gl.git"
+  },
+  "scripts": {
+    "build": "rm -fr dist && babel src --out-dir dist --source-maps inline --extensions '.ts,.tsx,.js,.jsx' --ignore '**/*.d.ts'",
+    "build:umd": "NODE_OPTIONS=--openssl-legacy-provider webpack --config ./webpack/umd.js --progress --env.prod",
+    "build:types": "tsc --project ./tsconfig.production.json",
+    "prepublish": "babel-node ../../scripts/license-header/bin --license ../../FILE-HEADER && yarn build && yarn build:types",
+    "stab": "mkdir -p dist && touch dist/index.js"
+  },
+  "files": [
+    "dist",
+    "umd"
+  ],
+  "dependencies": {
+    "@kepler.gl/constants": "3.0.0",
+    "@kepler.gl/types": "3.0.0",
+    "d3-array": "^2.8.0",
+    "global": "^4.3.0",
+    "type-analyzer": "0.4.0"
+  },
+  "nyc": {
+    "sourceMap": false,
+    "instrument": false
+  },
+  "maintainers": [
+    "Shan He <heshan0131@gmail.com>",
+    "Igor Dykhta <dikhta.igor@gmail.com>"
+  ],
+  "engines": {
+    "node": ">=18"
+  },
+  "volta": {
+    "node": "18.18.2",
+    "yarn": "4.4.0"
+  },
+  "packageManager": "yarn@4.4.0"
+}
diff --git a/src/common-utils/src/data-type.ts b/src/common-utils/src/data-type.ts
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: MIT
+// Copyright contributors to the kepler.gl project
+
+import {Analyzer, DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
+import {RowData, Field} from '@kepler.gl/types';
+import {ALL_FIELD_TYPES} from '@kepler.gl/constants';
+import {console as globalConsole} from 'global/window';
+import {range} from 'd3-array';
+import {isHexWkb, notNullorUndefined} from './data';
+
+/**
+ * type-analyzer data types that this module accepts.
+ * Every key of `AnalyzerDATA_TYPES` that is not listed here is collected into
+ * `IGNORE_DATA_TYPES` and passed to the analyzer as `ignoredDataTypes`.
+ */
+export const ACCEPTED_ANALYZER_TYPES = [
+  AnalyzerDATA_TYPES.DATE,
+  AnalyzerDATA_TYPES.TIME,
+  AnalyzerDATA_TYPES.DATETIME,
+  AnalyzerDATA_TYPES.NUMBER,
+  AnalyzerDATA_TYPES.INT,
+  AnalyzerDATA_TYPES.FLOAT,
+  AnalyzerDATA_TYPES.BOOLEAN,
+  AnalyzerDATA_TYPES.STRING,
+  AnalyzerDATA_TYPES.GEOMETRY,
+  AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING,
+  AnalyzerDATA_TYPES.PAIR_GEOMETRY_FROM_STRING,
+  AnalyzerDATA_TYPES.ZIPCODE,
+  AnalyzerDATA_TYPES.ARRAY,
+  AnalyzerDATA_TYPES.OBJECT
+];
+
+// Keys of `AnalyzerDATA_TYPES` that are absent from `ACCEPTED_ANALYZER_TYPES`;
+// handed to the analyzer as `ignoredDataTypes` in `getFieldsFromData`.
+const IGNORE_DATA_TYPES = Object.keys(AnalyzerDATA_TYPES).filter(
+  analyzerType => ACCEPTED_ANALYZER_TYPES.indexOf(analyzerType) === -1
+);
+
+/**
+ * Build a small sample of rows used to analyze field types.
+ *
+ * For each field, `rows` is scanned from the start and up to
+ * `min(sampleCount, rows.length)` values that pass `notNullorUndefined`
+ * are collected; string values are trimmed. Sample slots that are still
+ * empty once the rows run out are set to `null`.
+ *
+ * Each field is sampled independently, so the values in one sample row do not
+ * necessarily come from the same source row.
+ *
+ * @param fields field names; for array rows the position in `fields` is used as the column index
+ * @param rows rows given either as arrays of values or as objects keyed by field name
+ * @param sampleCount upper bound on the number of sample rows, 50 by default
+ * @returns sample rows as objects keyed by field name, or `[]` when `rows` is empty
+ */
+export function getSampleForTypeAnalyze({
+  fields,
+  rows,
+  sampleCount = 50
+}: {
+  fields: string[];
+  rows: unknown[][] | RowData;
+  sampleCount?: number;
+}): RowData {
+  if (rows.length < 1) {
+    return [];
+  }
+
+  const total = Math.min(sampleCount, rows.length);
+  const sample = range(0, total, 1).map(() => ({}));
+  // the shape of the first row decides how every row is indexed
+  const isRowObject = !Array.isArray(rows[0]);
+
+  fields.forEach((field, fieldIdx) => {
+    const key = isRowObject ? field : fieldIdx;
+    // next row to inspect
+    let rowIdx = 0;
+    // next sample slot to fill
+    let sampleIdx = 0;
+
+    while (sampleIdx < total) {
+      if (rowIdx >= rows.length) {
+        // data pool is depleted, pad the remaining slots with null
+        sample[sampleIdx][field] = null;
+        sampleIdx++;
+        continue;
+      }
+
+      const value = rows[rowIdx][key];
+      rowIdx++;
+
+      if (!notNullorUndefined(value)) {
+        // skip empty cells without consuming a sample slot
+        continue;
+      }
+
+      sample[sampleIdx][field] = typeof value === 'string' ? value.trim() : value;
+      sampleIdx++;
+    }
+  });
+
+  return sample;
+}
+
+/**
+ * Translate a type-analyzer data type into the matching kepler.gl field type.
+ * Types without a mapping are reported through `globalConsole.warn` and
+ * treated as strings.
+ *
+ * @param aType analyzer type, one of the values of `AnalyzerDATA_TYPES`
+ * @returns corresponding type in `ALL_FIELD_TYPES`
+ */
+/* eslint-disable complexity */
+export function analyzerTypeToFieldType(aType: string): string {
+  const analyzer = AnalyzerDATA_TYPES;
+  const fieldTypes = ALL_FIELD_TYPES;
+
+  // TODO: CURRENCY, PERCENT and NONE are not recognized yet
+  // ordered [analyzer type, field type] pairs, the first strict match wins
+  const conversions: [string, string][] = [
+    [analyzer.DATE, fieldTypes.date],
+    [analyzer.TIME, fieldTypes.timestamp],
+    [analyzer.DATETIME, fieldTypes.timestamp],
+    [analyzer.FLOAT, fieldTypes.real],
+    [analyzer.INT, fieldTypes.integer],
+    [analyzer.BOOLEAN, fieldTypes.boolean],
+    [analyzer.GEOMETRY, fieldTypes.geojson],
+    [analyzer.GEOMETRY_FROM_STRING, fieldTypes.geojson],
+    [analyzer.PAIR_GEOMETRY_FROM_STRING, fieldTypes.geojson],
+    [analyzer.ARRAY, fieldTypes.array],
+    [analyzer.OBJECT, fieldTypes.object],
+    [analyzer.NUMBER, fieldTypes.string],
+    [analyzer.STRING, fieldTypes.string],
+    [analyzer.ZIPCODE, fieldTypes.string]
+  ];
+
+  const match = conversions.find(([analyzerType]) => analyzerType === aType);
+  if (match) {
+    return match[1];
+  }
+
+  globalConsole.warn(`Unsupported analyzer type: ${aType}`);
+  return fieldTypes.string;
+}
+
+/**
+ * Analyze field types from data in `string` format, e.g. uploaded csv.
+ * Assign `type`, `fieldIdx` and `format` (timestamp only) to each field
+ *
+ * Duplicated names in `fieldOrder` are made unique with `renameDuplicateFields`;
+ * the type of every field is still looked up under its original name.
+ * Fields without analyzer metadata fall back to the `STRING` analyzer type, and
+ * `STRING` fields holding at least one hex WKB value are promoted to `GEOMETRY`.
+ *
+ * @param data array of row object
+ * @param fieldOrder array of field names as string
+ * @returns formatted fields
+ * @public
+ * @example
+ *
+ * import {getFieldsFromData} from '@kepler.gl/common-utils';
+ * const data = [{
+ *   time: '2016-09-17 00:09:55',
+ *   value: '4',
+ *   surge: '1.2',
+ *   isTrip: 'true',
+ *   zeroOnes: '0'
+ * }, {
+ *   time: '2016-09-17 00:30:08',
+ *   value: '3',
+ *   surge: null,
+ *   isTrip: 'false',
+ *   zeroOnes: '1'
+ * }, {
+ *   time: null,
+ *   value: '2',
+ *   surge: '1.3',
+ *   isTrip: null,
+ *   zeroOnes: '1'
+ * }];
+ *
+ * const fieldOrder = ['time', 'value', 'surge', 'isTrip', 'zeroOnes'];
+ * const fields = getFieldsFromData(data, fieldOrder);
+ * // fields (abridged; `fieldIdx` is the position of the name in `fieldOrder`) = [
+ * // {name: 'time', format: 'YYYY-M-D H:m:s', fieldIdx: 0, type: 'timestamp'},
+ * // {name: 'value', format: '', fieldIdx: 1, type: 'integer'},
+ * // {name: 'surge', format: '', fieldIdx: 2, type: 'real'},
+ * // {name: 'isTrip', format: '', fieldIdx: 3, type: 'boolean'},
+ * // {name: 'zeroOnes', format: '', fieldIdx: 4, type: 'integer'}];
+ *
+ */
+export function getFieldsFromData(data: RowData, fieldOrder: string[]): Field[] {
+  // add a check for epoch timestamp
+  // NOTE(review): both rules use the `g` flag; if the analyzer applies them with
+  // `RegExp.prototype.test`, `lastIndex` carries over between columns — confirm
+  const metadata = Analyzer.computeColMeta(
+    data,
+    [
+      {regex: /.*geojson|all_points/g, dataType: 'GEOMETRY'},
+      {regex: /.*census/g, dataType: 'STRING'}
+    ],
+    {ignoredDataTypes: IGNORE_DATA_TYPES}
+  );
+
+  const {fieldByIndex} = renameDuplicateFields(fieldOrder);
+
+  const result = fieldOrder.map((field, index) => {
+    // `name` is the de-duplicated name exposed on the returned field,
+    // `field` is the original name used to look the column up
+    const name = fieldByIndex[index];
+
+    const fieldMeta = metadata.find(m => m.key === field);
+
+    // fieldMeta could be undefined if the field has no data and Analyzer.computeColMeta
+    // will ignore the field. In this case, we will simply assign the field type to STRING
+    // since dropping the column in the RowData could be expensive
+    let type = fieldMeta?.type || 'STRING';
+    const format = fieldMeta?.format || '';
+
+    // check if string is hex wkb; read the rows with the original key, same as the
+    // metadata lookup above, because a renamed duplicate is normally not a key of the rows
+    if (type === AnalyzerDATA_TYPES.STRING) {
+      type = data.some(d => isHexWkb(d[field])) ? AnalyzerDATA_TYPES.GEOMETRY : type;
+    }
+
+    return {
+      name,
+      id: name,
+      displayName: name,
+      format,
+      fieldIdx: index,
+      type: analyzerTypeToFieldType(type),
+      analyzerType: type,
+      // reads this field's column by position from the data container
+      valueAccessor: dc => d => {
+        return dc.valueAt(d.index, index);
+      }
+    };
+  });
+
+  return result;
+}
+
+/**
+ * Make every name in a list of field names unique.
+ * The first occurrence of a name is kept as is; each later occurrence gets the
+ * first free `-<counter>` suffix, with the counter starting at 0.
+ *
+ * @param fieldOrder field names, possibly containing duplicates
+ * @returns `allNames`, the unique names in order of insertion, and
+ * `fieldByIndex`, the unique name for each index of `fieldOrder`
+ */
+export function renameDuplicateFields(fieldOrder: string[]): {
+  allNames: string[];
+  fieldByIndex: string[];
+} {
+  const allNames: string[] = [];
+  const fieldByIndex: string[] = [];
+
+  fieldOrder.forEach((field, i) => {
+    let uniqueName = field;
+
+    if (allNames.includes(field)) {
+      // name is already taken, probe `<field>-0`, `<field>-1`, ... until one is free
+      let counter = 0;
+      uniqueName = `${field}-${counter}`;
+      while (allNames.includes(uniqueName)) {
+        counter += 1;
+        uniqueName = `${field}-${counter}`;
+      }
+    }
+
+    fieldByIndex[i] = uniqueName;
+    allNames.push(uniqueName);
+  });
+
+  return {allNames, fieldByIndex};
+}