diff --git a/lib/bufferReader.js b/lib/bufferReader.ts similarity index 70% rename from lib/bufferReader.js rename to lib/bufferReader.ts index bc2ace23..e1600813 100644 --- a/lib/bufferReader.js +++ b/lib/bufferReader.ts @@ -1,5 +1,29 @@ -module.exports = class BufferReader { - constructor(envelopeReader, options) { +interface BufferReaderOptions { + maxSpan?: number, + maxLength?: number, + queueWait?: number +} + +interface BufferReaderQueueRow { + offset: number, + length: number, + resolve: (buf: Buffer) => void + reject: unknown +} + +interface EnvelopeReader { + readFn: (start: number, finish: number) => Promise +} + +export default class BufferReader { + maxSpan: number + maxLength: number + queueWait: number + scheduled?: boolean + queue: Array + envelopeReader: EnvelopeReader + + constructor(envelopeReader: EnvelopeReader, options: BufferReaderOptions) { options = options || {}; this.envelopeReader = envelopeReader; this.maxSpan = options.maxSpan || 100000; // 100k @@ -9,7 +33,7 @@ module.exports = class BufferReader { this.queue = []; } - async read(offset, length) { + async read(offset: number, length: number) { if (!this.scheduled) { this.scheduled = true; setTimeout( () => { @@ -29,7 +53,7 @@ module.exports = class BufferReader { this.queue = []; queue.sort( (a,b) => a.offset - b.offset); - var subqueue = []; + var subqueue: Array = []; const readSubqueue = async () => { if (!subqueue.length) { @@ -61,6 +85,6 @@ module.exports = class BufferReader { subqueue = [d]; } }); - readSubqueue(subqueue); + readSubqueue(); } }; diff --git a/lib/codec/index.js b/lib/codec/index.js deleted file mode 100644 index a5435d1e..00000000 --- a/lib/codec/index.js +++ /dev/null @@ -1,4 +0,0 @@ -module.exports.PLAIN = require('./plain'); -module.exports.RLE = require('./rle'); -module.exports.PLAIN_DICTIONARY = require('./plain_dictionary'); - diff --git a/lib/codec/index.ts b/lib/codec/index.ts new file mode 100644 index 00000000..af182ab1 --- /dev/null +++ b/lib/codec/index.ts @@ -0,0 +1,5 @@ +export * as PLAIN from './plain' +export * as RLE from './rle' +export * as PLAIN_DICTIONARY from './plain_dictionary' + + diff --git a/lib/codec/plain.js b/lib/codec/plain.js deleted file mode 100644 index 57afe069..00000000 --- a/lib/codec/plain.js +++ /dev/null @@ -1,275 +0,0 @@ -'use strict'; -const INT53 = require('int53'); - -function encodeValues_BOOLEAN(values) { - let buf = Buffer.alloc(Math.ceil(values.length / 8)); - buf.fill(0); - - for (let i = 0; i < values.length; ++i) { - if (values[i]) { - buf[Math.floor(i / 8)] |= (1 << (i % 8)); - } - } - - return buf; -} - -function decodeValues_BOOLEAN(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - let b = cursor.buffer[cursor.offset + Math.floor(i / 8)]; - values.push((b & (1 << (i % 8))) > 0); - } - - cursor.offset += Math.ceil(count / 8); - return values; -} - -function encodeValues_INT32(values) { - let buf = Buffer.alloc(4 * values.length); - for (let i = 0; i < values.length; i++) { - buf.writeInt32LE(values[i], i * 4) - } - - return buf; -} - -function decodeValues_INT32(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - values.push(cursor.buffer.readInt32LE(cursor.offset)); - cursor.offset += 4; - } - - return values; -} - -function encodeValues_INT64(values) { - let buf = Buffer.alloc(8 * values.length); - for (let i = 0; i < values.length; i++) { - buf.writeBigInt64LE(BigInt(values[i]), i*8); - } - - return buf; -} - -function decodeValues_INT64(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - values.push(cursor.buffer.readBigInt64LE(cursor.offset)); - cursor.offset += 8; - } - - return values; -} - -function encodeValues_INT96(values) { - let buf = Buffer.alloc(12 * values.length); - - for (let i = 0; i < values.length; i++) { - if (values[i] >= 0) { - INT53.writeInt64LE(values[i], buf, i * 12); - buf.writeUInt32LE(0, i * 12 + 8); // truncate to 64 actual precision - } else { - INT53.writeInt64LE((~-values[i]) + 1, buf, i * 12); - buf.writeUInt32LE(0xffffffff, i * 12 + 8); // truncate to 64 actual precision - } - } - - return buf; -} - -function decodeValues_INT96(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - const low = INT53.readInt64LE(cursor.buffer, cursor.offset); - const high = cursor.buffer.readUInt32LE(cursor.offset + 8); - - if (high === 0xffffffff) { - values.push((~-low) + 1); // truncate to 64 actual precision - } else { - values.push(low); // truncate to 64 actual precision - } - - cursor.offset += 12; - } - - return values; -} - -function encodeValues_FLOAT(values) { - let buf = Buffer.alloc(4 * values.length); - for (let i = 0; i < values.length; i++) { - buf.writeFloatLE(values[i], i * 4) - } - - return buf; -} - -function decodeValues_FLOAT(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - values.push(cursor.buffer.readFloatLE(cursor.offset)); - cursor.offset += 4; - } - - return values; -} - -function encodeValues_DOUBLE(values) { - let buf = Buffer.alloc(8 * values.length); - for (let i = 0; i < values.length; i++) { - buf.writeDoubleLE(values[i], i * 8) - } - - return buf; -} - -function decodeValues_DOUBLE(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - values.push(cursor.buffer.readDoubleLE(cursor.offset)); - cursor.offset += 8; - } - - return values; -} - -function encodeValues_BYTE_ARRAY(values) { - let buf_len = 0; - for (let i = 0; i < values.length; i++) { - values[i] = Buffer.from(values[i]); - buf_len += 4 + values[i].length; - } - - let buf = Buffer.alloc(buf_len); - let buf_pos = 0; - for (let i = 0; i < values.length; i++) { - buf.writeUInt32LE(values[i].length, buf_pos) - values[i].copy(buf, buf_pos + 4); - buf_pos += 4 + values[i].length; - - } - - return buf; -} - -function decodeValues_BYTE_ARRAY(cursor, count) { - let values = []; - - for (let i = 0; i < count; ++i) { - let len = cursor.buffer.readUInt32LE(cursor.offset); - cursor.offset += 4; - values.push(cursor.buffer.slice(cursor.offset, cursor.offset + len)); - cursor.offset += len; - } - - return values; -} - - - -function encodeValues_FIXED_LEN_BYTE_ARRAY(values, opts) { - if (!opts.typeLength) { - throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; - } - - let buf_len = 0; - for (let i = 0; i < values.length; i++) { - values[i] = Buffer.from(values[i]); - - if (values[i].length !== opts.typeLength) { - throw "invalid value for FIXED_LEN_BYTE_ARRAY: " + values[i]; - } - } - - return Buffer.concat(values); -} - -function decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts) { - let values = []; - - if (!opts.typeLength) { - throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; - } - - for (let i = 0; i < count; ++i) { - values.push(cursor.buffer.slice(cursor.offset, cursor.offset + opts.typeLength)); - cursor.offset += opts.typeLength; - } - - return values; -} - -exports.encodeValues = function(type, values, opts) { - switch (type) { - - case 'BOOLEAN': - return encodeValues_BOOLEAN(values); - - case 'INT32': - return encodeValues_INT32(values); - - case 'INT64': - return encodeValues_INT64(values); - - case 'INT96': - return encodeValues_INT96(values); - - case 'FLOAT': - return encodeValues_FLOAT(values); - - case 'DOUBLE': - return encodeValues_DOUBLE(values); - - case 'BYTE_ARRAY': - return encodeValues_BYTE_ARRAY(values); - - case 'FIXED_LEN_BYTE_ARRAY': - return encodeValues_FIXED_LEN_BYTE_ARRAY(values, opts); - - default: - throw 'unsupported type: ' + type; - - } -} - -exports.decodeValues = function(type, cursor, count, opts) { - switch (type) { - - case 'BOOLEAN': - return decodeValues_BOOLEAN(cursor, count); - - case 'INT32': - return decodeValues_INT32(cursor, count); - - case 'INT64': - return decodeValues_INT64(cursor, count); - - case 'INT96': - return decodeValues_INT96(cursor, count); - - case 'FLOAT': - return decodeValues_FLOAT(cursor, count); - - case 'DOUBLE': - return decodeValues_DOUBLE(cursor, count); - - case 'BYTE_ARRAY': - return decodeValues_BYTE_ARRAY(cursor, count); - - case 'FIXED_LEN_BYTE_ARRAY': - return decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts); - - default: - throw 'unsupported type: ' + type; - - } -} - diff --git a/lib/codec/plain.ts b/lib/codec/plain.ts new file mode 100644 index 00000000..0fa0b645 --- /dev/null +++ b/lib/codec/plain.ts @@ -0,0 +1,292 @@ +import INT53 from "int53"; +import { Cursor, Options } from "./types"; + +function encodeValues_BOOLEAN(values: Array) { + let buf = Buffer.alloc(Math.ceil(values.length / 8)); + buf.fill(0); + + for (let i = 0; i < values.length; ++i) { + if (values[i]) { + buf[Math.floor(i / 8)] |= 1 << i % 8; + } + } + + return buf; +} + +function decodeValues_BOOLEAN(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + let b = cursor.buffer[cursor.offset + Math.floor(i / 8)]; + values.push((b & (1 << i % 8)) > 0); + } + + cursor.offset += Math.ceil(count / 8); + return values; +} + +function encodeValues_INT32(values: Array) { + let buf = Buffer.alloc(4 * values.length); + for (let i = 0; i < values.length; i++) { + buf.writeInt32LE(values[i], i * 4); + } + + return buf; +} + +function decodeValues_INT32(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + values.push(cursor.buffer.readInt32LE(cursor.offset)); + cursor.offset += 4; + } + + return values; +} + +function encodeValues_INT64(values: Array) { + let buf = Buffer.alloc(8 * values.length); + for (let i = 0; i < values.length; i++) { + buf.writeBigInt64LE(BigInt(values[i]), i * 8); + } + + return buf; +} + +function decodeValues_INT64(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + values.push(cursor.buffer.readBigInt64LE(cursor.offset)); + cursor.offset += 8; + } + + return values; +} + +function encodeValues_INT96(values: Array) { + let buf = Buffer.alloc(12 * values.length); + + for (let i = 0; i < values.length; i++) { + if (values[i] >= 0) { + INT53.writeInt64LE(values[i], buf, i * 12); + buf.writeUInt32LE(0, i * 12 + 8); // truncate to 64 actual precision + } else { + INT53.writeInt64LE(~-values[i] + 1, buf, i * 12); + buf.writeUInt32LE(0xffffffff, i * 12 + 8); // truncate to 64 actual precision + } + } + + return buf; +} + +function decodeValues_INT96(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + const low = INT53.readInt64LE(cursor.buffer, cursor.offset); + const high = cursor.buffer.readUInt32LE(cursor.offset + 8); + + if (high === 0xffffffff) { + values.push(~-low + 1); // truncate to 64 actual precision + } else { + values.push(low); // truncate to 64 actual precision + } + + cursor.offset += 12; + } + + return values; +} + +function encodeValues_FLOAT(values: Array) { + let buf = Buffer.alloc(4 * values.length); + for (let i = 0; i < values.length; i++) { + buf.writeFloatLE(values[i], i * 4); + } + + return buf; +} + +function decodeValues_FLOAT(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + values.push(cursor.buffer.readFloatLE(cursor.offset)); + cursor.offset += 4; + } + + return values; +} + +function encodeValues_DOUBLE(values: Array) { + let buf = Buffer.alloc(8 * values.length); + for (let i = 0; i < values.length; i++) { + buf.writeDoubleLE(values[i], i * 8); + } + + return buf; +} + +function decodeValues_DOUBLE(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + values.push(cursor.buffer.readDoubleLE(cursor.offset)); + cursor.offset += 8; + } + + return values; +} + +// Waylands reminder to check again +function encodeValues_BYTE_ARRAY(values: Array) { + let buf_len = 0; + const returnedValues: Array = []; + for (let i = 0; i < values.length; i++) { + returnedValues[i] = Buffer.from(values[i]); + buf_len += 4 + returnedValues[i].length; + } + + let buf = Buffer.alloc(buf_len); + let buf_pos = 0; + for (let i = 0; i < returnedValues.length; i++) { + buf.writeUInt32LE(returnedValues[i].length, buf_pos); + returnedValues[i].copy(buf, buf_pos + 4); + buf_pos += 4 + returnedValues[i].length; + } + + return buf; +} + +function decodeValues_BYTE_ARRAY(cursor: Cursor, count: number) { + let values = []; + + for (let i = 0; i < count; ++i) { + let len = cursor.buffer.readUInt32LE(cursor.offset); + cursor.offset += 4; + values.push(cursor.buffer.slice(cursor.offset, cursor.offset + len)); + cursor.offset += len; + } + + return values; +} + +function encodeValues_FIXED_LEN_BYTE_ARRAY( + values: Array, + opts: Options +) { + if (!opts.typeLength) { + throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; + } + + const returnedValues: Array = []; + for (let i = 0; i < values.length; i++) { + returnedValues[i] = Buffer.from(values[i]); + + if (returnedValues[i].length !== opts.typeLength) { + throw "invalid value for FIXED_LEN_BYTE_ARRAY: " + returnedValues[i]; + } + } + + return Buffer.concat(returnedValues); +} + +function decodeValues_FIXED_LEN_BYTE_ARRAY( + cursor: Cursor, + count: number, + opts: Options +) { + let values = []; + + if (!opts.typeLength) { + throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; + } + + for (let i = 0; i < count; ++i) { + values.push( + cursor.buffer.slice(cursor.offset, cursor.offset + opts.typeLength) + ); + cursor.offset += opts.typeLength; + } + + return values; +} + +type ValidValueTypes = "BOOLEAN" | "INT32" | "INT64" | "INT96" | "FLOAT" | "DOUBLE" | "BYTE_ARRAY" | "FIXED_LEN_BYTE_ARRAY" + +export const encodeValues = function ( + type: ValidValueTypes | string, + values: Array, + opts: Options +) { + switch (type) { + case "BOOLEAN": + return encodeValues_BOOLEAN(values as Array); + + case "INT32": + return encodeValues_INT32(values as Array); + + case "INT64": + return encodeValues_INT64(values as Array); + + case "INT96": + return encodeValues_INT96(values as Array); + + case "FLOAT": + return encodeValues_FLOAT(values as Array); + + case "DOUBLE": + return encodeValues_DOUBLE(values as Array); + + case "BYTE_ARRAY": + return encodeValues_BYTE_ARRAY(values as Array); + + case "FIXED_LEN_BYTE_ARRAY": + return encodeValues_FIXED_LEN_BYTE_ARRAY( + values as Array, + opts + ); + + default: + throw "unsupported type: " + type; + } +}; + +export const decodeValues = function ( + type: ValidValueTypes | string, + cursor: Cursor, + count: number, + opts: Options +) { + switch (type) { + case "BOOLEAN": + return decodeValues_BOOLEAN(cursor, count); + + case "INT32": + return decodeValues_INT32(cursor, count); + + case "INT64": + return decodeValues_INT64(cursor, count); + + case "INT96": + return decodeValues_INT96(cursor, count); + + case "FLOAT": + return decodeValues_FLOAT(cursor, count); + + case "DOUBLE": + return decodeValues_DOUBLE(cursor, count); + + case "BYTE_ARRAY": + return decodeValues_BYTE_ARRAY(cursor, count); + + case "FIXED_LEN_BYTE_ARRAY": + return decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts); + + default: + throw "unsupported type: " + type; + } +}; diff --git a/lib/codec/plain_dictionary.js b/lib/codec/plain_dictionary.ts similarity index 54% rename from lib/codec/plain_dictionary.js rename to lib/codec/plain_dictionary.ts index ddd091b7..3e5c3ea0 100644 --- a/lib/codec/plain_dictionary.js +++ b/lib/codec/plain_dictionary.ts @@ -1,6 +1,7 @@ -const rle = require('./rle'); +import * as rle from './rle' +import { Cursor, Options } from './types' -exports.decodeValues = function(type, cursor, count, opts) { +export const decodeValues = function(type: string, cursor: Cursor, count: number, opts: Options) { opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset+1).readInt8(0); cursor.offset += 1; return rle.decodeValues(type, cursor, count, Object.assign({}, opts, {disableEnvelope: true})); diff --git a/lib/codec/rle.js b/lib/codec/rle.ts similarity index 81% rename from lib/codec/rle.js rename to lib/codec/rle.ts index e7a22865..cea6cdeb 100644 --- a/lib/codec/rle.js +++ b/lib/codec/rle.ts @@ -1,6 +1,7 @@ -const varint = require('varint') +import varint from 'varint' +import {Cursor, Options} from './types' -function encodeRunBitpacked(values, opts) { +function encodeRunBitpacked(values: Array, opts: Options) { for (let i = 0; i < values.length % 8; i++) { values.push(0); } @@ -18,7 +19,7 @@ function encodeRunBitpacked(values, opts) { ]); } -function encodeRunRepeated(value, count, opts) { +function encodeRunRepeated(value: number, count: number, opts: Options) { let buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8)); for (let i = 0; i < buf.length; ++i) { @@ -32,7 +33,15 @@ function encodeRunRepeated(value, count, opts) { ]); } -exports.encodeValues = function(type, values, opts) { +function unknownToParsedInt(value: string | number) { + if (typeof value === 'string') { + return parseInt(value, 10) + } else { + return value + } +} + +export const encodeValues = function(type: string, values: Array, opts: Options) { if (!('bitWidth' in opts)) { throw 'bitWidth is required'; } @@ -42,7 +51,7 @@ exports.encodeValues = function(type, values, opts) { case 'BOOLEAN': case 'INT32': case 'INT64': - values = values.map((x) => parseInt(x, 10)); + values = values.map((x) => unknownToParsedInt(x)); break; default: @@ -92,7 +101,7 @@ exports.encodeValues = function(type, values, opts) { return envelope; }; -function decodeRunBitpacked(cursor, count, opts) { +function decodeRunBitpacked(cursor : Cursor, count: number, opts: Options) { if (count % 8 !== 0) { throw 'must be a multiple of 8'; } @@ -108,7 +117,7 @@ function decodeRunBitpacked(cursor, count, opts) { return values; } -function decodeRunRepeated(cursor, count, opts) { +function decodeRunRepeated(cursor: Cursor, count: number, opts: Options) { let value = 0; for (let i = 0; i < Math.ceil(opts.bitWidth / 8); ++i) { value << 8; @@ -119,7 +128,7 @@ function decodeRunRepeated(cursor, count, opts) { return new Array(count).fill(value); } -exports.decodeValues = function(type, cursor, count, opts) { +export const decodeValues = function(_: string, cursor: Cursor, count: number, opts: Options) { if (!('bitWidth' in opts)) { throw 'bitWidth is required'; } @@ -151,4 +160,4 @@ exports.decodeValues = function(type, cursor, count, opts) { } return values; -}; \ No newline at end of file +}; diff --git a/lib/codec/types.ts b/lib/codec/types.ts new file mode 100644 index 00000000..9a98c3e1 --- /dev/null +++ b/lib/codec/types.ts @@ -0,0 +1,10 @@ +export interface Options { + typeLength: number, + bitWidth: number, + disableEnvelope: boolean +} + +export interface Cursor { + buffer: Buffer, + offset: number, +} \ No newline at end of file diff --git a/lib/compression.js b/lib/compression.ts similarity index 51% rename from lib/compression.js rename to lib/compression.ts index 007f6c78..dce8ec79 100644 --- a/lib/compression.js +++ b/lib/compression.ts @@ -1,11 +1,20 @@ -'use strict'; -const zlib = require('zlib'); -const snappy = require('snappyjs'); +import zlib from 'zlib' +import snappy from 'snappyjs' import { compress as brotliCompress, decompress as brotliDecompress } from 'wasm-brotli' +type d_identity = (value: ArrayBuffer | Buffer | Uint8Array ) => ArrayBuffer | Buffer | Uint8Array +type d_gzip = (value: ArrayBuffer | Buffer | string ) => Buffer +type d_snappy = (value: ArrayBuffer | Buffer | Uint8Array ) => ArrayBuffer | Buffer | Uint8Array +type d_brotli = (value: Uint8Array ) => Promise +interface PARQUET_COMPRESSION_METHODS { + [key:string]: { + deflate: Function + inflate: Function + } +} // LZO compression is disabled. See: https://github.com/LibertyDSNP/parquetjs/issues/18 -const PARQUET_COMPRESSION_METHODS = { +export const PARQUET_COMPRESSION_METHODS: PARQUET_COMPRESSION_METHODS = { 'UNCOMPRESSED': { deflate: deflate_identity, inflate: inflate_identity @@ -27,7 +36,7 @@ const PARQUET_COMPRESSION_METHODS = { /** * Deflate a value using compression method `method` */ -async function deflate(method, value) { +export async function deflate(method: string, value: unknown) { if (!(method in PARQUET_COMPRESSION_METHODS)) { throw 'invalid compression method: ' + method; } @@ -35,31 +44,33 @@ async function deflate(method, value) { return PARQUET_COMPRESSION_METHODS[method].deflate(value); } -function deflate_identity(value) { +function deflate_identity(value: ArrayBuffer | Buffer | Uint8Array) { return value; } -function deflate_gzip(value) { +function deflate_gzip(value: ArrayBuffer | Buffer | string) { return zlib.gzipSync(value); } -function deflate_snappy(value) { +function deflate_snappy(value: ArrayBuffer | Buffer | Uint8Array) { return snappy.compress(value); } -async function deflate_brotli(value) { - const compressedContent = await brotliCompress(value, { +async function deflate_brotli(value: Uint8Array) { + const compressedContent = await brotliCompress(value/*, { mode: 0, quality: 8, lgwin: 22 - }) + } + */) + return Buffer.from(compressedContent); } /** * Inflate a value using compression method `method` */ -async function inflate(method, value) { +export async function inflate(method: string, value: unknown) { if (!(method in PARQUET_COMPRESSION_METHODS)) { throw 'invalid compression method: ' + method; } @@ -67,22 +78,21 @@ async function inflate(method, value) { return await PARQUET_COMPRESSION_METHODS[method].inflate(value); } -function inflate_identity(value) { +function inflate_identity(value: ArrayBuffer | Buffer | Uint8Array) { return value; } -function inflate_gzip(value) { +function inflate_gzip(value: Buffer | ArrayBuffer | string) { return zlib.gunzipSync(value); } -function inflate_snappy(value) { +function inflate_snappy(value: ArrayBuffer | Buffer | Uint8Array) { return snappy.uncompress(value); } -async function inflate_brotli(value) { +async function inflate_brotli(value: Uint8Array) { const uncompressedContent = await brotliDecompress(value) return Buffer.from(uncompressedContent); } -module.exports = { PARQUET_COMPRESSION_METHODS, deflate, inflate }; diff --git a/lib/custom.d.ts b/lib/custom.d.ts new file mode 100644 index 00000000..9ee74add --- /dev/null +++ b/lib/custom.d.ts @@ -0,0 +1,11 @@ + +declare module 'int53' { + export const writeInt64LE: (value: number, buf: Buffer, num: number) => void + export const readInt64LE: (buf: Buffer, offset: number) => number +} + +declare module 'snappyjs' { + export const compress: (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array + export const uncompress: (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array +} + diff --git a/lib/types.js b/lib/types.ts similarity index 58% rename from lib/types.js rename to lib/types.ts index 6657e01a..bd6e9db1 100644 --- a/lib/types.js +++ b/lib/types.ts @@ -1,7 +1,22 @@ 'use strict'; -const BSON = require('bson'); +import * as BSON from "bson" +interface PARQUET_LOGICAL_TYPES { + [key:string]: { + primitiveType: string, + toPrimitive: Function, + fromPrimitive?: Function, + originalType?: string, + typeLength?: number + } +} -const PARQUET_LOGICAL_TYPES = { +interface INTERVAL { + months: number, + days: number, + milliseconds: number +} + +export const PARQUET_LOGICAL_TYPES: PARQUET_LOGICAL_TYPES = { 'BOOLEAN': { primitiveType: 'BOOLEAN', toPrimitive: toPrimitive_BOOLEAN, @@ -140,7 +155,7 @@ const PARQUET_LOGICAL_TYPES = { * Convert a value from it's native representation to the internal/underlying * primitive type */ -function toPrimitive(type, value) { +export function toPrimitive(type: string, value: unknown) { if (!(type in PARQUET_LOGICAL_TYPES)) { throw 'invalid type: ' + type; } @@ -152,228 +167,199 @@ function toPrimitive(type, value) { * Convert a value from it's internal/underlying primitive representation to * the native representation */ -function fromPrimitive(type, value) { +export function fromPrimitive(type: string, value: unknown) { if (!(type in PARQUET_LOGICAL_TYPES)) { throw 'invalid type: ' + type; } - - if ("fromPrimitive" in PARQUET_LOGICAL_TYPES[type]) { - return PARQUET_LOGICAL_TYPES[type].fromPrimitive(value); + + const typeFromPrimitive = PARQUET_LOGICAL_TYPES[type].fromPrimitive + if (typeFromPrimitive !== undefined) { + return typeFromPrimitive(value) } else { return value; } } -function toPrimitive_BOOLEAN(value) { +function toPrimitive_BOOLEAN(value: boolean) { return !!value; } -function fromPrimitive_BOOLEAN(value) { +function fromPrimitive_BOOLEAN(value: boolean) { return !!value; } -function toPrimitive_FLOAT(value) { - const v = parseFloat(value); - if (isNaN(v)) { - throw 'invalid value for FLOAT: ' + value; +function toPrimitive_FLOAT(value: number | string) { + if (typeof value === 'string') { + const v = parseFloat(value); + return v; + } else if (typeof value === 'number') { + return value; } - - return v; + throw 'invalid value for FLOAT: ' + value; } -function toPrimitive_DOUBLE(value) { - const v = parseFloat(value); - if (isNaN(v)) { - throw 'invalid value for DOUBLE: ' + value; +function toPrimitive_DOUBLE(value: number | string) { + if (typeof value === 'string') { + const v = parseFloat(value); + return v; + } else if (typeof value === 'number') { + return value; } - - return v; + throw 'invalid value for DOUBLE: ' + value; } -function toPrimitive_INT8(value) { +function toPrimitive_INT8(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(-0x80, 0x7f, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(-0x80, 0x7f, v); return v; } catch { throw 'invalid value for INT8: ' + value; } - } -function toPrimitive_UINT8(value) { +function toPrimitive_UINT8(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(0, 0xff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(0, 0xff, v); + return v; } catch { throw 'invalid value for UINT8: ' + value; } - } -function toPrimitive_INT16(value) { +function toPrimitive_INT16(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(-0x8000, 0x7fff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(-0x8000, 0x7fff, v); + return v; } catch { throw 'invalid value for INT16: ' + value; } - } -function toPrimitive_UINT16(value) { +function toPrimitive_UINT16(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(0, 0xffff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(0, 0xffff, v); + return v; } catch { throw 'invalid value for UINT16: ' + value; } - } -function toPrimitive_INT32(value) { +function toPrimitive_INT32(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(-0x80000000, 0x7fffffff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(-0x80000000, 0x7fffffff, v); + return v; } catch { throw 'invalid value for INT32: ' + value; } - } -function toPrimitive_UINT32(value) { + +function toPrimitive_UINT32(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(0, 0xffffffffffff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(0, 0xffffffffffff, v); + return v; } catch { throw 'invalid value for UINT32: ' + value; } - } -function toPrimitive_INT64(value) { +function toPrimitive_INT64(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(-0x8000000000000000, 0x7fffffffffffffff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(-0x8000000000000000, 0x7fffffffffffffff, v); + return v; } catch { throw 'invalid value for INT64: ' + value; } - } -function toPrimitive_UINT64(value) { +function toPrimitive_UINT64(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(0, 0xffffffffffffffff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(0, 0xffffffffffffffff, v); + return v; } catch { throw 'invalid value for UINT64: ' + value; } - } -function toPrimitive_INT96(value) { +function toPrimitive_INT96(value: number | bigint | string) { try { - const v = parseInt(value, 10); - const bigV = BigInt(value); - checkValidValue(-0x800000000000000000000000, 0x7fffffffffffffffffffffff, v, bigV); - if (typeof value === 'bigint' || typeof value === 'string') { - return bigV; - } - + let v = value; + if (typeof v === 'string') v = BigInt(value); + checkValidValue(-0x800000000000000000000000, 0x7fffffffffffffffffffffff, v); + return v; } catch { - throw 'invalid value for INT96: ' + value; + throw 'invalid value for INT96: ' + value; } - } -function toPrimitive_BYTE_ARRAY(value) { +function toPrimitive_BYTE_ARRAY(value: Array) { return Buffer.from(value); } -function toPrimitive_UTF8(value) { +function toPrimitive_UTF8(value: string) { return Buffer.from(value, 'utf8'); } -function fromPrimitive_UTF8(value) { +function fromPrimitive_UTF8(value: string) { return (value !== undefined && value !== null) ? value.toString() : value; } -function toPrimitive_JSON(value) { +function toPrimitive_JSON(value: object) { return Buffer.from(JSON.stringify(value)); } -function fromPrimitive_JSON(value) { +function fromPrimitive_JSON(value: string) { return JSON.parse(value); } -function toPrimitive_BSON(value) { +function toPrimitive_BSON(value: BSON.Document) { return Buffer.from(BSON.serialize(value)); } -function fromPrimitive_BSON(value) { +function fromPrimitive_BSON(value: Buffer) { return BSON.deserialize(value); } -function toPrimitive_TIME_MILLIS(value) { - const v = parseInt(value, 10); - if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) { +function toPrimitive_TIME_MILLIS(value: string | number) { + let v = value + if (typeof value === `string`) { + v = parseInt(value, 10); + } + if (v < 0 || v > 0xffffffffffffffff || typeof v !== 'number') { throw 'invalid value for TIME_MILLIS: ' + value; } return v; } -function toPrimitive_TIME_MICROS(value) { +function toPrimitive_TIME_MICROS(value: string | number | bigint) { const v = BigInt(value); - if (v < 0n || isNaN(v)) { + if (v < 0n ) { throw 'invalid value for TIME_MICROS: ' + value; } @@ -382,50 +368,57 @@ function toPrimitive_TIME_MICROS(value) { const kMillisPerDay = 86400000; -function toPrimitive_DATE(value) { +function toPrimitive_DATE(value: string | Date | number) { /* convert from date */ if (value instanceof Date) { return value.getTime() / kMillisPerDay; } - /* convert from integer */ - { - const v = parseInt(value, 10); - if (v < 0 || isNaN(v)) { - throw 'invalid value for DATE: ' + value; - } +/* convert from integer */ + let v = value + if (typeof value === 'string') { + v = parseInt(value, 10); + } - return v; + if (v < 0 || typeof v !== 'number') { + throw 'invalid value for DATE: ' + value; } + + return v; + } -function fromPrimitive_DATE(value) { +function fromPrimitive_DATE(value: number ) { return new Date(+value * kMillisPerDay); } -function toPrimitive_TIMESTAMP_MILLIS(value) { +function toPrimitive_TIMESTAMP_MILLIS(value: string | Date | number) { /* convert from date */ if (value instanceof Date) { return value.getTime(); } /* convert from integer */ - { - const v = parseInt(value, 10); - if (v < 0 || isNaN(v)) { - throw 'invalid value for TIMESTAMP_MILLIS: ' + value; - } - return v; + let v = value + if (typeof value === 'string' ) { + v = parseInt(value, 10); + } + + if (v < 0 || typeof v !== 'number') { + throw 'invalid value for TIMESTAMP_MILLIS: ' + value; } + + return v; + } -function fromPrimitive_TIMESTAMP_MILLIS(value) { +function fromPrimitive_TIMESTAMP_MILLIS(value: number | string | bigint) { return new Date(Number(value)); } -function toPrimitive_TIMESTAMP_MICROS(value) { +function toPrimitive_TIMESTAMP_MICROS(value: Date | string | number | bigint) { /* convert from date */ if (value instanceof Date) { return BigInt(value.getTime()) * 1000n; @@ -442,11 +435,11 @@ function toPrimitive_TIMESTAMP_MICROS(value) { } } -function fromPrimitive_TIMESTAMP_MICROS(value) { - return new Date(parseInt(value / 1000n)); -} +function fromPrimitive_TIMESTAMP_MICROS(value: number | bigint) { + return typeof value === 'bigint' ? new Date(Number(value / 1000n)): new Date(value / 1000); + } -function toPrimitive_INTERVAL(value) { +function toPrimitive_INTERVAL(value: INTERVAL) { if (!value.months || !value.days || !value.milliseconds) { throw "value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... }"; } @@ -458,7 +451,7 @@ function toPrimitive_INTERVAL(value) { return buf; } -function fromPrimitive_INTERVAL(value) { +function fromPrimitive_INTERVAL(value: string) { const buf = Buffer.from(value); const months = buf.readUInt32LE(0); const days = buf.readUInt32LE(4); @@ -467,12 +460,9 @@ function fromPrimitive_INTERVAL(value) { return { months: months, days: days, milliseconds: millis }; } -function checkValidValue(lowerRange, upperRange, v, bigV) { - if (bigV < lowerRange || bigV > upperRange || isNaN(v)) { +function checkValidValue(lowerRange: number, upperRange: number, v: number | bigint) { + if (v < lowerRange || v > upperRange) { throw "invalid value" } } - -module.exports = { PARQUET_LOGICAL_TYPES, toPrimitive, fromPrimitive }; - diff --git a/package-lock.json b/package-lock.json index a2358a3b..2d1b769f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "0.0.0", "license": "MIT", "dependencies": { + "@types/varint": "^6.0.0", "browserify-zlib": "^0.2.0", "bson": "4.4.0", "cross-fetch": "^3.1.4", @@ -1778,8 +1779,7 @@ "node_modules/@types/node": { "version": "14.17.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-14.17.4.tgz", - "integrity": "sha512-8kQ3+wKGRNN0ghtEn7EGps/B8CzuBz1nXZEIGGLP2GnwbqYn4dbTs7k+VKLTq1HvZLRCIDtN3Snx1Ege8B7L5A==", - "dev": true + "integrity": "sha512-8kQ3+wKGRNN0ghtEn7EGps/B8CzuBz1nXZEIGGLP2GnwbqYn4dbTs7k+VKLTq1HvZLRCIDtN3Snx1Ege8B7L5A==" }, "node_modules/@types/node-int64": { "version": "0.4.29", @@ -1834,6 +1834,14 @@ "@types/node": "*" } }, + "node_modules/@types/varint": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@types/varint/-/varint-6.0.0.tgz", + "integrity": "sha512-2jBazyxGl4644tvu3VAez8UA/AtrcEetT9HOeAbqZ/vAcRVL/ZDFQjSS7rkWusU5cyONQVUz+nwwrNZdMva4ow==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@ungap/promise-all-settled": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz", @@ -7351,8 +7359,7 @@ "@types/node": { "version": "14.17.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-14.17.4.tgz", - "integrity": "sha512-8kQ3+wKGRNN0ghtEn7EGps/B8CzuBz1nXZEIGGLP2GnwbqYn4dbTs7k+VKLTq1HvZLRCIDtN3Snx1Ege8B7L5A==", - "dev": true + "integrity": "sha512-8kQ3+wKGRNN0ghtEn7EGps/B8CzuBz1nXZEIGGLP2GnwbqYn4dbTs7k+VKLTq1HvZLRCIDtN3Snx1Ege8B7L5A==" }, "@types/node-int64": { "version": "0.4.29", @@ -7407,6 +7414,14 @@ "@types/node": "*" } }, + "@types/varint": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@types/varint/-/varint-6.0.0.tgz", + "integrity": "sha512-2jBazyxGl4644tvu3VAez8UA/AtrcEetT9HOeAbqZ/vAcRVL/ZDFQjSS7rkWusU5cyONQVUz+nwwrNZdMva4ow==", + "requires": { + "@types/node": "*" + } + }, "@ungap/promise-all-settled": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz", diff --git a/package.json b/package.json index 7059466d..301e2650 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "url": "git://github.com/LibertyDSNP/parquetjs.git" }, "dependencies": { + "@types/varint": "^6.0.0", "browserify-zlib": "^0.2.0", "bson": "4.4.0", "cross-fetch": "^3.1.4", diff --git a/test/codec_plain.test.js b/test/codec_plain.test.js index 722ab8e3..ffb06535 100644 --- a/test/codec_plain.test.js +++ b/test/codec_plain.test.js @@ -1,7 +1,7 @@ 'use strict'; const chai = require('chai'); const assert = chai.assert; -const parquet_codec_plain = require('../lib/codec/plain.js'); +const parquet_codec_plain = require('../lib/codec/plain'); const assert_util = require('./util/assert_util.js'); describe('ParquetCodec::PLAIN', function() { diff --git a/test/codec_rle.js b/test/codec_rle.js index a20583d7..8714bbf9 100644 --- a/test/codec_rle.js +++ b/test/codec_rle.js @@ -1,7 +1,7 @@ 'use strict'; const chai = require('chai'); const assert = chai.assert; -const parquet_codec_rle = require('../lib/codec/rle.js'); +const parquet_codec_rle = require('../lib/codec/rle'); describe('ParquetCodec::RLE', function() { diff --git a/test/types.js b/test/types.js index 6380e90b..11258857 100644 --- a/test/types.js +++ b/test/types.js @@ -1,10 +1,10 @@ 'use strict'; -const { toPrimitive, fromPrimitive } = require("../lib/types.js") +const { toPrimitive, fromPrimitive } = require("../lib/types") const chai = require('chai'); const assert = chai.assert; describe("toPrimitive INT* should give the correct values back", () => { - it('toPrimitive(INT_8, 127)', () => { + it('toPrimitive(INT_8, 127n)', () => { assert.equal(toPrimitive('INT_8',127n), 127n) }), it('toPrimitive(UINT_8, 255n)', () => { diff --git a/tsconfig.json b/tsconfig.json index fe428752..fd342ec8 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,8 +16,12 @@ "watch": false, "typeRoots": [ "node_modules/@types", - "gen-nodejs" + "gen-nodejs", ] }, - "include": ["parquet.js","lib/**/*", "gen-nodejs/*"] + "include": ["parquet.js","lib/**/*", "gen-nodejs/*"], + "files": ["lib/custom.d.ts"], + "ts-node": { + "files": true + } }