diff --git a/src/arrow-backend.ts b/src/arrow-backend.ts
index ee6da2d..167586e 100644
--- a/src/arrow-backend.ts
+++ b/src/arrow-backend.ts
@@ -3,7 +3,7 @@ import { PassThrough } from 'stream';
 import { CancellationToken } from 'vscode';
 import { ParquetBackend } from './parquet-backend';
 import { jsonSpace } from './settings';
-import { readParquet } from 'parquet-reader';
+import { Stream } from 'stream';
 
 function bigIntToJson(value: bigint) {
   // serialize as a number if it's in bounds, otherwise as a string
@@ -14,9 +14,32 @@ function bigIntToJson(value: bigint) {
 }
 
 export class ArrowBackend extends ParquetBackend {
+  /** Cached `readParquet` export of the lazily imported `parquet-reader` module. */
+  readParquet_: ((path: string, stream: Stream) => void) | undefined;
+
+  /**
+   * Imports `parquet-reader` on first use, then calls its `readParquet` with the given arguments.
+   *
+   * @throws Error if `parquet-reader` cannot be imported; the import failure is attached as `cause`.
+   */
+  private async readParquet(path: string, stream: Stream): Promise<void> {
+    if (this.readParquet_ === undefined) {
+      try {
+        const module = await import("parquet-reader");
+        this.readParquet_ = module.readParquet;
+      } catch (error) {
+        // keep the underlying failure reachable instead of discarding it
+        throw Object.assign(
+          new Error('cannot find prebuilt arrow module, either build the module or use another backend'),
+          { cause: error });
+      }
+    }
+    this.readParquet_(path, stream);
+  }
+
   public async * toJsonImpl(parquetPath: string, _token?: CancellationToken): AsyncGenerator {
     const stream = new PassThrough;
-    readParquet(parquetPath, stream);
+    await this.readParquet(parquetPath, stream);
     const batches = await AsyncRecordBatchStreamReader.from(stream);
 
     // read all records from the file and print them