Skip to content

Commit

Permalink
feat: add CSV output option
Browse files Browse the repository at this point in the history
user can now choose between JSON and CSV output
and customize the CSV separator

closes: #75
  • Loading branch information
dvirtz committed May 22, 2024
1 parent 820c2dc commit 8f5baf0
Show file tree
Hide file tree
Showing 39 changed files with 2,852 additions and 73 deletions.
5 changes: 3 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
"preLaunchTask": "npm: build",
"env": {
"DEBUG_MODE": "TRUE",
"LOG_TO_CONSOLE": "true"
"LOG_TO_CONSOLE": "true",
"TEST_GLOB_PATTERN": "extension.test.js"
},
"resolveSourceMapLocations": [
"${workspaceFolder}/**",
"!**/node_modules/**"
],
"skipFiles": [
// "<node_internals>/**"
"<node_internals>/**"
]
},
{
Expand Down
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

# parquet-viewer

Views [Apache Parquet](https://parquet.apache.org/) files as JSON.
Views [Apache Parquet](https://parquet.apache.org/) files as text (JSON or CSV).

## Features

When opening a Parquet file, a JSON presentation of the file will open automatically:
When opening a Parquet file, a textual presentation of the file will open automatically:

![automatic](images/automatic.gif)

After closing the JSON view, it is possible to reopen it by clicking on the link in the parquet view.
After closing the textual view, it is possible to reopen it by clicking on the link in the parquet view.

![command](images/reopen.gif)

Expand Down Expand Up @@ -38,21 +38,27 @@ It only supports parquet version 1.0.0 with snappy compression.

This is a legacy Java backend, using [parquet-tools](https://mvnrepository.com/artifact/org.apache.parquet/parquet-tools). To use it, set `parquet-viewer.backend` to `parquet-tools`; `parquet-tools` must either be in your `PATH` or be pointed to by the `parquet-viewer.parquetToolsPath` setting.

![settings](images/settings.png)
## Format

The textual output can be either JSON or CSV based on the [parquet-viewer.format](#settings) setting.

## Settings

![settings](images/settings.png)

The following setting options are available:

|name|default|description|
|----|-------|-----------|
|`parquet-viewer.backend`|`parquets`|Which backend to use for reading the files|
|`parquet-viewer.format`|`json`|textual output format|
|`parquet-viewer.logging.panel`|`false`|Whether to write diagnostic logs to an output panel|
|`parquet-viewer.logging.folder`|empty|Write diagnostic logs under the given directory|
|`parquet-viewer.logging.level`|info|Diagnostic log level. Choose between: `off`, `fatal`, `error`, `warn`, `info`, `debug` or `trace`|
|`parquet-viewer.parquetToolsPath`|`parquet-tools`|The name of the parquet-tools executable or a path to the parquet-tools jar|
|`parquet-viewer.json.space`|0|JSON indentation space, passed to `JSON.stringify` as is, see [mdn](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify#parameters) for details|
|`parquet-viewer.json.asArray`|`false`|Wether to format output JSON as one big array|
|`parquet-viewer.json.asArray`|`false`|Whether to format output JSON as one big array|
|`parquet-viewer.csv.separator`|`', '`|CSV separator|

## Notes

Expand Down
21 changes: 20 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@
},
"parquet-viewer.logging.panel": {
"description": "Write diagnostic logs to an output panel",
"type": "boolean"
"type": "boolean",
"default": false
},
"parquet-viewer.logging.folder": {
"description": "Write diagnostic logs under the given directory",
Expand Down Expand Up @@ -136,6 +137,24 @@
"markdownDescription": "Format output JSON as one big array",
"type": "boolean",
"default": false
},
"parquet-viewer.format": {
"markdownDescription": "Textual representation format",
"type": "string",
"enum": [
"json",
"csv"
],
"enumItemLabels": [
"JSON",
"CSV"
],
"default": "json"
},
"parquet-viewer.csv.separator": {
"markdownDescription": "CSV field separator",
"type": "string",
"default": ", "
}
}
},
Expand Down
6 changes: 0 additions & 6 deletions src/formatter-factory.ts

This file was deleted.

27 changes: 27 additions & 0 deletions src/formatters/csv-formatter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { csvSeparator } from '../settings';
import { Formatter } from "./formatter";

/**
 * Formats rows as separator-delimited text.
 *
 * The first emitted line is a header built from the keys of the first row;
 * every row (the first one included) is then emitted as one line of its values.
 * The separator is taken from `csvSeparator()`.
 */
export class CsvFormatter implements Formatter {
  /** Characters that force a field to be quoted regardless of the separator. */
  private static readonly mustQuote = /["\r\n]/;

  /**
   * Converts a stream of row objects into CSV lines.
   *
   * @param lines rows to format; the keys of the first row define the header
   * @returns the header line followed by one line per row; nothing if `lines` is empty
   */
  async* format(lines: AsyncGenerator<object>): AsyncGenerator<string> {
    // Read the separator once so the header and every row use the same one.
    const separator = csvSeparator();
    const first = await lines.next();
    // Test `done` rather than the truthiness of `value`: it is the iterator
    // protocol's own end-of-input signal.
    if (!first.done) {
      yield this.generateHeader(first.value, separator);
      yield this.generateRow(first.value, separator);
    }
    for await (const line of lines) {
      yield this.generateRow(line, separator);
    }
  }

  /**
   * Formats an error message for display.
   *
   * @param message the error text
   * @returns `message` unchanged
   */
  format_error(message: string): string {
    return message;
  }

  /** Builds the header line from the row's own enumerable keys. */
  private generateHeader(line: object, separator: string): string {
    return Object.keys(line)
      .map(key => this.escapeField(key, separator))
      .join(separator);
  }

  /** Builds one data line from the row's own enumerable values. */
  private generateRow(line: object, separator: string): string {
    return Object.values(line)
      .map(value => this.escapeField(value, separator))
      .join(separator);
  }

  /**
   * Renders a single field, quoting it when it would otherwise be ambiguous.
   *
   * A field containing the separator, a double quote or a line break is wrapped
   * in double quotes with embedded quotes doubled; any other field is emitted
   * verbatim.
   */
  private escapeField(value: unknown, separator: string): string {
    // Mirror Array.prototype.join, which renders null and undefined as ''.
    const text = value == null ? '' : String(value);
    // An empty separator is contained in every string, so it must not trigger quoting.
    const hasSeparator = separator !== '' && text.includes(separator);
    if (!hasSeparator && !CsvFormatter.mustQuote.test(text)) {
      return text;
    }
    return `"${text.replace(/"/g, '""')}"`;
  }
}
13 changes: 13 additions & 0 deletions src/formatters/formatter-factory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { format } from "../settings";
import { CsvFormatter } from "./csv-formatter";
import { Formatter } from "./formatter";
import { JsonFormatter } from "./json-formatter";

/**
 * Creates the formatter selected by the `format` setting.
 *
 * @returns a new `JsonFormatter` for `'json'` or a new `CsvFormatter` for `'csv'`
 * @throws Error when the configured value is not one of the known format names,
 *   which can happen at runtime if the setting was edited by hand
 */
export function createFormatter(): Formatter {
  const name = format();
  switch (name) {
    case 'json':
      return new JsonFormatter();
    case 'csv':
      return new CsvFormatter();
    default: {
      // Compile-time exhaustiveness check: adding a format name without a case
      // above makes this assignment fail to type-check.
      const unsupported: never = name;
      throw new Error(`Unsupported output format: ${String(unsupported)}`);
    }
  }
}
3 changes: 3 additions & 0 deletions src/formatters/formatter-name.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// https://stackoverflow.com/a/64174790/621176
// Tuple of the output format names; `as const` keeps each entry as a string literal type.
export const FormatterNames = ['json', 'csv'] as const;
// Union of the literal names in `FormatterNames` ('json' | 'csv'), derived from the tuple
// so the runtime list and the type cannot drift apart.
export type FormatterName = typeof FormatterNames[number];
File renamed without changes.
2 changes: 1 addition & 1 deletion src/json-formatter.ts → src/formatters/json-formatter.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Formatter } from "./formatter";
import { jsonSpace, jsonAsArray } from './settings';
import { jsonSpace, jsonAsArray } from '../settings';

export class JsonFormatter extends Formatter {
async* format(lines: AsyncGenerator<object>): AsyncGenerator<string> {
Expand Down
4 changes: 2 additions & 2 deletions src/parquet-document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as os from 'os';
import { getLogger } from './logger';
import { createParquetBackend } from './backends/parquet-backend-factory';
import { backend, affectsDocument } from './settings';
import { createFormatter } from './formatter-factory';
import { createFormatter } from './formatters/formatter-factory';
import assert from 'assert';

export default class ParquetDocument implements vscode.Disposable {
Expand All @@ -22,7 +22,7 @@ export default class ParquetDocument implements vscode.Disposable {
private constructor(uri: vscode.Uri, emitter: vscode.EventEmitter<vscode.Uri>) {
this._uri = uri;
this._emitter = emitter;
this._parquetPath = this._uri.fsPath.replace(/\.as\.json$/, '');
this._parquetPath = this._uri.fsPath.replace(/\.as\.\w+$/, '');
const watcher = vscode.workspace.createFileSystemWatcher(new vscode.RelativePattern(this._parquetPath, "*"));
this._disposable = vscode.Disposable.from(watcher,
watcher.onDidChange(this.update.bind(this)),
Expand Down
15 changes: 7 additions & 8 deletions src/parquet-editor-provider.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
import * as vscode from "vscode";
import * as path from 'path';
import { getNonce } from './util';
import { Disposable } from "./dispose";
import { ParquetTextDocumentContentProvider } from './parquet-document-provider';
import { getLogger } from './logger';
import { ParquetTextDocumentContentProvider } from './parquet-document-provider';
import { format } from "./settings";
import { getNonce } from './util';
class CustomParquetDocument extends Disposable implements vscode.CustomDocument {
uri: vscode.Uri;
path: string;

constructor(uri: vscode.Uri) {
super();
this.uri = uri;
this.path = uri.fsPath;
this.path = `${uri.fsPath}.as.${format()}`;
}

public async open() {
getLogger().info(`opening ${this.path}.as.json`);
getLogger().info(`opening ${this.path}`);
await vscode.window.showTextDocument(
this.uri.with({ scheme: 'parquet', path: this.path + '.as.json' })
this.uri.with({ scheme: 'parquet', path: `${this.path}` })
);
}
}
Expand Down Expand Up @@ -80,9 +80,8 @@ export class ParquetEditorProvider implements vscode.CustomReadonlyEditorProvide
<!-- <meta http-equiv="Content-Security-Policy" content="default-src 'none'; script-src ${webview.cspSource} 'nonce-${nonce}'; img-src ${webview.cspSource}; style-src 'unsafe-inline' ${webview.cspSource};"> -->
</head>
<body>
<p>Click <a href="${path.basename(document.uri.fsPath)}.as.json" id="here">here</a> to open JSON</p>
<p>Click <a href="${document.path}" id="here">here</a> to view contents</p>
<script nonce="${nonce}">
//# sourceURL=to-json.js
const vscode = acquireVsCodeApi();
document.getElementById('here').addEventListener('click', _ => {
vscode.postMessage('clicked');
Expand Down
31 changes: 17 additions & 14 deletions src/settings.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import * as vscode from 'vscode';
import { LogLevel } from '@vscode-logging/logger';
import { BackendName } from './backends/backend-name';
import { name } from '../package.json'
import { name, contributes } from '../package.json';
import { FormatterName } from './formatters/formatter-name';

const propertiesMeta = contributes.configuration.properties;

function settings() {
return vscode.workspace.getConfiguration(name);
Expand All @@ -11,16 +14,8 @@ export function parquetTools(): string | undefined {
return settings().get('parquetToolsPath');
}

export async function setParquetTools(parquetTools: string | undefined): Promise<void> {
await settings().update('parquetToolsPath', parquetTools);
}

export function logPanel(): boolean {
return settings().get('logging.panel', settings().get('logPanel', false));
}

export async function setLogPanel(logPanel: boolean | undefined): Promise<void> {
await settings().update('logging.panel', logPanel);
return settings().get('logging.panel', settings().get('logPanel', propertiesMeta['parquet-viewer.logging.panel']['default']));
}

export function logFolder(): string {
Expand All @@ -32,7 +27,7 @@ export async function setLogFolder(logFolder: string | undefined): Promise<void>
}

export function logLevel(): LogLevel {
return settings().get('logging.level', settings().get('logLevel', 'info'));
return settings().get('logging.level', settings().get('logLevel', propertiesMeta['parquet-viewer.logging.level']['default'] as LogLevel));
}

export async function setLogLevel(logLevel: LogLevel | undefined): Promise<void> {
Expand All @@ -48,11 +43,19 @@ export function jsonSpace(): number | string | undefined {
}

export function backend(): BackendName {
return useParquetTools() ? 'parquet-tools' : settings().get('backend', 'parquets');
return useParquetTools() ? 'parquet-tools' : settings().get('backend', propertiesMeta['parquet-viewer.backend']['default'] as BackendName);
}

export function jsonAsArray(): boolean {
return settings().get('json.asArray', false);
return settings().get('json.asArray', propertiesMeta['parquet-viewer.json.asArray']['default']);
}

/**
 * Reads the `format` setting.
 *
 * @returns the configured value, or the default declared for
 *   `parquet-viewer.format` in package.json when the setting is absent
 */
export function format(): FormatterName {
  const declaredDefault = propertiesMeta['parquet-viewer.format']['default'] as FormatterName;
  return settings().get('format', declaredDefault);
}

/**
 * Reads the `csv.separator` setting.
 *
 * @returns the configured value, or the default declared for
 *   `parquet-viewer.csv.separator` in package.json when the setting is absent
 */
export function csvSeparator(): string {
  const declaredDefault = propertiesMeta['parquet-viewer.csv.separator']['default'];
  return settings().get('csv.separator', declaredDefault);
}

function settingsChanged(e: vscode.ConfigurationChangeEvent, sections: string[]): boolean {
Expand All @@ -64,5 +67,5 @@ export function affectsLogging(e: vscode.ConfigurationChangeEvent): boolean {
}

export function affectsDocument(e: vscode.ConfigurationChangeEvent): boolean {
return settingsChanged(e, ['backend', 'useParquetTools', 'json.space', 'json.asArray']);
return settingsChanged(e, ['backend', 'useParquetTools', 'json.space', 'json.asArray', 'csv.separator']);
}
2 changes: 1 addition & 1 deletion test/integration/parquet-editor-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ export async function runTest() {
expectedContent: string | RegExp,
context: TestContext): Promise<Mock<(document: vscode.TextDocument) => void>> {
const listener = context.mock.fn((document: vscode.TextDocument) => {
assert.equal(document.fileName, `${file.fsPath}.as.json`);
assert.equal(document.fileName, `${file.fsPath}.as.${settings.format()}`);
const actual = document.getText();
const message = [`${document.fileName} content mismatch`, 'expected', expectedContent, 'actual', actual].join('\n');
if (typeof expectedContent === 'string') {
Expand Down
5 changes: 3 additions & 2 deletions test/integration/runTests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@ import * as meta from '../../package.json';

export async function run(): Promise<void> {
const vscode = await import('vscode');
const pattern = process.env.TEST_GLOB_PATTERN || '*.test.[jt]s';
try {
if (process.env.TEST_SUBPROCESS) {
const { run: runNodeTests } = await import('node:test');
const { glob } = await import('glob');
const { spec } = await import('node:test/reporters');
const { finished } = await import('stream/promises')
await finished(runNodeTests({
files: await glob(`${__dirname}/*.test.[jt]s`),
files: await glob(pattern, { cwd: __dirname }),
concurrency: false,
inspectPort: process.debugPort,
})
.compose(new spec)
.pipe(process.stdout));
} else {
const { globIterate } = await import('glob');
for await (const test of globIterate('*.test.[jt]s', { cwd: __dirname })) {
for await (const test of globIterate(pattern, { cwd: __dirname })) {
const { runTest } = await import(`./${test}`);
await runTest();
}
Expand Down
5 changes: 2 additions & 3 deletions test/unit/backend.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@ import toArray from '@async-generators/to-array';
import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import os from 'os';
import * as path from 'path';
import { BackendNames } from '../../src/backends/backend-name';
import { createParquetBackend } from '../../src/backends/parquet-backend-factory';
import { workspace } from './workspace';
import * as workspace from './workspace';

// parquet-tools doesn't work on Apple M1
for (const backendName of BackendNames.filter(backend => os.type() != 'Darwin' || os.arch() == 'x64' || backend != 'parquet-tools')) {
Expand Down Expand Up @@ -40,7 +39,7 @@ for (const backendName of BackendNames.filter(backend => os.type() != 'Darwin' |
})(),
onCancellationRequested: context.mock.fn(_ => ({dispose: () => undefined}))
};
assert.equal((await toArray(backend.generateRows(path.join(workspace, `small.parquet`), token))).length, 1);
assert.equal((await toArray(backend.generateRows(workspace.parquet('small'), token))).length, 1);
assert.equal(token.isCancellationRequestedMock.mock.callCount(), 2);
});
});
Expand Down
Loading

0 comments on commit 8f5baf0

Please sign in to comment.