Skip to content

Commit

Permalink
getting a build working and passing tests
Browse files Browse the repository at this point in the history
disable LZO completely due to overrun error
  • Loading branch information
shannonwells committed Jun 29, 2021
1 parent e1c0809 commit 274eedb
Show file tree
Hide file tree
Showing 16 changed files with 442 additions and 87 deletions.
74 changes: 74 additions & 0 deletions esbuild-plugins.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/**
 * esbuild plugin that redirects every `./compression` import to the
 * browser build of that module at `lib/browser/compression.js`, which
 * does not include LZO or Brotli compression.
 */
const compressionBrowserPlugin = {
  name: 'compressionBrowser',
  setup(build) {
    const nodePath = require('path');

    // Only the exact specifier "./compression" is rewritten.
    const importFilter = /^\.\/compression$/;

    // The target is computed relative to the directory of this file.
    const resolveToBrowserBuild = () => ({
      path: nodePath.resolve(__dirname, 'lib', 'browser', 'compression.js'),
    });

    build.onResolve({ filter: importFilter }, resolveToBrowserBuild);
  },
};

// Lifted from https://esbuild.github.io/plugins/#webassembly-plugin
const wasmPlugin = {
name: 'wasm',
setup(build) {
let path = require('path')
let fs = require('fs')

// Resolve ".wasm" files to a path with a namespace
build.onResolve({ filter: /\.wasm$/ }, args => {
// If this is the import inside the stub module, import the
// binary itself. Put the path in the "wasm-binary" namespace
// to tell our binary load callback to load the binary file.
if (args.namespace === 'wasm-stub') {
return {
path: args.path,
namespace: 'wasm-binary',
}
}

// Otherwise, generate the JavaScript stub module for this
// ".wasm" file. Put it in the "wasm-stub" namespace to tell
// our stub load callback to fill it with JavaScript.
//
// Resolve relative paths to absolute paths here since this
// resolve callback is given "resolveDir", the directory to
// resolve imports against.
if (args.resolveDir === '') {
return // Ignore unresolvable paths
}
return {
path: path.isAbsolute(args.path) ? args.path : path.join(args.resolveDir, args.path),
namespace: 'wasm-stub',
}
})

// Virtual modules in the "wasm-stub" namespace are filled with
// the JavaScript code for compiling the WebAssembly binary. The
// binary itself is imported from a second virtual module.
build.onLoad({ filter: /.*/, namespace: 'wasm-stub' }, async (args) => ({
contents: `import wasm from ${JSON.stringify(args.path)}
export default (imports) =>
WebAssembly.instantiate(wasm, imports).then(
result => result.instance.exports)`,
}))

// Virtual modules in the "wasm-binary" namespace contain the
// actual bytes of the WebAssembly file. This uses esbuild's
// built-in "binary" loader instead of manually embedding the
// binary data inside JavaScript code ourselves.
build.onLoad({ filter: /.*/, namespace: 'wasm-binary' }, async (args) => ({
contents: await fs.promises.readFile(args.path),
loader: 'binary',
}))
},
}

// CommonJS exports: both esbuild plugins defined in this file.
module.exports = { compressionBrowserPlugin, wasmPlugin}

20 changes: 20 additions & 0 deletions esbuild-serve.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
 * Development server script: bundles `parquet.js` for the browser with
 * esbuild and serves the result (as `main.js`) from this directory.
 *
 * The bundle is exposed to pages as the global `parquetjs`.
 */

// Use CommonJS `require` like the rest of this script: the plugins file
// exports through `module.exports`, and this script relies on `require`
// and `__dirname`, which are not available in an ES module.
const { compressionBrowserPlugin, wasmPlugin } = require("./esbuild-plugins");

// esbuild has TypeScript support by default
require('esbuild')
  .serve({
    servedir: __dirname,
  }, {
    entryPoints: ['parquet.js'],
    outfile: 'main.js',
    // Every `define` value must be a string holding the replacement
    // source text, so the boolean is written as "false".
    define: {
      "process.env.NODE_DEBUG": "false",
      "process.env.NODE_ENV": "\"production\"",
      global: "window",
    },
    platform: 'browser',
    plugins: [compressionBrowserPlugin, wasmPlugin],
    sourcemap: "external",
    bundle: true,
    globalName: 'parquetjs',
    inject: ['./esbuild-shims.js'],
  })
  .then(server => {
    console.log("serving parquetjs", server)
    // Call "stop" on the web server when you're done
    // server.stop()
  })
  .catch(e => {
    // Report start-up failures instead of leaving an unhandled rejection,
    // and make the process exit with a failing status.
    console.error("Failed to serve parquetjs: ", e.toString());
    process.exitCode = 1;
  });
32 changes: 32 additions & 0 deletions esbuild.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
 * Production build script: bundles and minifies `parquet.js` for the
 * browser with esbuild, writing the output into `dist/browser`.
 *
 * The bundle is exposed to pages as the global `parquetjs`.
 * The process exits with a non-zero status when the build fails.
 */
const path = require("path")
const {compressionBrowserPlugin, wasmPlugin} = require("./esbuild-plugins");

// esbuild has TypeScript support by default
require('esbuild')
  .build({
    bundle: true,
    entryPoints: ['parquet.js'],
    // With `outdir`, esbuild names the output file after the entry point.
    outdir: path.resolve(__dirname, "dist","browser"),
    // Every `define` value must be a string holding the replacement
    // source text, so the boolean is written as "false".
    define: {
      "process.env.NODE_DEBUG": "false",
      "process.env.NODE_ENV": "\"production\"",
      global: "window"
    },
    globalName: 'parquetjs',
    inject: ['./esbuild-shims.js'],
    minify: true,
    platform: 'browser', // default
    plugins: [compressionBrowserPlugin, wasmPlugin],
    target: "esnext" // default
  })
  .then(res => {
    if (res.warnings.length === 0) {
      console.log("built with no errors or warnings")
      return
    }
    // Surface warnings rather than finishing silently.
    console.warn(`built with ${res.warnings.length} warning(s):`)
    for (const warning of res.warnings) {
      console.warn(`  ${warning.text}`)
    }
  })
  .catch(e => {
    console.error("Finished with errors: ", e.toString());
    // Signal the failure to the caller (npm scripts, CI) via the exit status.
    process.exitCode = 1;
  });



8 changes: 8 additions & 0 deletions examples/server/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Example Server
This is a toy server that illustrates how to use the parquetjs library built with esbuild.
To run it:
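1. Run `npm install`.
1. View and edit the files in `views` to taste.
1. Start the example server with `node app.js`.
1. In a separate terminal, build and serve the parquetjs bundle (for example with `esbuild-serve.js` in the repository root); the example page loads it from `http://localhost:8000/main.js`.
1. Visit `http://localhost:3000`, click the buttons, and watch the results in the browser console.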
20 changes: 20 additions & 0 deletions examples/server/app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Toy Express server for the parquetjs browser example: serves static
// files from ./public and renders the EJS view "parquetFiles" at "/".
const path = require("path")
const express = require('express')

const PORT = 3000
const STATIC_DIR = path.join(__dirname, 'public')

const server = express()

// Static assets are registered before the page route.
server.use(express.static(STATIC_DIR));

// Render ".ejs" views with the ejs package's Express adapter.
server.engine('ejs', require('ejs').__express);
server.set('view engine', 'ejs');

// Landing page.
function renderParquetFiles(req, res) {
  const viewData = {
    // files: "fruits, fruits-no-index".split(','),
    title: "Parquet Files",
  }
  res.render('parquetFiles', viewData)
}
server.get('/', renderParquetFiles)

function announceListening() {
  console.log(`Example app listening at http://localhost:${PORT}`)
}
server.listen(PORT, announceListening)
10 changes: 10 additions & 0 deletions examples/server/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
  "name": "untitled",
  "version": "1.0.0",
  "main": "app.js",
  "license": "MIT",
  "dependencies": {
    "@dsnp/parquetjs": "../parquetjs",
    "ejs": "^3.1.6",
    "express": "^4.17.1"
  }
}
2 changes: 2 additions & 0 deletions examples/server/views/footer.ejs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
</body>
</html>
10 changes: 10 additions & 0 deletions examples/server/views/header.ejs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title><%= title %></title>
<script src="http://localhost:8000/main.js"></script>
<link rel="icon" href="data:,">
</head>
<body>
44 changes: 44 additions & 0 deletions examples/server/views/parquetFiles.ejs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

<%- include("./header.ejs", {title: "Parquet files" }) %>
<script>
/**
 * Opens a parquet file served by the example server and logs every
 * record to the console.
 *
 * The reader is closed whether or not reading succeeds. Failures are
 * logged to the console and not rethrown, so this is safe to call
 * directly from an `onclick` handler.
 *
 * @param {string} filename - name of a file below `/files/` on the example server
 * @returns {Promise<void>} settles once the reader has been closed
 */
const readit = async function (filename) {
  try {
    const reader = await parquetjs.ParquetReader
      .openUrl(`http://localhost:3000/files/${filename}`);
    try {
      const cursor = await reader.getCursor();
      let record = null;
      // cursor.next() yields a falsy value once all records are consumed
      while ((record = await cursor.next())) {
        console.log("record", record);
      }
    } finally {
      // Close the reader even when getCursor() or next() throws.
      await reader.close();
    }
  } catch (e) {
    console.error("error", e);
  }
};
/**
 * Checks the bloom filters of one column of a parquet file for a value.
 *
 * The reader is always closed before the result is returned, including
 * when the column has no bloom filters or a check fails.
 *
 * @param {string} filename - name of a file below `/files/` on the example server
 * @param {string} columnName - column whose bloom filters are consulted
 * @param {*} searchTerm - value passed to each filter's `check`
 * @returns {Promise<boolean>} true when at least one filter's check yields
 *   `true`; false when none does or the column has no bloom filters
 */
const search = async function (filename, columnName, searchTerm) {
  const rdr = await parquetjs.ParquetReader.openUrl(`http://localhost:3000/files/${filename}`);
  try {
    const bfs = await rdr.getBloomFiltersFor([columnName]);
    const filters = bfs[columnName];
    if (!filters) return false;
    const results = await Promise.all(
      filters.map((item) => item.sbbf.check(searchTerm))
    );
    return results.some((hit) => hit === true);
  } finally {
    // Await the close so it neither leaks on early return nor floats.
    await rdr.close();
  }
};
</script>

<ul>
<li><button onclick="readit('fruits-no-index.parquet')" >No index</button></li>
<li><button onclick="readit('fruits-statistics.parquet')" >with statistics</button></li>
<li><button onclick="readit('fruits-bloomfilter.parquet')" >with Bloom filter</button></li>
<li><button onclick="readit('alltypes_dictionary.parquet')" >all types w/ dictionary</button></li>
<li><button onclick="readit('fruits.parquet')" >With (unsupported) Brötli compression</button></li>
</ul>
<h3>Search fruits-bloomfilter.parquet "name" column for</h3>
<button onclick="search('fruits-bloomfilter.parquet', 'name', 'kiwi')">Search fruits-bloomfilter.parquet for "kiwi"</button>

<%- include("./footer.ejs") %>
12 changes: 4 additions & 8 deletions lib/bloomFilterIO/bloomFilterReader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,10 @@ const filterColumnChunksWithBloomFilters = (
columnChunkDataCollection: Array<ColumnChunkData>
) => {
return columnChunkDataCollection.filter((columnChunk) => {
const {
column: {
meta_data: {
bloom_filter_offset: { buffer: bloomFilterOffsetBuffer },
},
},
} = columnChunk;
return bloomFilterOffsetBuffer;
if (!columnChunk.column.meta_data.bloom_filter_offset) {
return null
}
return columnChunk?.column?.meta_data?.bloom_filter_offset?.buffer
});
};

Expand Down
1 change: 0 additions & 1 deletion lib/codec/plain.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ function decodeValues_INT32(cursor, count) {
function encodeValues_INT64(values) {
let buf = Buffer.alloc(8 * values.length);
for (let i = 0; i < values.length; i++) {
//console.log(typeof values[i]);
buf.writeBigInt64LE(BigInt(values[i]), i*8);
}

Expand Down
35 changes: 18 additions & 17 deletions lib/compression.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
const zlib = require('zlib');
const snappy = require('snappyjs');
// const lzo = require('lzo');
import wasmBrotli from 'wasm-brotli'
import { compress as brotliCompress, decompress as brotliDecompress } from 'wasm-brotli'


const PARQUET_COMPRESSION_METHODS = {
'UNCOMPRESSED': {
Expand All @@ -17,10 +18,11 @@ const PARQUET_COMPRESSION_METHODS = {
deflate: deflate_snappy,
inflate: inflate_snappy
},
'LZO': {
deflate: deflate_lzo,
inflate: inflate_lzo
},
// Disabled. See: https://github.com/LibertyDSNP/parquetjs/issues/18
// 'LZO': {
// deflate: deflate_lzo,
// inflate: inflate_lzo
// },
'BROTLI': {
deflate: deflate_brotli,
inflate: inflate_brotli
Expand Down Expand Up @@ -49,14 +51,13 @@ function deflate_gzip(value) {
/**
 * Compresses `value` with the `compress` function of the snappyjs module.
 */
function deflate_snappy(value) {
  const compressed = snappy.compress(value);
  return compressed;
}

function deflate_lzo(value) {
return lzo.compress(value);
}
//
// function deflate_lzo(value) {
// return lzo.compress(value);
// }

async function deflate_brotli(value) {
const { compress } = await wasmBrotli
const compressedContent = compress(value, {
const compressedContent = await brotliCompress(value, {
mode: 0,
quality: 8,
lgwin: 22
Expand Down Expand Up @@ -86,15 +87,15 @@ function inflate_gzip(value) {
/**
 * Decompresses `value` with the `uncompress` function of the snappyjs module.
 */
function inflate_snappy(value) {
  const uncompressed = snappy.uncompress(value);
  return uncompressed;
}

function inflate_lzo(value) {
return lzo.decompress(value);
}
//
// function inflate_lzo(value) {
// return lzo.decompress(value);
// }

async function inflate_brotli(value) {
const { uncompress } = await wasmBrotli
const uncompressedContent = uncompress(value)
const uncompressedContent = await brotliDecompress(value)
return Buffer.from(uncompressedContent);
}

module.exports = { PARQUET_COMPRESSION_METHODS, deflate, inflate };

Loading

0 comments on commit 274eedb

Please sign in to comment.