From 123bf4f8773119025f2b885dca715db5ac591545 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Thu, 6 Jul 2023 10:54:20 +1200 Subject: [PATCH 1/3] feat(group): add cli to group input items into set size outputs --- README.md | 10 +++ src/commands/common.ts | 10 ++- src/commands/group/__test__/group.test.ts | 58 +++++++++++++++++ src/commands/group/group.ts | 64 +++++++++++++++++++ src/commands/index.ts | 2 + .../tileindex-validate/tileindex.validate.ts | 11 +--- 6 files changed, 145 insertions(+), 10 deletions(-) create mode 100644 src/commands/group/__test__/group.test.ts create mode 100644 src/commands/group/group.ts diff --git a/README.md b/README.md index be50f468..64ffbab1 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ LINZ uses [Argo workflows](https://argoproj.github.io/workflows/) for running bu - [lds-fetch-layer](#lds-fetch-layer) - [create-manifest](#create-manifest) +- [group](#group) - [list](#list) - [stac catalog](#stac-catalog) - [stac github-import](#stac-github-import) @@ -83,6 +84,15 @@ Copy a manifest of files between two locations, for manifest creation see [creat copy ./debug/manifest-eMxkhansySrfQt79rIbAGOGrQ2ne-h4GdLXkbA3O6mo.json --concurrency 10 ``` + +### group + +group a input list into a array of arrays +```bash +group --size 2 "a" "b" "c" '["1","2","3"]' +# [["a","b"], ["c","1"], ["2", "3"]] +``` + ### stac catalog Create STAC catalog JSON file when given links to catalog template JSON file and location to search for collection.json files. 
diff --git a/src/commands/common.ts b/src/commands/common.ts index d8832d04..3209a6b3 100644 --- a/src/commands/common.ts +++ b/src/commands/common.ts @@ -1,4 +1,4 @@ -import { flag, option, optional, string } from 'cmd-ts'; +import { boolean, flag, option, optional, string } from 'cmd-ts'; import { registerFileSystem } from '../fs.register.js'; import { registerLogger } from '../log.js'; @@ -13,6 +13,14 @@ export const verbose = flag({ description: 'Verbose logging', }); +export const forceOutput = flag({ + type: boolean, + defaultValue: () => false, + long: 'force-output', + description: 'force output additional files', + defaultValueIsSerializable: true, +}); + export function registerCli(args: { verbose?: boolean; config?: string }): void { cleanArgs(args); registerLogger(args); diff --git a/src/commands/group/__test__/group.test.ts b/src/commands/group/__test__/group.test.ts new file mode 100644 index 00000000..a73517f4 --- /dev/null +++ b/src/commands/group/__test__/group.test.ts @@ -0,0 +1,58 @@ +import { fsa } from '@chunkd/fs'; +import { FsMemory } from '@chunkd/source-memory'; +import assert from 'node:assert'; +import { before, describe, it } from 'node:test'; +import { commandGroup, groupItems } from '../group.js'; + +describe('groupItems', () => { + it('should group items', () => { + assert.deepEqual(groupItems([1, 2, 3, 4], 2), [ + [1, 2], + [3, 4], + ]); + + assert.deepEqual(groupItems([1, 2, 3, 4], 3), [[1, 2, 3], [4]]); + }); + + it('should group max', () => { + assert.deepEqual(groupItems([1, 2, 3, 4], 100), [[1, 2, 3, 4]]); + }); + + it('should group min', () => { + assert.deepEqual(groupItems([1, 2, 3, 4], 1), [[1], [2], [3], [4]]); + }); +}); + +describe('group', () => { + const memoryFs = new FsMemory(); + before(() => { + fsa.register('/tmp/group', memoryFs); + }); + it('should load from a JSON array', async () => { + await commandGroup.handler({ inputs: [JSON.stringify([1, 2, 3, 4])], forceOutput: true, size: 50 } as any); + 
/**
 * Chunk an array into consecutive groups of at most `groupSize` items.
 *
 * Input order is preserved. The last group holds the remainder and may be
 * shorter than `groupSize`. An empty input produces an empty output.
 *
 * @example
 *
 * ```typescript
 * groupItems(["a","b","c"], 2) // => [["a","b"], ["c"]]
 * ```
 *
 * @param items items to split; the array is not modified
 * @param groupSize maximum number of items per group, must be a positive integer
 * @returns the groups, in input order
 * @throws RangeError when `groupSize` is not a positive integer
 */
export function groupItems<T>(items: T[], groupSize: number): T[][] {
  // A zero or negative step never moves `index` past the end of `items`, so the
  // loop below would never terminate; NaN / fractional steps give malformed groups.
  if (!Number.isInteger(groupSize) || groupSize < 1) {
    throw new RangeError(`groupSize must be a positive integer, got: ${groupSize}`);
  }

  const output: T[][] = [];
  for (let index = 0; index < items.length; index += groupSize) {
    output.push(items.slice(index, index + groupSize));
  }
  return output;
}

/**
 * Normalize an input as either a JSON array or just an array.
 *
 * - An input starting with `[` is parsed as JSON and its elements are returned.
 * - Any other input is returned as a single element array.
 *
 * Elements of a JSON array are passed through untouched, so they are not
 * necessarily strings (eg `"[1,2]"` gives `[1, 2]`).
 *
 * @param x raw command line argument
 * @returns the items contained in the argument
 * @throws SyntaxError when the input starts with `[` but is not valid JSON
 */
function loadInput(x: string): unknown[] {
  if (!x.startsWith('[')) return [x];

  const parsed: unknown = JSON.parse(x);
  // Valid JSON that starts with "[" is always an array; this check only narrows the type.
  if (!Array.isArray(parsed)) throw new TypeError('Input is not a JSON array');
  return parsed;
}
/**
 * `group` command: flatten every input (plain values and JSON arrays) into one
 * list and split that list into groups of `--size` items.
 *
 * The grouped result is written as JSON to `/tmp/group/output.json` when
 * `--force-output` is set or when `isArgo()` reports true.
 *
 * Exits the process with code 1 when no inputs are supplied or when `--size`
 * is not a positive integer.
 */
export const commandGroup = command({
  name: 'group',
  description: 'group a array of inputs into a set ',
  args: CommandGroupArgs,
  handler: async (args) => {
    registerCli(args);
    if (args.inputs.length === 0) {
      logger.error('Group:Error:Empty');
      process.exit(1);
    }
    // A size of zero or less would never advance the grouping loop, fail fast instead of hanging
    if (!Number.isInteger(args.size) || args.size < 1) {
      logger.error({ size: args.size }, 'Group:Error:InvalidSize');
      process.exit(1);
    }

    // loadInput is synchronous, so expand and flatten the inputs in a single pass
    const items = args.inputs.flatMap(loadInput);
    const grouped = groupItems(items, args.size);
    // Report the number of items after JSON arrays are expanded, not the number of CLI arguments
    logger.info({ files: items.length, groups: grouped.length }, 'Group:Done');
    if (args.forceOutput || isArgo()) {
      await fsa.write('/tmp/group/output.json', JSON.stringify(grouped));
    }
  },
});
{ findBoundingBox } from '../../utils/geotiff.js'; import { MapSheet, SheetRanges } from '../../utils/mapsheet.js'; -import { config, registerCli, verbose } from '../common.js'; +import { config, forceOutput, registerCli, verbose } from '../common.js'; import { CommandListArgs } from '../list/list.js'; -// import { CommandListArgs } from '../list/list.js'; const SHEET_MIN_X = MapSheet.origin.x + 4 * MapSheet.width; // The minimum x coordinate of a valid sheet / tile const SHEET_MAX_X = MapSheet.origin.x + 46 * MapSheet.width; // The maximum x coordinate of a valid sheet / tile @@ -106,13 +105,7 @@ export const commandTileIndexValidate = command({ description: 'Validate that all input tiffs perfectly align to tile grid', defaultValueIsSerializable: true, }), - forceOutput: flag({ - type: boolean, - defaultValue: () => false, - long: 'force-output', - description: 'force output additional files', - defaultValueIsSerializable: true, - }), + forceOutput, location: restPositionals({ type: string, displayName: 'location', description: 'Location of the source files' }), }, async handler(args) { From 8087266e0a0dd66523a03fc504b6c8ee893d8834 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Thu, 6 Jul 2023 11:51:32 +1200 Subject: [PATCH 2/3] Update README.md Co-authored-by: Alice Fage --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 64ffbab1..acc5c791 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ copy ./debug/manifest-eMxkhansySrfQt79rIbAGOGrQ2ne-h4GdLXkbA3O6mo.json --concurr ### group -group a input list into a array of arrays +group an input list into an array of arrays ```bash group --size 2 "a" "b" "c" '["1","2","3"]' # [["a","b"], ["c","1"], ["2", "3"]] From b80033bca5e1a6d4cc53e1ece99d5f956b83f401 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Thu, 6 Jul 2023 11:52:01 +1200 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: paulfouquet 
<86932794+paulfouquet@users.noreply.github.com> --- src/commands/group/group.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/commands/group/group.ts b/src/commands/group/group.ts index d082e903..0510e06b 100644 --- a/src/commands/group/group.ts +++ b/src/commands/group/group.ts @@ -4,7 +4,7 @@ import { logger } from '../../log.js'; import { isArgo } from '../../utils/argo.js'; import { config, forceOutput, registerCli, verbose } from '../common.js'; -/** Chunk a array into a group size +/** Chunk an array into a group size * @example * * ```typescript @@ -20,7 +20,7 @@ export function groupItems(items: T[], groupSize: number): T[][] { return output; } -/** Normalize a input as either a JSON array or just a array */ +/** Normalize an input as either a JSON array or just an array */ function loadInput(x: string): string[] { if (x.startsWith('[')) return JSON.parse(x); return [x];