Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(group): add cli to group input items into set size outputs #539

Merged
merged 3 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ LINZ uses [Argo workflows](https://argoproj.github.io/workflows/) for running bu

- [lds-fetch-layer](#lds-fetch-layer)
- [create-manifest](#create-manifest)
- [group](#group)
- [list](#list)
- [stac catalog](#stac-catalog)
- [stac github-import](#stac-github-import)
Expand Down Expand Up @@ -83,6 +84,15 @@ Copy a manifest of files between two locations, for manifest creation see [creat
copy ./debug/manifest-eMxkhansySrfQt79rIbAGOGrQ2ne-h4GdLXkbA3O6mo.json --concurrency 10
```


### group

Group an input list into an array of arrays, each holding at most `--size` items. Inputs may be plain strings or JSON arrays.
```bash
group --size 2 "a" "b" "c" '["1","2","3"]'
# [["a","b"], ["c","1"], ["2", "3"]]
```

### stac catalog

Create STAC catalog JSON file when given links to catalog template JSON file and location to search for collection.json files.
Expand Down
10 changes: 9 additions & 1 deletion src/commands/common.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { flag, option, optional, string } from 'cmd-ts';
import { boolean, flag, option, optional, string } from 'cmd-ts';
import { registerFileSystem } from '../fs.register.js';
import { registerLogger } from '../log.js';

Expand All @@ -13,6 +13,14 @@ export const verbose = flag({
description: 'Verbose logging',
});

/**
 * Shared `--force-output` boolean flag, off unless passed on the command line.
 *
 * Defined here so that commands can reuse a single definition instead of re-declaring it.
 */
export const forceOutput = flag({
  long: 'force-output',
  description: 'force output additional files',
  type: boolean,
  defaultValueIsSerializable: true,
  defaultValue: () => false,
});

export function registerCli(args: { verbose?: boolean; config?: string }): void {
cleanArgs(args);
registerLogger(args);
Expand Down
58 changes: 58 additions & 0 deletions src/commands/group/__test__/group.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { fsa } from '@chunkd/fs';
import { FsMemory } from '@chunkd/source-memory';
import assert from 'node:assert';
import { before, describe, it } from 'node:test';
import { commandGroup, groupItems } from '../group.js';

describe('groupItems', () => {
  /** Build a fresh copy of the fixture so no assertion shares an array with another. */
  const oneToFour = (): number[] => [1, 2, 3, 4];

  it('should group items', () => {
    // Group size divides the input evenly
    const pairs = groupItems(oneToFour(), 2);
    assert.deepEqual(pairs, [
      [1, 2],
      [3, 4],
    ]);

    // Group size leaves a shorter trailing group
    const triples = groupItems(oneToFour(), 3);
    assert.deepEqual(triples, [[1, 2, 3], [4]]);
  });

  it('should group max', () => {
    // A group size larger than the input yields a single group holding everything
    const everything = groupItems(oneToFour(), 100);
    assert.deepEqual(everything, [oneToFour()]);
  });

  it('should group min', () => {
    // A group size of one wraps every item in its own group
    const singles = oneToFour().map((value) => [value]);
    assert.deepEqual(groupItems(oneToFour(), 1), singles);
  });
});

describe('group', () => {
  /** Location the command writes its grouped output to. */
  const outputPath = '/tmp/group/output.json';
  const memoryFs = new FsMemory();

  /** Invoke the command handler with output forced on, then read back what it wrote. */
  async function runGroup(inputs: string[], size: number): Promise<unknown> {
    await commandGroup.handler({ inputs, forceOutput: true, size } as any);
    return fsa.readJson(outputPath);
  }

  before(() => {
    // Register the FsMemory instance for the prefix the command writes under
    fsa.register('/tmp/group', memoryFs);
  });

  it('should load from a JSON array', async () => {
    const output = await runGroup([JSON.stringify([1, 2, 3, 4])], 50);
    assert.deepEqual(output, [[1, 2, 3, 4]]);
  });

  it('should load from multiple JSON arrays', async () => {
    const inputs = [JSON.stringify([1, 2, 3, 4]), JSON.stringify(['alpha'])];
    const output = await runGroup(inputs, 3);
    assert.deepEqual(output, [
      [1, 2, 3],
      [4, 'alpha'],
    ]);
  });

  it('should load from strings', async () => {
    // A plain string is mixed in with JSON arrays and keeps its position in the flattened list
    const inputs = ['s3://foo/bar', JSON.stringify([1, 2, 3, 4]), JSON.stringify(['alpha'])];
    const output = await runGroup(inputs, 3);
    assert.deepEqual(output, [
      ['s3://foo/bar', 1, 2],
      [3, 4, 'alpha'],
    ]);
  });
});
64 changes: 64 additions & 0 deletions src/commands/group/group.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { fsa } from '@chunkd/fs';
import { command, number, option, restPositionals, string } from 'cmd-ts';
import { logger } from '../../log.js';
import { isArgo } from '../../utils/argo.js';
import { config, forceOutput, registerCli, verbose } from '../common.js';

/**
 * Chunk an array into consecutive groups of at most `groupSize` items.
 *
 * The input array is not modified; the final group may hold fewer than `groupSize` items.
 *
 * @param items items to split into groups
 * @param groupSize maximum number of items per group, must be at least 1
 * @returns the groups in input order, an empty array when `items` is empty
 * @throws RangeError when `groupSize` is `NaN` or less than 1
 *
 * @example
 *
 * ```typescript
 * groupItems(["a","b","c"], 2) // => [["a","b"], ["c"]]
 * ```
 */
export function groupItems<T>(items: T[], groupSize: number): T[][] {
  // A size of zero or below never advances the loop index, so reject it instead of looping forever.
  // NaN and fractional sizes below one would otherwise silently produce empty groups.
  if (Number.isNaN(groupSize) || groupSize < 1) {
    throw new RangeError(`groupSize must be a number greater than or equal to 1, got: ${groupSize}`);
  }
  const output: T[][] = [];
  for (let index = 0; index < items.length; index += groupSize) {
    output.push(items.slice(index, index + groupSize));
  }
  return output;
}

/**
 * Expand one command line argument into a list of items.
 *
 * An argument beginning with `[` is parsed as JSON and the parsed value is returned as-is,
 * anything else becomes a single-item list holding the argument itself.
 */
function loadInput(x: string): string[] {
  const looksLikeJsonArray = x.charAt(0) === '[';
  return looksLikeJsonArray ? JSON.parse(x) : [x];
}

/** `--size` option: how many items go into each output group, 50 unless overridden. */
const groupSizeOption = option({
  long: 'size',
  type: number,
  description: 'Group items into this number of items group',
  defaultValueIsSerializable: true,
  defaultValue: () => 50,
});

/** All remaining positional arguments, described to the user as plain items or JSON arrays. */
const groupInputItems = restPositionals({
  displayName: 'items',
  type: string,
  description: 'list of items to group, can be a JSON array',
});

/** Argument definitions accepted by the `group` command. */
export const CommandGroupArgs = {
  config,
  verbose,
  forceOutput,
  size: groupSizeOption,
  inputs: groupInputItems,
};

/**
 * `group` command: flatten every input into one list and split it into groups of `--size` items.
 *
 * Exits the process with code 1 when no inputs are given or when `--size` is below 1.
 * The grouped result is written to `/tmp/group/output.json` only when `--force-output`
 * is set or `isArgo()` reports true.
 */
export const commandGroup = command({
  name: 'group',
  description: 'group a array of inputs into a set ',
  args: CommandGroupArgs,
  handler: async (args) => {
    registerCli(args);
    if (args.inputs.length === 0) {
      logger.error('Group:Error:Empty');
      process.exit(1);
    }
    // A size below one would never advance the grouping loop, so fail fast instead of hanging
    if (Number.isNaN(args.size) || args.size < 1) {
      logger.error({ size: args.size }, 'Group:Error:InvalidSize');
      process.exit(1);
    }
    // loadInput is synchronous, so the inputs can be expanded and flattened in a single pass
    const allFiles = args.inputs.flatMap(loadInput);
    const grouped = groupItems(allFiles, args.size);
    // `files` counts the items after JSON arrays are expanded, not the number of CLI arguments
    logger.info({ files: allFiles.length, groups: grouped.length }, 'Group:Done');
    if (args.forceOutput || isArgo()) {
      await fsa.write('/tmp/group/output.json', JSON.stringify(grouped));
    }
  },
});
2 changes: 2 additions & 0 deletions src/commands/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import { commandStacSync } from './stac-sync/stac.sync.js';
import { commandStacValidate } from './stac-validate/stac.validate.js';
import { commandTileIndexValidate } from './tileindex-validate/tileindex.validate.js';
import { commandStacGithubImport } from './stac-github-import/stac.github.import.js';
import { commandGroup } from './group/group.js';

export const cmd = subcommands({
name: 'argo-tasks',
description: 'Utility tasks for argo',
cmds: {
copy: commandCopy,
'create-manifest': commandCreateManifest,
group: commandGroup,
flatten: commandCreateManifest,
'lds-fetch-layer': commandLdsFetch,
list: commandList,
Expand Down
11 changes: 2 additions & 9 deletions src/commands/tileindex-validate/tileindex.validate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ import { isArgo } from '../../utils/argo.js';
import { FileFilter, getFiles } from '../../utils/chunk.js';
import { findBoundingBox } from '../../utils/geotiff.js';
import { MapSheet, SheetRanges } from '../../utils/mapsheet.js';
import { config, registerCli, verbose } from '../common.js';
import { config, forceOutput, registerCli, verbose } from '../common.js';
import { CommandListArgs } from '../list/list.js';
// import { CommandListArgs } from '../list/list.js';

const SHEET_MIN_X = MapSheet.origin.x + 4 * MapSheet.width; // The minimum x coordinate of a valid sheet / tile
const SHEET_MAX_X = MapSheet.origin.x + 46 * MapSheet.width; // The maximum x coordinate of a valid sheet / tile
Expand Down Expand Up @@ -106,13 +105,7 @@ export const commandTileIndexValidate = command({
description: 'Validate that all input tiffs perfectly align to tile grid',
defaultValueIsSerializable: true,
}),
forceOutput: flag({
type: boolean,
defaultValue: () => false,
long: 'force-output',
description: 'force output additional files',
defaultValueIsSerializable: true,
}),
forceOutput,
location: restPositionals({ type: string, displayName: 'location', description: 'Location of the source files' }),
},
async handler(args) {
Expand Down