From 8ddc6c4995a0b2e6bca7bc73f5256897d0c5313c Mon Sep 17 00:00:00 2001 From: sharkinsspatial Date: Fri, 25 Jan 2019 15:17:49 -0500 Subject: [PATCH 1/7] Refactor stream to merge collection and item properties during ingest. --- packages/api-lib/libs/es.js | 87 +++++++++++-------- .../tests/integration/ingestCollections.js | 9 ++ .../api-lib/tests/integration/ingestData.js | 9 +- .../tests/integration/runIntegration.sh | 2 +- 4 files changed, 69 insertions(+), 38 deletions(-) create mode 100644 packages/api-lib/tests/integration/ingestCollections.js diff --git a/packages/api-lib/libs/es.js b/packages/api-lib/libs/es.js index b4eacff..ac857e0 100644 --- a/packages/api-lib/libs/es.js +++ b/packages/api-lib/libs/es.js @@ -8,7 +8,6 @@ const ElasticsearchWritableStream = require('./ElasticSearchWriteableStream') const logger = require('./logger') let _esClient - /* This module is used for connecting to an Elasticsearch instance, writing records, searching records, and managing the indexes. It looks for the ES_HOST environment @@ -77,7 +76,6 @@ async function esClient() { // Create STAC mappings async function prepare(index) { // TODO - different mappings for collection and item - let ready const props = { 'type': 'object', properties: { @@ -106,7 +104,6 @@ async function prepare(index) { }] const client = await esClient() const indexExists = await client.indices.exists({ index }) - if (!indexExists) { const payload = { index, @@ -133,49 +130,71 @@ async function prepare(index) { try { await client.indices.create(payload) logger.info(`Created index: ${JSON.stringify(payload)}`) - ready = 0 } catch (error) { const debugMessage = `Error creating index, already created: ${error}` logger.debug(debugMessage) } } - return ready } // Given an input stream and a transform, write records to an elasticsearch instance async function _stream() { - const toEs = through2.obj({ objectMode: true }, (data, encoding, next) => { - let index = '' - if (data && data.hasOwnProperty('extent')) { - index = 'collections' - } else if (data && data.hasOwnProperty('geometry')) { - index = 'items' - } else { - next() - return - } - // remove any hierarchy links in a non-mutating way - const hlinks = ['self', 'root', 'parent', 'child', 'collection', 'item'] - const links = data.links.filter((link) => hlinks.includes(link)) - const dataNoLinks = Object.assign({}, data, { links }) - - // create ES record - const record = { - index, - type: 'doc', - id: dataNoLinks.id, - action: 'update', - _retry_on_conflict: 3, - body: { - doc: dataNoLinks, - doc_as_upsert: true - } - } - next(null, record) - }) let esStreams try { + let collections = [] const client = await esClient() + const indexExists = await client.indices.exists({ index: 'collections' }) + if (indexExists) { + const body = { query: { match_all: {} } } + const searchParams = { + index: 'collections', + body + } + const resultBody = await client.search(searchParams) + collections = resultBody.hits.hits.map((r) => (r._source)) + } + + const toEs = through2.obj({ objectMode: true }, (data, encoding, next) => { + let index = '' + if (data && data.hasOwnProperty('extent')) { + index = 'collections' + } else if (data && data.hasOwnProperty('geometry')) { + index = 'items' + } else { + next() + return + } + // remove any hierarchy links in a non-mutating way + const hlinks = ['self', 'root', 'parent', 'child', 'collection', 'item'] + const links = data.links.filter((link) => hlinks.includes(link)) + let esDataObject = Object.assign({}, data, { links }) + if (index === 'items') { + const collectionId = data.properties.collection + const itemCollection = + collections.find((collection) => (collectionId === collection.id)) + if (itemCollection) { + const flatProperties = + Object.assign({}, itemCollection.properties, data.properties) + esDataObject = Object.assign({}, esDataObject, { properties: flatProperties }) + } else { + logger.error(`${data.id} has no collection`) + } + } + + // create ES record + const record = { + index, + type: 'doc', + id: esDataObject.id, + action: 'update', + _retry_on_conflict: 3, + body: { + doc: esDataObject, + doc_as_upsert: true + } + } + next(null, record) + }) const esStream = new ElasticsearchWritableStream({ client: client }, { objectMode: true, highWaterMark: process.env.ES_BATCH_SIZE || 500 diff --git a/packages/api-lib/tests/integration/ingestCollections.js b/packages/api-lib/tests/integration/ingestCollections.js new file mode 100644 index 0000000..8c056e3 --- /dev/null +++ b/packages/api-lib/tests/integration/ingestCollections.js @@ -0,0 +1,9 @@ +process.env.ES_HOST = `http://${process.env.DOCKER_NAME}:4571` +const ingest = require('../../libs/ingest').ingest +const backend = require('../../libs/es') + +async function doIngest() { + await ingest('../fixtures/stac/catalog.json', backend, true, true) + console.log('Collections done') +} +doIngest() diff --git a/packages/api-lib/tests/integration/ingestData.js b/packages/api-lib/tests/integration/ingestData.js index 4354af3..f72c42a 100644 --- a/packages/api-lib/tests/integration/ingestData.js +++ b/packages/api-lib/tests/integration/ingestData.js @@ -2,6 +2,9 @@ process.env.ES_HOST = `http://${process.env.DOCKER_NAME}:4571` const ingest = require('../../libs/ingest').ingest const backend = require('../../libs/es') -//ingest('../fixtures/stac/catalog.json', backend) -ingest('https://landsat-stac.s3.amazonaws.com/landsat-8-l1/catalog.json', backend) - +async function doIngest() { + await ingest('../fixtures/stac/catalog.json', backend) + console.log('Items done') +} +//ingest('https://landsat-stac.s3.amazonaws.com/landsat-8-l1/catalog.json', backend) +doIngest() diff --git a/packages/api-lib/tests/integration/runIntegration.sh b/packages/api-lib/tests/integration/runIntegration.sh index 7baf485..f378d7b 100755 --- a/packages/api-lib/tests/integration/runIntegration.sh +++ b/packages/api-lib/tests/integration/runIntegration.sh @@ -1,4 +1,4 @@ #!/bin/bash docker-compose up & while ! nc -z $DOCKER_NAME 4571; do sleep 1; done; sleep 20; -node ./ingestData.js && yarn ava ./tests/integration/test_api.js +node ./ingestCollections.js && node ./ingestData.js && yarn ava ./tests/integration/test_api.js From 9c09adfdd6de62ecfa022d4b1a5afdbc1e4b1c33 Mon Sep 17 00:00:00 2001 From: sharkinsspatial Date: Fri, 25 Jan 2019 15:54:07 -0500 Subject: [PATCH 2/7] Update unit tests for simplified flat items in Elasticsearch. --- packages/api-lib/libs/api.js | 9 +--- packages/api-lib/tests/test_api_search.js | 40 +++++++++++++++++- .../api-lib/tests/test_api_searchItems.js | 41 ------------------- 3 files changed, 39 insertions(+), 51 deletions(-) delete mode 100644 packages/api-lib/tests/test_api_searchItems.js diff --git a/packages/api-lib/libs/api.js b/packages/api-lib/libs/api.js index 0487181..b8956ee 100644 --- a/packages/api-lib/libs/api.js +++ b/packages/api-lib/libs/api.js @@ -236,15 +236,8 @@ const buildPageLinks = function (meta, parameters, endpoint) { } const searchItems = async function (parameters, page, limit, backend, endpoint) { - const arbitraryLimit = 5000 - const { results: collectionResults } = - await backend.search(parameters, 'collections', 1, arbitraryLimit) - const collectionList = collectionResults.map((result) => result.id) - const collectionsQuery = Object.assign( - {}, parameters, { parentCollections: collectionList } - ) const { results: itemsResults, meta: itemsMeta } = - await backend.search(collectionsQuery, 'items', page, limit) + await backend.search(parameters, 'items', page, limit) const pageLinks = buildPageLinks(itemsMeta, parameters, endpoint) const items = addItemLinks(itemsResults, endpoint) const response = wrapResponseInFeatureCollection(itemsMeta, items, pageLinks) diff --git a/packages/api-lib/tests/test_api_search.js b/packages/api-lib/tests/test_api_search.js index 02f714c..6183236 100644 --- a/packages/api-lib/tests/test_api_search.js +++ b/packages/api-lib/tests/test_api_search.js @@ -3,6 +3,11 @@ const sinon = require('sinon') const proxquire = require('proxyquire') const api = require('../libs/api') const item = require('./fixtures/item.json') +const itemLinks = require('./fixtures/itemLinks.json') + +function cloneMutatedItem() { + return Object.assign({}, item, { links: item.links.slice(0) }) +} test('search es error', async (t) => { const error = sinon.spy() @@ -65,6 +70,36 @@ test('search /stac', async (t) => { 'Returns STAC catalog with links to collections') }) +test('search /stac/search wraps results', async (t) => { + const limit = 10 + const page = 1 + const meta = { + limit, + page, + found: 1, + returned: 1 + } + const clonedItem = cloneMutatedItem() + const results = [clonedItem] + + const itemsResults = { meta, results } + const search = sinon.stub() + search.resolves(itemsResults) + const backend = { search } + const actual = await api.search('/stac/search', {}, backend, 'endpoint') + t.deepEqual(actual.features[0].links, itemLinks.links, + 'Adds correct relative STAC links') + + const expectedMeta = { + limit, + page, + found: 1, + returned: 1 + } + t.deepEqual(actual.meta, expectedMeta, 'Adds correct response metadata fields') + t.is(actual.type, 'FeatureCollection', 'Wraps response as FeatureCollection') +}) + test('search /stac/search query parameters', async (t) => { const search = sinon.stub().resolves({ results: [], meta: {} }) const backend = { search } @@ -237,10 +272,11 @@ test('search /collections/collectionId/items/itemId', async (t) => { found: 1, returned: 1 } - + const clonedItem = cloneMutatedItem() + const results = [clonedItem] const search = sinon.stub().resolves({ meta, - results: [item] + results }) const backend = { search } const itemId = 'itemId' diff --git a/packages/api-lib/tests/test_api_searchItems.js b/packages/api-lib/tests/test_api_searchItems.js deleted file mode 100644 index b6b71af..0000000 --- a/packages/api-lib/tests/test_api_searchItems.js +++ /dev/null @@ -1,41 +0,0 @@ -const test = require('ava') -const sinon = require('sinon') -const api = require('../libs/api') -const item = require('./fixtures/item.json') -const itemLinks = require('./fixtures/itemLinks.json') - -test('searchItems', async (t) => { - const collectionId = 'collectionId' - const limit = 10 - const page = 1 - const meta = { - limit, - page, - found: 1, - returned: 1 - } - const collectionResults = { meta, results: [{ id: collectionId }] } - const itemsResults = { meta, results: [item] } - const search = sinon.stub() - search.onFirstCall().resolves(collectionResults) - search.onSecondCall().resolves(itemsResults) - const backend = { search } - const actual = await api.searchItems({}, page, limit, backend, 'endpoint') - t.is(search.firstCall.args[2], 1, 'Searches collections with no paging') - t.is(search.firstCall.args[3], 5000, 'Searches collections with arbitrary limit') - - t.is(search.secondCall.args[0].parentCollections[0], collectionId, - 'Searches for items from found collections that meet the search criteria') - - t.deepEqual(actual.features[0].links, itemLinks.links, - 'Adds correct relative STAC links') - - const expectedMeta = { - limit, - page, - found: 1, - returned: 1 - } - t.deepEqual(actual.meta, expectedMeta, 'Adds correct response metadata fields') - t.is(actual.type, 'FeatureCollection', 'Wraps response as FeatureCollection') -}) From ad2ef3e64ff2d9080c6e533f4bf65b8ede5c94b7 Mon Sep 17 00:00:00 2001 From: sharkinsspatial Date: Fri, 25 Jan 2019 17:28:46 -0500 Subject: [PATCH 3/7] Refactoring Elasticsearch queries for flattened collection properties. --- packages/api-lib/libs/es.js | 19 ++++----------- .../tests/fixtures/stac/collection2.json | 2 +- .../api-lib/tests/integration/test_api.js | 24 +++++++++++++++++++ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/packages/api-lib/libs/es.js b/packages/api-lib/libs/es.js index ac857e0..84e7035 100644 --- a/packages/api-lib/libs/es.js +++ b/packages/api-lib/libs/es.js @@ -262,9 +262,11 @@ function buildDatetimeQuery(parameters) { function buildQuery(parameters) { const eq = 'eq' - const { query, parentCollections, intersects } = parameters + const { query, intersects } = parameters let must = [] if (query) { + // Using reduce rather than map as we don't currently support all + // stac query operators. must = Object.keys(query).reduce((accumulator, property) => { const operatorsObject = query[property] const operators = Object.keys(operatorsObject) @@ -284,6 +286,7 @@ function buildQuery(parameters) { return accumulator }, must) } + if (intersects) { const { geometry } = intersects must.push({ @@ -298,19 +301,7 @@ function buildQuery(parameters) { must.push(datetimeQuery) } - let filter - if (parentCollections && parentCollections.length !== 0) { - filter = { - bool: { - should: [ - { terms: { 'properties.collection': parentCollections } }, - { bool: { must } } - ] - } - } - } else { - filter = { bool: { must } } - } + const filter = { bool: { must } } const queryBody = { constant_score: { filter } } diff --git a/packages/api-lib/tests/fixtures/stac/collection2.json b/packages/api-lib/tests/fixtures/stac/collection2.json index a3d3bda..15a98c4 100644 --- a/packages/api-lib/tests/fixtures/stac/collection2.json +++ b/packages/api-lib/tests/fixtures/stac/collection2.json @@ -220,7 +220,7 @@ "eo:gsd": 15, "eo:instrument": "OLI_TIRS", "eo:off_nadir": 0, - "eo:platform": "landsat-8" + "eo:platform": "platform2" }, "providers": [ { diff --git a/packages/api-lib/tests/integration/test_api.js b/packages/api-lib/tests/integration/test_api.js index f3e7b9f..c83b5ef 100644 --- a/packages/api-lib/tests/integration/test_api.js +++ b/packages/api-lib/tests/integration/test_api.js @@ -151,3 +151,27 @@ test('stac/search sort', async (t) => { }, backend, endpoint) t.is(response.features[0].id, 'LC80100102015082LGN00') }) + +test('stac/search flattened collection properties', async (t) => { + let response = await search('/stac/search', { + query: { + 'eo:platform': { + eq: 'platform2' + } + } + }, backend, endpoint) + t.is(response.features[0].id, 'collection2_item') + + response = await search('/stac/search', { + query: { + 'eo:platform': { + eq: 'landsat-8' + } + } + }, backend, endpoint) + const havePlatform = + response.features.filter( + (item) => (item.properties['eo:platform'] === 'landsat-8') + ) + t.is(havePlatform.length, response.features.length) +}) From 2e8f07ecd591294a66530dacf1d94bcd6eaae3a7 Mon Sep 17 00:00:00 2001 From: sharkinsspatial Date: Fri, 25 Jan 2019 19:19:36 -0500 Subject: [PATCH 4/7] Handle bbox passed as string. Fixes #146. --- packages/api-lib/libs/api.js | 8 +++++++- packages/api-lib/tests/test_api_search.js | 7 ++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/api-lib/libs/api.js b/packages/api-lib/libs/api.js index 0487181..855d448 100644 --- a/packages/api-lib/libs/api.js +++ b/packages/api-lib/libs/api.js @@ -38,7 +38,13 @@ const extractBbox = function (params) { let intersectsGeometry const { bbox } = params if (bbox) { - const boundingBox = extent(bbox) + let bboxArray + if (typeof bbox === 'string') { + bboxArray = JSON.parse(bbox) + } else { + bboxArray = bbox + } + const boundingBox = extent(bboxArray) const geojson = feature(boundingBox.polygon()) intersectsGeometry = geojson } diff --git a/packages/api-lib/tests/test_api_search.js b/packages/api-lib/tests/test_api_search.js index 02f714c..1f6f1b4 100644 --- a/packages/api-lib/tests/test_api_search.js +++ b/packages/api-lib/tests/test_api_search.js @@ -105,8 +105,9 @@ test('search /stac/search bbox parameter', async (t) => { const s = -10 const e = 10 const n = 10 + const bbox = [w, s, e, n] const queryParams = { - bbox: [w, s, e, n], + bbox, page: 1, limit: 1 } @@ -128,6 +129,10 @@ test('search /stac/search bbox parameter', async (t) => { t.deepEqual(search.firstCall.args[0].intersects, expected, 'Converts a [w,s,e,n] bbox to an intersects search parameter') search.resetHistory() + queryParams.bbox = `[${bbox.toString()}]` + await api.search('/stac/search', queryParams, backend, 'endpoint') + t.deepEqual(search.firstCall.args[0].intersects, expected, + 'Converts stringified [w,s,e,n] bbox to an intersects search parameter') }) test('search /stac/search time parameter', async (t) => { From e9a2e1a62f358d820b0ed4b889379812f326f29d Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Fri, 25 Jan 2019 19:59:19 -0500 Subject: [PATCH 5/7] bump version to 0.2.3-b1 and update CHANGELOG --- CHANGELOG.md | 8 ++++++++ lerna.json | 2 +- packages/api-lib/package.json | 2 +- packages/api/package.json | 4 ++-- packages/ingest/package.json | 4 ++-- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f72f12..7e48838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Fixed +- Proper handling of bounding box passed as string + +### Changed +- De-normalize Item properties to include all properties from collection +- Flattened elastic search to simplify query logic +- Items returned will now include all 'Common' properties that are in the Items Collection + ## [v0.2.2] - 2019-01-21 ### Fixed diff --git a/lerna.json b/lerna.json index d484d88..63bd68e 100644 --- a/lerna.json +++ b/lerna.json @@ -1,6 +1,6 @@ { "lerna": "2.11.0", - "version": "0.2.2", + "version": "0.2.3-b1", "npmClient": "yarn", "packages": [ "packages/*" diff --git a/packages/api-lib/package.json b/packages/api-lib/package.json index a4b9c07..c5e2ff9 100644 --- a/packages/api-lib/package.json +++ b/packages/api-lib/package.json @@ -1,6 +1,6 @@ { "name": "@sat-utils/api-lib", - "version": "0.2.2", + "version": "0.2.3-b1", "description": "A library for creating a search API of public Satellites metadata using Elasticsearch", "main": "index.js", "scripts": { diff --git a/packages/api/package.json b/packages/api/package.json index 01afe19..5a3e6fc 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -1,6 +1,6 @@ { "name": "@sat-utils/api", - "version": "0.2.2", + "version": "0.2.3-b1", "description": "The api lambda function for sat-api", "main": "index.js", "repository": { @@ -26,7 +26,7 @@ }, "homepage": "https://github.com/sat-utils/sat-api#readme", "dependencies": { - "@sat-utils/api-lib": "^0.2.2" + "@sat-utils/api-lib": "^0.2.3-b1" }, "devDependencies": { "ava": "^0.25.0", diff --git a/packages/ingest/package.json b/packages/ingest/package.json index c69b500..6e0b796 100644 --- a/packages/ingest/package.json +++ b/packages/ingest/package.json @@ -1,6 +1,6 @@ { "name": "@sat-utils/ingest", - "version": "0.2.2", + "version": "0.2.3-b1", "description": "ingest lambda function of sat-api", "main": "index.js", "bin": { @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/sat-utils/sat-api#readme", "dependencies": { - "@sat-utils/api-lib": "^0.2.2" + "@sat-utils/api-lib": "^0.2.3-b1" }, "devDependencies": { "ava": "^0.25.0", From b8d17634f35504be362bd9921cef0c8f188e0ebc Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 29 Jan 2019 14:17:26 -0500 Subject: [PATCH 6/7] bump version to 0.2.3 --- lerna.json | 2 +- packages/api-lib/package.json | 2 +- packages/api/package.json | 4 ++-- packages/ingest/package.json | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lerna.json b/lerna.json index 63bd68e..4097e82 100644 --- a/lerna.json +++ b/lerna.json @@ -1,6 +1,6 @@ { "lerna": "2.11.0", - "version": "0.2.3-b1", + "version": "0.2.3", "npmClient": "yarn", "packages": [ "packages/*" diff --git a/packages/api-lib/package.json b/packages/api-lib/package.json index c5e2ff9..e6ce477 100644 --- a/packages/api-lib/package.json +++ b/packages/api-lib/package.json @@ -1,6 +1,6 @@ { "name": "@sat-utils/api-lib", - "version": "0.2.3-b1", + "version": "0.2.3", "description": "A library for creating a search API of public Satellites metadata using Elasticsearch", "main": "index.js", "scripts": { diff --git a/packages/api/package.json b/packages/api/package.json index 5a3e6fc..7a1bc13 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -1,6 +1,6 @@ { "name": "@sat-utils/api", - "version": "0.2.3-b1", + "version": "0.2.3", "description": "The api lambda function for sat-api", "main": "index.js", "repository": { @@ -26,7 +26,7 @@ }, "homepage": "https://github.com/sat-utils/sat-api#readme", "dependencies": { - "@sat-utils/api-lib": "^0.2.3-b1" + "@sat-utils/api-lib": "^0.2.3" }, "devDependencies": { "ava": "^0.25.0", diff --git a/packages/ingest/package.json b/packages/ingest/package.json index 6e0b796..b3a88ff 100644 --- a/packages/ingest/package.json +++ b/packages/ingest/package.json @@ -1,6 +1,6 @@ { "name": "@sat-utils/ingest", - "version": "0.2.3-b1", + "version": "0.2.3", "description": "ingest lambda function of sat-api", "main": "index.js", "bin": { @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/sat-utils/sat-api#readme", "dependencies": { - "@sat-utils/api-lib": "^0.2.3-b1" + "@sat-utils/api-lib": "^0.2.3" }, "devDependencies": { "ava": "^0.25.0", From 1e3f7c981cb6d0bd87de54e6efc41f0f27499b18 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 29 Jan 2019 14:20:03 -0500 Subject: [PATCH 7/7] update changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e48838..eab9d2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [v0.2.3] - 2019-01-29 + ### Fixed - Proper handling of bounding box passed as string @@ -82,7 +84,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Refactor and improve splitting [Unreleased]: https://github.com/sat-utils/sat-api/compare/master...develop -[v0.2.2]: https://github.com/sat-utils/sat-api/compare/v0.2.0...v0.2.2 +[v0.2.3]: https://github.com/sat-utils/sat-api/compare/v0.2.2...v0.2.3 +[v0.2.2]: https://github.com/sat-utils/sat-api/compare/v0.2.1...v0.2.2 [v0.2.1]: https://github.com/sat-utils/sat-api/compare/v0.2.0...v0.2.1 [v0.2.0]: https://github.com/sat-utils/sat-api/compare/v0.1.0...v0.2.0 [v0.1.0]: https://github.com/sat-utils/sat-api/compare/v0.0.2...v0.1.0