diff --git a/CHANGELOG.md b/CHANGELOG.md index db8159976..b1fb3bba9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - 'via' and 'canonical' rel types as options in items. - Added clarification about how collection-level asset object properties do not remove the need for item-level asset object properties in the `item-assets` extension ([#880](https://github.com/radiantearth/stac-spec/pull/880)) - Added [processing extension](extensions/processing/README.md) +- Added [file info extension](extensions/file/README.md) ([#879](https://github.com/radiantearth/stac-spec/pull/879), [#921](https://github.com/radiantearth/stac-spec/issues/921)) - Added additional acquisition parameters in the `sat` extension: sat:platform_international_designator, sat:absolute_orbit, sat:anx_datetime* ([#894](https://github.com/radiantearth/stac-spec/pull/894)) ### Changed @@ -19,11 +20,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Removed +- Checksum extension (field `checksum:multihash`). Use File Info extension (field `file:checksum`) instead. + ### Fixed - Label extension: `label:classes` was flagged as required in JSON Schema, but is only required for categorical data. - Fixed JSON Schema for `providers` (Collections and Items) to be an object and require a `name`. 
-- JSON Schema for `sar:polarizations` in `assets` fixed +- JSON Schema for `sar:polarizations` in `assets` fixed ## [v1.0.0-beta.2] - 2020-07-08 diff --git a/extensions/README.md b/extensions/README.md index 7a563a5f4..00f95d0b0 100644 --- a/extensions/README.md +++ b/extensions/README.md @@ -46,10 +46,10 @@ An extension can add new fields to STAC entities (content extension), or can add | Extension Title | Identifier | Field Name Prefix | Scope | Maturity | Description | | ------------------------------------------------ | ----------------- | ------------------- | ------------------------- | ---------- | ----------- | -| [Checksum](checksum/README.md) | checksum | checksum | Item, Catalog, Collection | *Proposal* | Provides a way to specify file checksums for assets and links in Items, Catalogs and Collections. | | [Collection Assets](collection-assets/README.md) | collection-assets | - | Collection | *Proposal* | Provides a way to specify assets available on the collection-level. | | [Data Cube](datacube/README.md) | datacube | cube | Item, Collection | *Proposal* | Data Cube related metadata, especially to describe their dimensions. | | [Electro-Optical](eo/README.md) | eo | eo | Item | *Proposal* | Covers electro-optical data that represents a snapshot of the earth for a single date and time. It could consist of multiple spectral bands, for example visible bands, infrared bands, red edge bands and panchromatic bands. The extension provides common fields like bands, cloud cover, gsd and more. | +| [File Info](file/README.md) | file | file | Item, Catalog, Collection | *Proposal* | Provides a way to specify file details such as size, data type and checksum for assets and links in Items, Catalogs and Collections. | | [Item Asset Definition](item-assets/README.md) | item-assets | - | Collection | *Proposal* | Provides a way to specify details about what assets may be found in Items belonging to a collection. 
| | [Label](label/README.md) | label | label | Item | *Proposal* | Items that relate labeled AOIs with source imagery | | [Point Cloud](pointcloud/README.md) | pointcloud | pc | Item | *Proposal* | Provides a way to describe point cloud datasets. The point clouds can come from either active or passive sensors, and data is frequently acquired using tools such as LiDAR or coincidence-matched imagery. | diff --git a/extensions/checksum/README.md b/extensions/checksum/README.md deleted file mode 100644 index 78a466feb..000000000 --- a/extensions/checksum/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Checksum Extension Specification - -- **Title: Checksum** -- **Identifier: checksum** -- **Field Name Prefix: checksum** -- **Scope: Item, Catalog, Collection** -- **Extension [Maturity Classification](../README.md#extension-maturity): Proposal** - -Provides a way to specify file checksums (e.g. BLAKE2, MD5, SHA1, SHA2, SHA3) for assets and links in STAC Items, STAC Catalogs and STAC Collections. The hashes are self-identifying hashes as described in the [Multihash specification](https://github.com/multiformats/multihash). - -- [Example](examples/sentinel1.json) -- [JSON Schema](json-schema/schema.json) - -## [`Link Object`](../../item-spec/item-spec.md#link-object) and [`Asset Object`](../../item-spec/item-spec.md#asset-object) fields - -| Field Name | Type | Description | -| ------------------ | ------ | ------------------------------------------------------------ | -| checksum:multihash | string | Multihash for the corresponding file, encoded as hexadecimal (base 16) string with lowercase letters. | - -This extension can OPTIONALLY be used with the [Collection Assets Extension](../collection-assets/README.md). Checksums MUST NOT be part of the [Item Assets Definition](../item-assets/README.md) in Collections. - -### Examples - -Checksum for a text file with file content `test`. 
- -| Field Name | Algorithm | Example | -| ------------------ | ------------------------------------- | ---------------------------------------------------------------------- | -| checksum:multihash | sha1 (160 bits) | `1114a94a8fe5ccb19ba61c4c0873d391e987982fbbd3` | -| checksum:multihash | sha2 (256 bits) | `12209f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08` | -| checksum:multihash | sha2 (256 bits truncated to 160 bits) | `12149f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b` | -| checksum:multihash | blake2b-128 | `90e4021044a8995dd50b6657a037a7839304535b` | - -## Implementations - -None yet, still in proposal stage. diff --git a/extensions/file/README.md b/extensions/file/README.md new file mode 100644 index 000000000..7b76fa0ef --- /dev/null +++ b/extensions/file/README.md @@ -0,0 +1,63 @@ +# File Info Extension Specification + +- **Title: File Info** +- **Identifier: file** +- **Field Name Prefix: file** +- **Scope: Item, Catalog, Collection** +- **Extension [Maturity Classification](../README.md#extension-maturity): Proposal** + +Provides a way to specify file related details such as checksum, data type and size for assets and links in [STAC Items](../../item-spec/item-spec.md), [STAC Catalogs](../../catalog-spec/catalog-spec.md) and [STAC Collections](../../collection-spec/collection-spec.md). + +- [Example](examples/sentinel1.json) +- [JSON Schema](json-schema/schema.json) + +## *Link Object* and *Asset Object* fields + +The following fields can be used for Links (in the [`Link Object`](../../item-spec/item-spec.md#link-object)) and assets (in the [`Asset Object`](../../item-spec/item-spec.md#asset-object)). + +| Field Name | Type | Description | +| ------------------ | ------ | ------------------------------------------------------------ | +| file:byte_order | string | The byte order of integer values in the file. One of `big-endian` or `little-endian`. 
| +| file:checksum | string | Provides a way to specify file [checksums](#checksums) (e.g. BLAKE2, MD5, SHA1, SHA2, SHA3). The hashes are self-identifying hashes as described in the [Multihash specification](https://github.com/multiformats/multihash) and must be encoded as a hexadecimal (base 16) string with lowercase letters. | +| file:data_type | string | The data type of the file. One of the [data types](#data-types) below. | +| file:header_size | integer | The header [size](#sizes) of the file, specified in bytes. | +| file:size | integer | The file [size](#sizes), specified in bytes. | + +This extension can OPTIONALLY be used with the [Collection Assets Extension](../collection-assets/README.md). +File-specific details should not be part of the [Item Assets Definition](../item-assets/README.md) in Collections. + +### Sizes + +Please be aware that the integer values (always unsigned) given for the sizes (especially `file:size`) may exceed the maximum value for the default integer data type in your environment / programming language. In this specification `integer` specifies an integer number without an upper limit. You might need to use other data types to store the values in. For example, files with a size larger than around 2.14 GB would exceed the maximum value for int32, and in JavaScript `BigInt` could be used then. + +### Data Types + +- `int8`: 8-bit integer +- `int16`: 16-bit integer +- `int32`: 32-bit integer +- `int64`: 64-bit integer +- `uint8`: unsigned 8-bit integer (common for 8-bit RGB PNGs) +- `uint16`: unsigned 16-bit integer +- `uint32`: unsigned 32-bit integer +- `uint64`: unsigned 64-bit integer +- `float32`: 32-bit float +- `float64`: 64-bit float +- `cint16`: 16-bit complex integer +- `cint32`: 32-bit complex integer +- `cfloat32`: 32-bit complex float +- `cfloat64`: 64-bit complex float + +### Checksums + +`file:checksum` was previously defined in the `checksum` extension and the field name was `checksum:multihash` before STAC v1.0.0-beta.3. 
The specification of the field has not changed. + +Checksum examples for some algorithms supported by Multihash in `file:checksum`. The examples are given for a text file with file content `test`. + +- Algorithm `sha1` (160 bits): `1114a94a8fe5ccb19ba61c4c0873d391e987982fbbd3` +- Algorithm `sha2` (256 bits): `12209f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08` +- Algorithm `sha2` (256 bits truncated to 160 bits): `12149f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b` +- Algorithm `blake2b-128`: `90e4021044a8995dd50b6657a037a7839304535b` + +## Implementations + +None yet, still in proposal stage. diff --git a/extensions/checksum/examples/sentinel1.json b/extensions/file/examples/sentinel1.json similarity index 71% rename from extensions/checksum/examples/sentinel1.json rename to extensions/file/examples/sentinel1.json index c843058b6..83096de33 100644 --- a/extensions/checksum/examples/sentinel1.json +++ b/extensions/file/examples/sentinel1.json @@ -3,8 +3,8 @@ "type": "Feature", "stac_version": "1.0.0-beta.2", "stac_extensions": [ - "checksum" - ], + "file" + ], "bbox": [-70.275032,-64.72924,-65.087479,-51.105831], "geometry": { "type": "Polygon", @@ -26,32 +26,39 @@ "href": "./annotation/calibration/noise-s1a-ew-grd-hh-20181103t235855-20181103t235955-024430-02ad5d-001.xml", "title": "Calibration Schema", "type": "text/xml", - "checksum:multihash": "90e40210a30d1711e81a4b11ef67b28744321659" + "file:checksum": "90e40210a30d1711e81a4b11ef67b28744321659" }, "calibrations": { "href": "./annotation/calibration/calibration-s1a-ew-grd-hh-20181103t235855-20181103t235955-024430-02ad5d-001.xml", "title": "Noise Schema", "type": "text/xml", - "checksum:multihash": "90e402104fc5351af67db0b8f1746efe421a05e4" + "file:checksum": "90e402104fc5351af67db0b8f1746efe421a05e4" }, "products": { "href": "./annotation/s1a-ew-grd-hh-20181103t235855-20181103t235955-024430-02ad5d-001.xml", "title": "Product Schema", "type": "text/xml", - "checksum:multihash": 
"90e402107a7f2588a85362b9beea2a12d4514d45" + "file:checksum": "90e402107a7f2588a85362b9beea2a12d4514d45" }, "measurement": { "href": "./measurement/s1a-ew-grd-hh-20181103t235855-20181103t235955-024430-02ad5d-001.tiff", "title": "Measurements", "type": "image/tiff", "sar:polarizations": ["HH"], - "checksum:multihash": "90e40210163700a8a6501eccd00b6d3b44ddaed0" + "file:byte_order": "little-endian", + "file:data_type": "uint16", + "file:size": 209715200, + "file:header_size": 4096, + "file:checksum": "90e40210163700a8a6501eccd00b6d3b44ddaed0" }, "thumbnail": { "href": "./preview/quick-look.png", "title": "Thumbnail", "type": "image/png", - "checksum:multihash": "90e40210f52acd32b09769d3b1871b420789456c" + "file:byte_order": "big-endian", + "file:data_type": "uint8", + "file:size": 146484, + "file:checksum": "90e40210f52acd32b09769d3b1871b420789456c" } }, "links": [ @@ -62,12 +69,12 @@ { "rel": "parent", "href": "https://example.com/collections/sentinel-1", - "checksum:multihash": "11146d97123fd2c02dec9a1b6d3b13136dbe600cf966" + "file:checksum": "11146d97123fd2c02dec9a1b6d3b13136dbe600cf966" }, { "rel": "root", "href": "https://example.com/collections", - "checksum:multihash": "1114fa4b9d69fdddc7c1be7bed9440621400b383b43f" + "file:checksum": "1114fa4b9d69fdddc7c1be7bed9440621400b383b43f" } ] } diff --git a/extensions/checksum/json-schema/schema.json b/extensions/file/json-schema/schema.json similarity index 52% rename from extensions/checksum/json-schema/schema.json rename to extensions/file/json-schema/schema.json index 3d5222ca5..2e8a6e437 100644 --- a/extensions/checksum/json-schema/schema.json +++ b/extensions/file/json-schema/schema.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://schemas.stacspec.org/v1.0.0-beta.2/extensions/checksum/json-schema/schema.json#", - "title": "Checksum Extension Specification", - "description": "STAC Checksum Extension to a STAC Item", + "$id": 
"https://schemas.stacspec.org/v1.0.0-beta.2/extensions/file/json-schema/schema.json#", + "title": "File Info Extension Specification", + "description": "STAC File Info Extension to a STAC Item, Catalog or Collection", "oneOf": [ { "allOf": [ @@ -13,10 +13,10 @@ "$ref": "#/definitions/stac_extensions" }, { - "$ref": "#/definitions/checksum_links" + "$ref": "#/definitions/file_links" }, { - "$ref": "#/definitions/checksum_assets" + "$ref": "#/definitions/file_assets" } ] }, @@ -29,7 +29,7 @@ "$ref": "#/definitions/stac_extensions" }, { - "$ref": "#/definitions/checksum_links" + "$ref": "#/definitions/file_links" } ] }, @@ -42,10 +42,10 @@ "$ref": "#/definitions/stac_extensions" }, { - "$ref": "#/definitions/checksum_links" + "$ref": "#/definitions/file_links" }, { - "$ref": "#/definitions/checksum_assets" + "$ref": "#/definitions/file_assets" } ] } @@ -61,44 +61,82 @@ "type": "array", "contains": { "enum": [ - "checksum", - "https://schemas.stacspec.org/v1.0.0-beta.2/extensions/checksum/json-schema/schema.json" + "file", + "https://schemas.stacspec.org/v1.0.0-beta.2/extensions/file/json-schema/schema.json" ] } } } }, - "checksum_links": { + "file_links": { "type": "object", "properties": { "links": { "type": "array", "items": { - "$ref": "#/definitions/checksums" + "$ref": "#/definitions/file" } } } }, - "checksum_assets": { + "file_assets": { "type": "object", "properties": { "assets": { "type": "object", "patternProperties": { ".+": { - "$ref": "#/definitions/checksums" + "$ref": "#/definitions/file" } } } } }, - "checksums": { + "file": { "type": "object", "properties": { - "checksum:multihash": { + "file:byte_order": { + "type": "string", + "enum": [ + "big-endian", + "little-endian" + ], + "title": "File Byte Order" + }, + "file:checksum": { "type": "string", "pattern": "^[a-f0-9]+$", - "title": "Multihash" + "title": "File Checksum (Multihash)" + }, + "file:data_type": { + "type": "string", + "enum": [ + "int8", + "int16", + "int32", + "int64", + "uint8", + 
"uint16", + "uint32", + "uint64", + "float32", + "float64", + "cint16", + "cint32", + "cfloat32", + "cfloat64" + ], + "title": "File Data Type" + }, + "file:header_size": { + "type": "integer", + "minimum": 0, + "title": "File Header Size" + }, + "file:size": { + "type": "integer", + "minimum": 0, + "title": "File Size" } } }