From 1bb6309bf6d3bb863c89cd6bed26dfcc463db011 Mon Sep 17 00:00:00 2001 From: Ankit Dixit Date: Tue, 24 Sep 2019 17:19:37 +0530 Subject: [PATCH] Kinesis connector --- pom.xml | 55 ++ presto-docs/src/main/sphinx/connector.rst | 1 + .../src/main/sphinx/connector/kinesis.rst | 252 ++++++++ presto-kinesis/etc/catalog/kinesis.properties | 34 ++ presto-kinesis/etc/kinesis/testtable.json | 38 ++ presto-kinesis/pom.xml | 228 +++++++ .../plugin/kinesis/KinesisClientManager.java | 73 +++ .../plugin/kinesis/KinesisClientProvider.java | 31 + .../plugin/kinesis/KinesisColumnHandle.java | 161 +++++ .../plugin/kinesis/KinesisConfig.java | 306 ++++++++++ .../plugin/kinesis/KinesisConnector.java | 84 +++ .../kinesis/KinesisConnectorFactory.java | 95 +++ .../plugin/kinesis/KinesisErrorCode.java | 48 ++ .../kinesis/KinesisFieldValueProvider.java | 46 ++ .../plugin/kinesis/KinesisHandleResolver.java | 53 ++ .../KinesisInternalFieldDescription.java | 97 +++ .../plugin/kinesis/KinesisMetadata.java | 212 +++++++ .../plugin/kinesis/KinesisModule.java | 89 +++ .../plugin/kinesis/KinesisPlugin.java | 40 ++ .../plugin/kinesis/KinesisRecordSet.java | 456 ++++++++++++++ .../kinesis/KinesisRecordSetProvider.java | 78 +++ .../kinesis/KinesisSessionProperties.java | 157 +++++ .../kinesis/KinesisShardCheckpointer.java | 160 +++++ .../plugin/kinesis/KinesisSplit.java | 114 ++++ .../plugin/kinesis/KinesisSplitManager.java | 168 ++++++ .../kinesis/KinesisStreamDescription.java | 82 +++ .../KinesisStreamFieldDescription.java | 155 +++++ .../kinesis/KinesisStreamFieldGroup.java | 59 ++ .../KinesisTableDescriptionSupplier.java | 127 ++++ .../plugin/kinesis/KinesisTableHandle.java | 125 ++++ .../kinesis/KinesisTransactionHandle.java | 22 + .../kinesis/s3config/S3TableConfigClient.java | 214 +++++++ .../plugin/kinesis/TestKinesisConfig.java | 99 ++++ .../plugin/kinesis/TestKinesisPlugin.java | 85 +++ .../TestKinesisTableDescriptionSupplier.java | 121 ++++ .../kinesis/TestMinimalFunctionality.java | 191 
++++++ .../plugin/kinesis/TestRecordAccess.java | 194 ++++++ .../TestingKinesisConnectorFactory.java | 75 +++ .../s3config/TestS3TableConfigClient.java | 123 ++++ .../kinesis/util/EmbeddedKinesisStream.java | 102 ++++ .../util/KinesisTestClientManager.java | 54 ++ .../kinesis/util/MockKinesisClient.java | 561 ++++++++++++++++++ .../plugin/kinesis/util/TestUtils.java | 143 +++++ .../test/resources/decoder/json/event.json | 32 + .../test/resources/decoder/json/message.json | 80 +++ .../tableDescriptions/EmptyTable.json | 5 + .../tableDescriptions/MinimalTable.json | 23 + .../tableDescriptions/SampleTable.json | 23 + .../tableDescriptions/TableWithMessage.json | 23 + presto-server/src/main/provisio/presto.xml | 6 + 50 files changed, 5800 insertions(+) create mode 100644 presto-docs/src/main/sphinx/connector/kinesis.rst create mode 100644 presto-kinesis/etc/catalog/kinesis.properties create mode 100644 presto-kinesis/etc/kinesis/testtable.json create mode 100644 presto-kinesis/pom.xml create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientManager.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientProvider.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisColumnHandle.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConfig.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnector.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnectorFactory.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisErrorCode.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisFieldValueProvider.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisHandleResolver.java create mode 100644 
presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisInternalFieldDescription.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisMetadata.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisModule.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisPlugin.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisRecordSet.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisRecordSetProvider.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSessionProperties.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisShardCheckpointer.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplit.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplitManager.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamDescription.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldDescription.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldGroup.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisTableDescriptionSupplier.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisTableHandle.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisTransactionHandle.java create mode 100644 presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/s3config/S3TableConfigClient.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestKinesisConfig.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestKinesisPlugin.java create mode 100644 
presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestKinesisTableDescriptionSupplier.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestMinimalFunctionality.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestRecordAccess.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestingKinesisConnectorFactory.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/s3config/TestS3TableConfigClient.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/EmbeddedKinesisStream.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/KinesisTestClientManager.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/MockKinesisClient.java create mode 100644 presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/TestUtils.java create mode 100644 presto-kinesis/src/test/resources/decoder/json/event.json create mode 100644 presto-kinesis/src/test/resources/decoder/json/message.json create mode 100644 presto-kinesis/src/test/resources/tableDescriptions/EmptyTable.json create mode 100644 presto-kinesis/src/test/resources/tableDescriptions/MinimalTable.json create mode 100644 presto-kinesis/src/test/resources/tableDescriptions/SampleTable.json create mode 100644 presto-kinesis/src/test/resources/tableDescriptions/TableWithMessage.json diff --git a/pom.xml b/pom.xml index d5bc64e14089..0fd956313896 100644 --- a/pom.xml +++ b/pom.xml @@ -81,6 +81,7 @@ presto-jmx presto-record-decoder presto-kafka + presto-kinesis presto-redis presto-accumulo presto-cassandra @@ -885,6 +886,60 @@ + + com.amazonaws + aws-java-sdk-kinesis + ${dep.aws-sdk.version} + + + joda-time + joda-time + + + commons-logging + commons-logging + + + + + + com.amazonaws + amazon-kinesis-client + 1.6.3 + + + commons-logging + commons-logging + + + commons-lang + commons-lang + + + joda-time + 
joda-time + + + com.google.protobuf + protobuf-java + + + com.amazonaws + aws-java-sdk + + + com.amazonaws + aws-java-sdk-core + + + + + + com.amazonaws + aws-java-sdk-dynamodb + ${dep.aws-sdk.version} + + com.amazonaws aws-java-sdk-sts diff --git a/presto-docs/src/main/sphinx/connector.rst b/presto-docs/src/main/sphinx/connector.rst index 780f0045625e..5d7887a42da7 100644 --- a/presto-docs/src/main/sphinx/connector.rst +++ b/presto-docs/src/main/sphinx/connector.rst @@ -33,3 +33,4 @@ from different data sources. connector/thrift connector/tpcds connector/tpch + connector/kinesis diff --git a/presto-docs/src/main/sphinx/connector/kinesis.rst b/presto-docs/src/main/sphinx/connector/kinesis.rst new file mode 100644 index 000000000000..0a4e27ddcae4 --- /dev/null +++ b/presto-docs/src/main/sphinx/connector/kinesis.rst @@ -0,0 +1,252 @@ +================= +Kinesis Connector +================= +Kinesis is Amazon's fully managed cloud-based service for real-time processing of large, distributed data streams. + +This connector allows the use of Kinesis streams as tables in Presto, such that each data-blob (message) +in a kinesis stream is presented as a row in Presto. A flexible table mapping approach lets us +treat fields of the messages as columns in the table. + +Under the hood, a Kinesis shard iterator is used to retrieve the records, along with a series of getRecords calls. +The shard iterator starts by default 24 hours before the current time and works its way forward. To be able to query a stream, table mappings are needed. These table definitions can be +stored on Amazon S3 (preferred) or stored in a local directory on each Presto node. + +This connector is a read-only connector. It can only fetch data from kinesis streams, but cannot create streams or push +data into existing streams. 
+ +To configure the Kinesis connector, create a catalog properties file, ``etc/catalog/CATALOG_NAME.properties`` with the following +content and replace the properties as appropriate: + +.. sourcecode:: bash + + connector.name=kinesis + kinesis.access-key=XXXXXX + kinesis.secret-key=XXXXXX + +Configuration Properties +------------------------ +The following configuration properties are available. + ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| Property Name | Description | ++===================================+===========================================================================================================+ +| ``kinesis.default-schema`` | Default schema name for tables | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.table-description-dir`` | Directory containing table description files | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.table-descriptions-s3`` | Amazon S3 bucket URL with table description files. Leave blank to read from the directory on the server. 
| ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.access-key`` | Access key to aws account or blank to use default provider chain | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.secret-key`` | Secret key to aws account or blank to use default provider chain | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.hide-internal-columns`` | Controls whether internal columns are part of the table schema or not | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.aws-region`` | AWS region to be used to read kinesis stream from | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.batch-size`` | Maximum number of records to return in one batch. 
Maximum Limit 10000 | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.fetch-attempts`` | Read attempts made when no records returned and not caught up | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.max-batches`` | Maximum batches to read from Kinesis in one single query | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.sleep-time`` | Time for thread to sleep waiting to make next attempt to fetch batch | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.iter-from-timestamp`` | Begin iterating from a given timestamp instead of the trim horizon (true by default) | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ +| ``kinesis.iter-offset-seconds`` | Number of seconds before current time to start iterating | ++-----------------------------------+-----------------------------------------------------------------------------------------------------------+ + +The configuration properties are described in detail here: + +* ``kinesis.default-schema``: Defines the schema which will contain all tables that were defined without a qualifying schema name. + This property is optional; the default is ``default``. +* ``kinesis.table-description-dir``: References a folder within Presto deployment that holds one or more JSON files (must end with .json) which contain table description files. + This property is optional; the default is ``etc/kinesis``. 
+* ``kinesis.table-descriptions-s3``: An S3 URL giving the location of the JSON table description files. When this is given, S3 will be used as the source of table description files and table-description-dir is ignored. The S3 bucket and folder will be checked every 10 minutes for updates and changed files. + This property is optional; the default is blank, which means table-description-dir will be the source of the table definitions. +* ``kinesis.access-key``: Defines the access key ID for AWS root account or IAM roles, which is used to sign programmatic requests to AWS Kinesis. + This property is optional; if not defined, connector will try to follow ``Default-Credential-Provider-Chain`` provided by AWS in the following order: + + - Environment Variable: Load credentials from environment variables ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY``. + - Java System Variable: Load from java system as ``aws.accessKeyId`` and ``aws.secretKey`` + - Profile Credentials File: Load from file typically located at ~/.aws/credentials + - Instance profile credentials: These credentials can be used on EC2 instances, and are delivered through the Amazon EC2 metadata service. + +* ``kinesis.secret-key``: Defines the secret key for AWS root account or IAM roles, which together with Access Key ID, is used to sign programmatic requests to AWS Kinesis. + This property is optional; if not defined, connector will try to follow Default- Credential-Provider-Chain same as above. +* ``kinesis.aws-region``: Defines AWS Kinesis regional endpoint. Selecting appropriate region may reduce latency in fetching data. + This field is optional; The default region is ``us-east-1`` referring to end point 'kinesis.us-east-1.amazonaws.com'. + + **Amazon Kinesis Regions** + + For each Amazon Kinesis account, the following available regions can be used. 
+ + +----------------+-------------+-------------------+---------------------------------------+ + | Region | Name | Region | Endpoint | + +================+=============+===================+=======================================+ + | us-east-1 | US East | (N. Virginia) | kinesis.us-east-1.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | us-west-1 | US West | (N. California) | kinesis.us-west-1.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | us-west-2 | US West | (Oregon) | kinesis.us-west-2.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | eu-west-1 | EU | (Ireland) | kinesis.eu-west-1.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | eu-central-1 | EU | (Frankfurt) | kinesis.eu-central-1.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | ap-southeast-1 | Asia Pacific| (Singapore) | kinesis.ap-southeast-1.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | ap-southeast-2 | Asia Pacific| (Sydney) | kinesis.ap-southeast-2.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + | ap-northeast-1 | Asia Pacific| (Tokyo) | kinesis.ap-northeast-1.amazonaws.com | + +----------------+-------------+-------------------+---------------------------------------+ + +* ``kinesis.batch-size``: Defines maximum number of records to return in one request to Kinesis Streams. Maximum Limit is 10000 records. If a value greater than 10000 is specified, will throw InvalidArgumentException. + This field is optional; the default value is 10000. 
+* ``kinesis.fetch-attempts``: Defines number of attempts made to read a batch from Kinesis Streams when no records are returned and the "millis behind latest" parameter shows we are not yet caught up. When records are returned no additional attempts are necessary. + It has been found that sometimes GetRecordResult returns empty records, when shard is not empty. That is why multiple attempts need to be made. + This field is optional; the default value is 2. +* ``kinesis.max-batches``: The maximum number of batches to read in a single query. The default value is 1000. +* ``kinesis.sleep-time``: Defines the milliseconds for which thread needs to sleep between get-record-attempts made to fetch data. The quantity should be followed by 'ms' string. + This field is optional; the default value is 1000ms. +* ``iter-from-timestamp``: Use an initial shard iterator type of AT_TIMESTAMP starting iterOffsetSeconds before the current time. When this is false, an iterator type of TRIM_HORIZON will be used, meaning it will start from the oldest record in the stream. + The default is true. +* ``iter-offset-seconds``: When iterFromTimestamp is true, the shard iterator will start at ``iter-offset-seconds`` before the current time. + The default is 86400 seconds or 24 hours. +* ``kinesis.hide-internal-columns``: In addition to the data columns defined in a table description file, the connector maintains a number of additional columns for each table. If these columns are hidden, they can still be used in queries but do not show up in ``DESCRIBE `` or ``SELECT *``. + This property is optional; the default is true. 
+ +Internal Columns +---------------- +For each defined table, the connector maintains the following columns: + ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column name | Type | Description | ++=======================+==========+======================================================================================================================================================================================+ +| ``_shard_id`` | VARCHAR | ID of the Kinesis stream shard which contains this row | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_shard_sequence_id``| VARCHAR | Sequence id within the Kinesis shard for this row | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_segment_start`` | BIGINT | Lowest offset in the segment (inclusive) which contains this row. This offset is partition specific. | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_segment_end`` | BIGINT | Highest offset in the segment (exclusive) which contains this row. The offset is partition specific. 
This is the same value as ``_segment_start`` of the next segment (if it exists).| ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_segment_count`` | BIGINT | Running count for the current row within the segment. For an uncompacted topic, ``_segment_start + _segment_count`` is equal to ``_partition_offset``. | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_message_valid`` | BOOLEAN | True if the decoder could decode the message successfully for this row. When false, data columns mapped from the message should be treated as invalid. | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_message`` | VARCHAR | Message bytes as an UTF-8 encoded string. This is only useful for a text topic. | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_message_length`` | BIGINT | Number of bytes in the message. | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_message_timestamp``| TIMESTAMP| Approximate arrival time of the message (milliseconds granularity). 
| ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_key`` | VARCHAR | Key bytes as an UTF-8 encoded string. This is only useful for textual keys. | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_partition_key`` | VARCHAR | Partition Key bytes as an UTF-8 encoded string | ++-----------------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +For tables without a table definition file, the _message_valid column will always be true. + +Table Definition +---------------- +A table definition file consists of a JSON definition for a table, which corresponds to one stream in Kinesis. The name of the file can be arbitrary but must end in .json. The structure of the table definition is as follows: + +.. sourcecode:: bash + + { + "tableName": ..., + "schemaName": ..., + "key": { + "dataFormat": ..., + "fields": [ + ... + ] + }, + "value": { + "dataFormat": ..., + "fields": [ + ... + ] + } + } + ++---------------+---------+-------------+----------------------------------------------------------------------------------------------------------------------+ +| Field | Required| Type | Description | ++===============+=========+=============+======================================================================================================================+ +| ``tableName`` | required| string | Presto table name defined by this file. 
| ++---------------+---------+-------------+----------------------------------------------------------------------------------------------------------------------+ +| ``schemaName``| optional| string | Schema which will contain the table. If omitted, the default schema name is used. | ++---------------+---------+-------------+----------------------------------------------------------------------------------------------------------------------+ +| ``streamName``| required| string | Name of the Kinesis Stream that is mapped | ++---------------+---------+-------------+----------------------------------------------------------------------------------------------------------------------+ +| ``message`` | optional| JSON object | Field definitions for data columns mapped to the message itself. | ++---------------+---------+-------------+----------------------------------------------------------------------------------------------------------------------+ + +Every message in a Kinesis stream can be decoded using the definition provided in the message object. The json object message in the table definition contains two fields. + ++---------------+---------+------------+----------------------------------------------------------------------------------------------------------------------+ +| Field | Required| Type | Description | ++===============+=========+============+======================================================================================================================+ +| ``dataFormat``| required| string | Selects the decoder for this group of fields. | ++---------------+---------+------------+----------------------------------------------------------------------------------------------------------------------+ +| ``fields`` | required| JSON array | A list of field definitions. Each field definition creates a new column in the Presto table. 
| ++---------------+---------+------------+----------------------------------------------------------------------------------------------------------------------+ + +Each field definition is a JSON object. At a minimum, you'll want to provide a name, type, and a mapping. The overall structure looks like this. + +.. sourcecode:: bash + + { + "name": ..., + "type": ..., + "dataFormat": ..., + "mapping": ..., + "formatHint": ..., + "hidden": ..., + "comment": ... + } + ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| Field | Required| Type | Description | ++===============+=========+=========+======================================================================================================================+ +| ``name`` | required| string | Name of the column in the Presto table. | ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| ``type`` | required| string | Presto type of the column. | ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| ``dataFormat``| optional| string | Selects the column decoder for this field. Defaults to the default decoder for this row data format and column type. | ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| ``mapping`` | optional| string | Mapping information for the column. This is decoder specific, see below. | ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| ``formatHint``| optional| string | Sets a column specific format hint to the column decoder. 
| ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| ``hidden`` | optional| boolean | Hides the column from ``DESCRIBE `` and ``SELECT *``. Defaults to ``false``. | ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ +| ``comment`` | optional| string | Adds a column comment which is shown with ``DESCRIBE
``. | ++---------------+---------+---------+----------------------------------------------------------------------------------------------------------------------+ + +The name field is exposed to presto as the column name, while the mapping field is the portion of the message that gets +mapped to that column. For JSON object messages, this refers to the field name of an object, and can be a path that drills +into the object structure of the message. Additionally, you can map a field of the JSON object to a string column type, +and if it is a more complex type (JSON array or JSON object) then the JSON itself will become the field value. + +There is no limit on field descriptions for either key or message. + +Developer Setup +--------------- +Add the following system properties to run the test cases. + +* kinesis.awsAccessKey +* kinesis.awsSecretKey +* kinesis.tableDescriptionS3 + + .. sourcecode:: bash + + + NONE + NONE + + s3://sample-bucket/unit-test/presto-kinesis + + \ No newline at end of file diff --git a/presto-kinesis/etc/catalog/kinesis.properties b/presto-kinesis/etc/catalog/kinesis.properties new file mode 100644 index 000000000000..17310e69c39a --- /dev/null +++ b/presto-kinesis/etc/catalog/kinesis.properties @@ -0,0 +1,34 @@ +# Configuration for the kinesis connector +# +# This is a sample configuration file for the Kinesis connector. +# The values given here are the defaults. 
+ +# Connector name, usually keep this as kinesis +connector.name=kinesis + +kinesis.default-schema=default +kinesis.aws-region=us-east-1 +kinesis.hide-internal-columns=false +kinesis.batch-size=10000 +kinesis.max-batches=100 +kinesis.fetch-attempts=2 +kinesis.sleep-time=1000ms + +# Use an initial shard iterator type of AT_TIMESTAMP starting +# iteratorOffsetSeconds before the current time +kinesis.iterator-from-timestamp=true +kinesis.iterator-offset-seconds=86400 +# info log one line each time a Kinesis batch is read with some useful info +#kinesis.log-batches=true + +# enable query checkpointing via Dynamo DB (configure other +# properties below if needed) +kinesis.checkpoint-enabled=false + +# properties related to checkpointing with Dynamo DB +#kinesis.dynamo-read-capacity=50 +#kinesis.dynamo-write-capacity=10 +#kinesis.checkpoint-interval-ms=60000ms +#kinesis.checkpoint-logical-name=process1 +#kinesis.iterator-number=0 + diff --git a/presto-kinesis/etc/kinesis/testtable.json b/presto-kinesis/etc/kinesis/testtable.json new file mode 100644 index 000000000000..bcd9cd18a5b6 --- /dev/null +++ b/presto-kinesis/etc/kinesis/testtable.json @@ -0,0 +1,38 @@ +{ + "tableName": "test_table", + "schemaName": "prod", + "streamName": "test_kinesis_stream", + "message": { + "dataFormat": "json", + "fields": [ + { + "name": "client_id", + "type": "BIGINT", + "mapping": "client_id", + "comment": "The client ID field" + }, + { + "name": "acct_balance", + "type": "DOUBLE", + "mapping": "acct_balance", + "comment": "Current account balance" + }, + { + "name": "service_type", + "mapping": "service_type", + "type": "VARCHAR(20)" + }, + { + "name": "signup_date", + "mapping": "signup_date", + "type": "DATE", + "dataFormat": "iso8601" + } + ] + }, + "comment" : "This test adds some extra fields to make sure they are ignored and don't cause issues.", + "client_metadata" : { + "name" : "Sample Query", + "query" : "select client_id, service_type, signup_date, _shard_id, _message_length 
from prod.test_table" + } +} \ No newline at end of file diff --git a/presto-kinesis/pom.xml b/presto-kinesis/pom.xml new file mode 100644 index 000000000000..dc67e898f408 --- /dev/null +++ b/presto-kinesis/pom.xml @@ -0,0 +1,228 @@ + + + 4.0.0 + + io.prestosql + presto-root + 320-SNAPSHOT + + + presto-kinesis + Presto - Kinesis Connector + presto-plugin + + + ${project.parent.basedir} + + 1.11.12 + + + + + + io.airlift + bootstrap + + + + io.airlift + json + + + + io.airlift + log + + + + io.airlift + configuration + + + + com.google.guava + guava + + + + com.google.inject + guice + + + + com.fasterxml.jackson.core + jackson-databind + + + + javax.inject + javax.inject + + + + javax.annotation + javax.annotation-api + + + + javax.validation + validation-api + + + + com.amazonaws + aws-java-sdk-kinesis + + + + com.amazonaws + amazon-kinesis-client + + + + com.amazonaws + aws-java-sdk-dynamodb + + + + com.amazonaws + aws-java-sdk-core + + + joda-time + joda-time + + + commons-logging + commons-logging + + + + + + com.amazonaws + aws-java-sdk-s3 + ${dep.aws-sdk.version} + + + joda-time + joda-time + + + commons-logging + commons-logging + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + + + + + org.openjdk.jol + jol-core + provided + + + + io.prestosql + presto-spi + provided + + + + io.airlift + slice + provided + + + + io.airlift + units + provided + + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + + com.google.code.findbugs + jsr305 + + + + + org.testng + testng + test + + + + io.prestosql + presto-tests + test + + + + io.prestosql + presto-record-decoder + + + + io.prestosql + presto-main + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + **/TestMinimalFunctionality.java + **/TestS3TableConfigClient.java + + + ACCESS-KEY + SECRET-KEY + s3://S3-LOC + + + + + + + + + test-kinesis + + + + 
org.apache.maven.plugins + maven-surefire-plugin + + + + + + + + + diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientManager.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientManager.java new file mode 100644 index 000000000000..d8b865202245 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientManager.java @@ -0,0 +1,73 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.s3.AmazonS3Client; +import com.google.inject.Inject; + +import static com.google.common.base.Strings.isNullOrEmpty; + +/** + * Creates and manages AWS clients for this connector. + *

+ * Note: credentials can be supplied explicitly through the configuration. However when these are + * omitted, the default AWS provider chain is used (which includes instance profile credentials). + */ +public class KinesisClientManager + implements KinesisClientProvider +{ + private final AmazonKinesisClient client; + private final AmazonS3Client amazonS3Client; + private final AmazonDynamoDBClient dynamoDbClient; // for Checkpointing + + @Inject + public KinesisClientManager(KinesisConfig config) + { + if (!isNullOrEmpty(config.getAccessKey()) && !isNullOrEmpty(config.getSecretKey())) { + BasicAWSCredentials awsCredentials = new BasicAWSCredentials(config.getAccessKey(), config.getSecretKey()); + this.client = new AmazonKinesisClient(awsCredentials); + this.amazonS3Client = new AmazonS3Client(awsCredentials); + this.dynamoDbClient = new AmazonDynamoDBClient(awsCredentials); + } + else { + DefaultAWSCredentialsProviderChain defaultChain = new DefaultAWSCredentialsProviderChain(); + this.client = new AmazonKinesisClient(defaultChain); + this.amazonS3Client = new AmazonS3Client(defaultChain); + this.dynamoDbClient = new AmazonDynamoDBClient(defaultChain); + } + + this.client.setEndpoint("kinesis." + config.getAwsRegion() + ".amazonaws.com"); + this.dynamoDbClient.setEndpoint("dynamodb." 
+ config.getAwsRegion() + ".amazonaws.com"); + } + + @Override + public AmazonKinesisClient getClient() + { + return client; + } + + public AmazonDynamoDBClient getDynamoDbClient() + { + return dynamoDbClient; + } + + public AmazonS3Client getS3Client() + { + return amazonS3Client; + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientProvider.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientProvider.java new file mode 100644 index 000000000000..665c463cd2ab --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisClientProvider.java @@ -0,0 +1,31 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.s3.AmazonS3Client; + +/** + * Interface to a client manager that provides the AWS clients needed. 
+ */ +//TODO: This interface needs to be removed and abstraction in unneccesary +public interface KinesisClientProvider +{ + AmazonKinesisClient getClient(); + + AmazonDynamoDBClient getDynamoDbClient(); + + AmazonS3Client getS3Client(); +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisColumnHandle.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisColumnHandle.java new file mode 100644 index 000000000000..7b570b53feb8 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisColumnHandle.java @@ -0,0 +1,161 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.plugin.kinesis; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.prestosql.decoder.DecoderColumnHandle; +import io.prestosql.spi.connector.ColumnMetadata; +import io.prestosql.spi.type.Type; + +import javax.annotation.Nullable; + +import java.util.Objects; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +//TODO: Use Optional for nullable fields, changes to be done across Kafka and Redis too +public class KinesisColumnHandle + implements DecoderColumnHandle +{ + private final int ordinalPosition; + private final String name; + private final Type type; + private final String mapping; + + private final String dataFormat; // Data format to use (selects the decoder). Can be null. + private final String formatHint; // Additional format hint for the selected decoder. Selects a decoder subtype (e.g. which timestamp decoder). + private final boolean hidden; + private final boolean internal; + + //TODO: use Optional and check that Optional wrapper passed in is not null, across Kafka and Redis too + @JsonCreator + public KinesisColumnHandle( + @JsonProperty("ordinalPosition") int ordinalPosition, + @JsonProperty("name") String name, + @JsonProperty("type") Type type, + @JsonProperty("mapping") String mapping, + @JsonProperty("dataFormat") String dataFormat, + @JsonProperty("formatHint") String formatHint, + @JsonProperty("hidden") boolean hidden, + @JsonProperty("internal") boolean internal) + { + this.ordinalPosition = ordinalPosition; + this.name = requireNonNull(name, "name is null"); + this.type = requireNonNull(type, "type is null"); + this.mapping = mapping; + this.dataFormat = dataFormat; + this.formatHint = formatHint; + this.hidden = hidden; + this.internal = internal; + } + + @JsonProperty + public int getOrdinalPosition() + { + return ordinalPosition; + } + + @JsonProperty + public String 
getName() + { + return name; + } + + @JsonProperty + public Type getType() + { + return type; + } + + @Nullable + @JsonProperty + public String getMapping() + { + return mapping; + } + + @Nullable + @JsonProperty + public String getDataFormat() + { + return dataFormat; + } + + @JsonProperty + public String getFormatHint() + { + return formatHint; + } + + @JsonProperty + public boolean isHidden() + { + return hidden; + } + + @JsonProperty + public boolean isInternal() + { + return internal; + } + + ColumnMetadata getColumnMetadata() + { + return new ColumnMetadata(name, type, null, hidden); // name/type/comment/hidden + } + + @Override + public int hashCode() + { + return Objects.hash(ordinalPosition, name, type, mapping, dataFormat, formatHint, hidden, internal); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + + KinesisColumnHandle other = (KinesisColumnHandle) obj; + return Objects.equals(this.ordinalPosition, other.ordinalPosition) && + Objects.equals(this.name, other.name) && + Objects.equals(this.type, other.type) && + Objects.equals(this.mapping, other.mapping) && + Objects.equals(this.dataFormat, other.dataFormat) && + Objects.equals(this.formatHint, other.formatHint) && + Objects.equals(this.hidden, other.hidden) && + Objects.equals(this.internal, other.internal); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("ordinalPosition", ordinalPosition) + .add("name", name) + .add("type", type) + .add("mapping", mapping) + .add("dataFormat", dataFormat) + .add("formatHint", formatHint) + .add("hidden", hidden) + .add("internal", internal) + .toString(); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConfig.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConfig.java new file mode 100644 index 000000000000..2be80c1fbf19 --- /dev/null +++ 
b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConfig.java @@ -0,0 +1,306 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigDescription; +import io.airlift.units.Duration; + +import javax.validation.constraints.Max; +import javax.validation.constraints.Min; +import javax.validation.constraints.NotNull; + +import java.util.concurrent.TimeUnit; + +public class KinesisConfig +{ + private String defaultSchema = "default"; + private String tableDescriptionLocation = "etc/kinesis/"; + private boolean hideInternalColumns = true; + private String awsRegion = "us-east-1"; + private int batchSize = 10000; + private int maxBatches = 600; + private int fetchAttempts = 2; + private Duration sleepTime = new Duration(1000, TimeUnit.MILLISECONDS); + private boolean isIteratorFromTimestamp = true; + private long iteratorOffsetSeconds = 86400; + private String accessKey; + private String secretKey; + private boolean logKinesisBatches = true; + private boolean checkpointEnabled; + private long dynamoReadCapacity = 50L; + private long dynamoWriteCapacity = 10L; + private Duration checkpointInterval = new Duration(60000, TimeUnit.MILLISECONDS); + private String logicalProcessName = "process1"; + private int iteratorNumber; + + @NotNull + public String getTableDescriptionLocation() + { + return tableDescriptionLocation; + } + + 
@Config("kinesis.table-description-location") + @ConfigDescription("S3 or local filesystem directory location where table schema descriptions are present") + public KinesisConfig setTableDescriptionLocation(String tableDescriptionLocation) + { + this.tableDescriptionLocation = tableDescriptionLocation; + return this; + } + + public boolean isHideInternalColumns() + { + return hideInternalColumns; + } + + @Config("kinesis.hide-internal-columns") + @ConfigDescription("Toggle to decide whether to show Kinesis internal columns or not") + public KinesisConfig setHideInternalColumns(boolean hideInternalColumns) + { + this.hideInternalColumns = hideInternalColumns; + return this; + } + + @NotNull + public String getDefaultSchema() + { + return defaultSchema; + } + + @Config("kinesis.default-schema") + @ConfigDescription("Sets default schema for kinesis catalogs") + public KinesisConfig setDefaultSchema(String defaultSchema) + { + this.defaultSchema = defaultSchema; + return this; + } + + public String getAccessKey() + { + return this.accessKey; + } + + @Config("kinesis.access-key") + @ConfigDescription("S3 Access Key to access s3 locations") + public KinesisConfig setAccessKey(String accessKey) + { + this.accessKey = accessKey; + return this; + } + + public String getSecretKey() + { + return this.secretKey; + } + + @Config("kinesis.secret-key") + @ConfigDescription("S3 Secret Key to access s3 locations") + public KinesisConfig setSecretKey(String secretKey) + { + this.secretKey = secretKey; + return this; + } + + public String getAwsRegion() + { + return awsRegion; + } + + @Config("kinesis.aws-region") + @ConfigDescription("Region to set while creating S3 client") + public KinesisConfig setAwsRegion(String awsRegion) + { + this.awsRegion = awsRegion; + return this; + } + + @Min(1) + @Max(Integer.MAX_VALUE) + public int getBatchSize() + { + return this.batchSize; + } + + @Config("kinesis.batch-size") + @ConfigDescription("Limit maximum number of rows to return in a batch") 
+ public KinesisConfig setBatchSize(int batchSize) + { + this.batchSize = batchSize; + return this; + } + + @Min(1) + public int getMaxBatches() + { + return this.maxBatches; + } + + @Min(1) + @Config("kinesis.max-batches") + @ConfigDescription("Maximum number of calls to Kinesis per query") + public KinesisConfig setMaxBatches(int maxBatches) + { + this.maxBatches = maxBatches; + return this; + } + + @Min(1) + @Max(1000) + public int getFetchAttempts() + { + return this.fetchAttempts; + } + + @Min(1) + @Max(1000) + @Config("kinesis.fetch-attempts") + @ConfigDescription("Maximum number of attempts to fetch the next batch from a shard iterator") + public KinesisConfig setFetchAttempts(int fetchAttempts) + { + this.fetchAttempts = fetchAttempts; + return this; + } + + public Duration getSleepTime() + { + return this.sleepTime; + } + + @Config("kinesis.sleep-time") + @ConfigDescription("Sleep time between fetch attempt retries") + public KinesisConfig setSleepTime(Duration sleepTime) + { + this.sleepTime = sleepTime; + return this; + } + + public boolean isLogBatches() + { + return logKinesisBatches; + } + + @Config("kinesis.log-batches") + @ConfigDescription("Decides whether to log batch fetch details") + public KinesisConfig setLogBatches(boolean logBatches) + { + this.logKinesisBatches = logBatches; + return this; + } + + public boolean isIteratorFromTimestamp() + { + return isIteratorFromTimestamp; + } + + @Config("kinesis.iterator-from-timestamp") + @ConfigDescription("Whether to use start timestamp from shard iterator") + public KinesisConfig setIteratorFromTimestamp(boolean isIteratorFromTimestamp) + { + this.isIteratorFromTimestamp = isIteratorFromTimestamp; + return this; + } + + public long getIteratorOffsetSeconds() + { + return iteratorOffsetSeconds; + } + + @Config("kinesis.iterator-offset-seconds") + @ConfigDescription("Seconds before current time to start fetching records from") + public KinesisConfig setIteratorOffsetSeconds(long iteratorOffsetSeconds) 
+ { + this.iteratorOffsetSeconds = iteratorOffsetSeconds; + return this; + } + + public boolean isCheckpointEnabled() + { + return checkpointEnabled; + } + + @Config("kinesis.checkpoint-enabled") + @ConfigDescription("Whether to remember last read sequence number and use it in later requests") + public KinesisConfig setCheckpointEnabled(boolean checkpointEnabled) + { + this.checkpointEnabled = checkpointEnabled; + return this; + } + + public long getDynamoReadCapacity() + { + return dynamoReadCapacity; + } + + @Config("kinesis.dynamo-read-capacity") + @ConfigDescription("DynamoDB read capacity to be set in client") + public KinesisConfig setDynamoReadCapacity(long dynamoReadCapacity) + { + this.dynamoReadCapacity = dynamoReadCapacity; + return this; + } + + public long getDynamoWriteCapacity() + { + return dynamoWriteCapacity; + } + + @Config("kinesis.dynamo-write-capacity") + @ConfigDescription("DynamoDB read capacity to be set in client") + public KinesisConfig setDynamoWriteCapacity(long dynamoWriteCapacity) + { + this.dynamoWriteCapacity = dynamoWriteCapacity; + return this; + } + + public Duration getCheckpointInterval() + { + return checkpointInterval; + } + + @Config("kinesis.checkpoint-interval") + @ConfigDescription("Intervals at which to checkpoint shard iterator details") + public KinesisConfig setCheckpointInterval(Duration checkpointInterval) + { + this.checkpointInterval = checkpointInterval; + return this; + } + + public String getLogicalProcessName() + { + return logicalProcessName; + } + + @Config("kinesis.checkpoint-logical-name") + @ConfigDescription("Prefix to the checkpoint name") + public KinesisConfig setLogicalProcessName(String logicalPrcessName) + { + this.logicalProcessName = logicalPrcessName; + return this; + } + + @Min(0) + public int getIteratorNumber() + { + return iteratorNumber; + } + + @Min(0) + @Config("kinesis.iterator-number") + @ConfigDescription("Checkpoint iteration number") + public KinesisConfig setIteratorNumber(int 
iteratorNumber) + { + this.iteratorNumber = iteratorNumber; + return this; + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnector.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnector.java new file mode 100644 index 000000000000..ca3447691a37 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnector.java @@ -0,0 +1,84 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; +import io.prestosql.spi.connector.Connector; +import io.prestosql.spi.connector.ConnectorMetadata; +import io.prestosql.spi.connector.ConnectorRecordSetProvider; +import io.prestosql.spi.connector.ConnectorSplitManager; +import io.prestosql.spi.connector.ConnectorTransactionHandle; +import io.prestosql.spi.session.PropertyMetadata; +import io.prestosql.spi.transaction.IsolationLevel; + +import java.util.List; + +import static io.prestosql.spi.transaction.IsolationLevel.READ_COMMITTED; +import static io.prestosql.spi.transaction.IsolationLevel.checkConnectorSupports; +import static java.util.Objects.requireNonNull; + +public class KinesisConnector + implements Connector +{ + private final KinesisMetadata metadata; + private final KinesisSplitManager splitManager; + private final KinesisRecordSetProvider recordSetProvider; + + private final List> propertyList; + + 
@Inject + public KinesisConnector( + KinesisMetadata metadata, + KinesisSplitManager splitManager, + KinesisRecordSetProvider recordSetProvider, + KinesisSessionProperties properties) + { + this.metadata = requireNonNull(metadata, "metadata is null"); + this.splitManager = requireNonNull(splitManager, "splitManager is null"); + this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); + this.propertyList = ImmutableList.copyOf(properties.getSessionProperties()); + } + + @Override + public ConnectorMetadata getMetadata(ConnectorTransactionHandle transactionHandle) + { + return metadata; + } + + @Override + public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean b) + { + checkConnectorSupports(READ_COMMITTED, isolationLevel); + return KinesisTransactionHandle.INSTANCE; + } + + @Override + public ConnectorSplitManager getSplitManager() + { + return splitManager; + } + + @Override + public ConnectorRecordSetProvider getRecordSetProvider() + { + return recordSetProvider; + } + + @Override + public List> getSessionProperties() + { + return propertyList; + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnectorFactory.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnectorFactory.java new file mode 100644 index 000000000000..00a186db79ca --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisConnectorFactory.java @@ -0,0 +1,95 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.google.common.collect.ImmutableMap; +import com.google.inject.Injector; +import com.google.inject.Scopes; +import com.google.inject.TypeLiteral; +import io.airlift.bootstrap.Bootstrap; +import io.airlift.json.JsonModule; +import io.prestosql.spi.NodeManager; +import io.prestosql.spi.connector.Connector; +import io.prestosql.spi.connector.ConnectorContext; +import io.prestosql.spi.connector.ConnectorFactory; +import io.prestosql.spi.connector.ConnectorHandleResolver; +import io.prestosql.spi.connector.SchemaTableName; +import io.prestosql.spi.type.TypeManager; + +import java.util.Map; +import java.util.function.Supplier; + +import static com.google.common.base.Throwables.throwIfUnchecked; +import static java.util.Objects.requireNonNull; + +/** + * This factory class creates the KinesisConnector during server start and binds all the dependency + * by calling create() method. 
+ */ +public class KinesisConnectorFactory + implements ConnectorFactory +{ + private Map optionalConfig = ImmutableMap.of(); + + public KinesisConnectorFactory() + { + //TODO: Remove this when once aws version is upgraded to latest + System.setProperty("com.amazonaws.sdk.disableCbor", "true"); + } + + @Override + public String getName() + { + return "kinesis"; + } + + @Override + public ConnectorHandleResolver getHandleResolver() + { + return new KinesisHandleResolver(); + } + + @Override + public Connector create(String catalogName, Map config, ConnectorContext context) + { + requireNonNull(catalogName, "catalogName is null"); + requireNonNull(config, "config is null"); + + try { + Bootstrap app = new Bootstrap( + new JsonModule(), + new KinesisModule(), + binder -> { + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); + binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + binder.bind(KinesisHandleResolver.class).toInstance(new KinesisHandleResolver()); + binder.bind(KinesisClientProvider.class).to(KinesisClientManager.class).in(Scopes.SINGLETON); + binder.bind(new TypeLiteral>>() {}).to(KinesisTableDescriptionSupplier.class).in(Scopes.SINGLETON); + }); + + Injector injector = app.strictConfig() + .doNotInitializeLogging() + .setRequiredConfigurationProperties(config) + .setOptionalConfigurationProperties(optionalConfig) + .initialize(); + + KinesisConnector connector = injector.getInstance(KinesisConnector.class); + return connector; + } + catch (Exception e) { + throwIfUnchecked(e); + throw new RuntimeException(e); + } + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisErrorCode.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisErrorCode.java new file mode 100644 index 000000000000..bb1643ed6694 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisErrorCode.java @@ -0,0 +1,48 @@ +/* + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import io.prestosql.spi.ErrorCode; +import io.prestosql.spi.ErrorCodeSupplier; +import io.prestosql.spi.ErrorType; + +import static io.prestosql.spi.ErrorType.EXTERNAL; +import static io.prestosql.spi.ErrorType.INTERNAL_ERROR; + +/** + * Kinesis connector specific error codes. + */ +public enum KinesisErrorCode + implements ErrorCodeSupplier +{ + KINESIS_CONVERSION_NOT_SUPPORTED(0, EXTERNAL), + KINESIS_SPLIT_ERROR(1, INTERNAL_ERROR), + KINESIS_METADATA_EXCEPTION(2, INTERNAL_ERROR); + + // Connectors can use error codes starting at EXTERNAL + public static final int StartingErrorCode = 0x0200_0000; + + private final ErrorCode errorCode; + + KinesisErrorCode(int code, ErrorType errorType) + { + errorCode = new ErrorCode(code + StartingErrorCode, name(), errorType); + } + + @Override + public ErrorCode toErrorCode() + { + return errorCode; + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisFieldValueProvider.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisFieldValueProvider.java new file mode 100644 index 000000000000..7b5be0a874fe --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisFieldValueProvider.java @@ -0,0 +1,46 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import io.airlift.slice.Slice; +import io.prestosql.spi.PrestoException; + +import static io.prestosql.plugin.kinesis.KinesisErrorCode.KINESIS_CONVERSION_NOT_SUPPORTED; + +public abstract class KinesisFieldValueProvider +{ + public abstract boolean accept(KinesisColumnHandle columnHanle); + + public boolean getBoolean() + { + throw new PrestoException(KINESIS_CONVERSION_NOT_SUPPORTED, "conversion to boolean not supported"); + } + + public long getLong() + { + throw new PrestoException(KINESIS_CONVERSION_NOT_SUPPORTED, "conversion to long not supported"); + } + + public double getDouble() + { + throw new PrestoException(KINESIS_CONVERSION_NOT_SUPPORTED, "conversion to double not supported"); + } + + public Slice getSlice() + { + throw new PrestoException(KINESIS_CONVERSION_NOT_SUPPORTED, "conversion to Slice not supported"); + } + + public abstract boolean isNull(); +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisHandleResolver.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisHandleResolver.java new file mode 100644 index 000000000000..8cd440c377b0 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisHandleResolver.java @@ -0,0 +1,53 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import io.prestosql.spi.connector.ColumnHandle; +import io.prestosql.spi.connector.ConnectorHandleResolver; +import io.prestosql.spi.connector.ConnectorSplit; +import io.prestosql.spi.connector.ConnectorTableHandle; +import io.prestosql.spi.connector.ConnectorTransactionHandle; + +import javax.inject.Inject; + +public class KinesisHandleResolver + implements ConnectorHandleResolver +{ + @Inject + public KinesisHandleResolver() {} + + @Override + public Class getTableHandleClass() + { + return KinesisTableHandle.class; + } + + @Override + public Class getTransactionHandleClass() + { + return KinesisTransactionHandle.class; + } + + @Override + public Class getColumnHandleClass() + { + return KinesisColumnHandle.class; + } + + @Override + public Class getSplitClass() + { + return KinesisSplit.class; + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisInternalFieldDescription.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisInternalFieldDescription.java new file mode 100644 index 000000000000..a2b6ba540527 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisInternalFieldDescription.java @@ -0,0 +1,97 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
package io.prestosql.plugin.kinesis;

import io.prestosql.spi.connector.ColumnMetadata;
import io.prestosql.spi.type.BigintType;
import io.prestosql.spi.type.BooleanType;
import io.prestosql.spi.type.TimestampType;
import io.prestosql.spi.type.Type;
import io.prestosql.spi.type.VarcharType;

import java.util.Map;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.Arrays.stream;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;

/**
 * Built-in (usually hidden) columns exposed for every Kinesis-backed table,
 * carrying per-record metadata such as shard id, sequence number and message
 * payload.
 */
public enum KinesisInternalFieldDescription
{
    SHARD_ID_FIELD("_shard_id", VarcharType.VARCHAR, "Shard Id"),
    SEGMENT_START_FIELD("_segment_start", VarcharType.VARCHAR, "Segment start sequence id"),
    // NOTE(review): SEGMENT_END_FIELD's column name "_shard_sequence_id" nearly collides with
    // SHARD_SEQUENCE_ID_FIELD's "_shard_sequence_id_field", and its name does not match the
    // "segment end" comment — looks like a copy/paste slip; confirm intended column names
    // before renaming (they are part of the user-visible schema).
    SEGMENT_END_FIELD("_shard_sequence_id", VarcharType.VARCHAR, "Segment end sequence id"),
    SHARD_SEQUENCE_ID_FIELD("_shard_sequence_id_field", BigintType.BIGINT, "Segment start offset"),
    SEGMENT_COUNT_FIELD("_segment_count", BigintType.BIGINT, "Running message count per segment"),
    MESSAGE_VALID_FIELD("_message_valid", BooleanType.BOOLEAN, "Message data is valid"),
    MESSAGE_FIELD("_message", VarcharType.VARCHAR, "Message text"),
    MESSAGE_TIMESTAMP("_message_timestamp", TimestampType.TIMESTAMP, "Approximate message arrival timestamp"),
    MESSAGE_LENGTH_FIELD("_message_length", BigintType.BIGINT, "Total number of message bytes"),
    PARTITION_KEY_FIELD("_partition_key", VarcharType.VARCHAR, "Key text");

    // Lookup table from column name to enum constant; generics restored from the mangled source.
    private static final Map<String, KinesisInternalFieldDescription> BY_COLUMN_NAME =
            stream(values())
                    .collect(toImmutableMap(KinesisInternalFieldDescription::getColumnName, identity()));

    /**
     * Resolves an internal column by its exposed name.
     *
     * @throws IllegalArgumentException if the name is not an internal column
     */
    public static KinesisInternalFieldDescription forColumnName(String columnName)
    {
        KinesisInternalFieldDescription description = BY_COLUMN_NAME.get(columnName);
        checkArgument(description != null, "Unknown internal column name %s", columnName);
        return description;
    }

    private final String columnName;
    private final Type type;
    private final String comment;

    KinesisInternalFieldDescription(
            String columnName,
            Type type,
            String comment)
    {
        checkArgument(!isNullOrEmpty(columnName), "name is null or is empty");
        this.columnName = columnName;
        this.type = requireNonNull(type, "type is null");
        this.comment = requireNonNull(comment, "comment is null");
    }

    public String getColumnName()
    {
        return columnName;
    }

    public Type getType()
    {
        return type;
    }

    /**
     * Builds the column handle for this internal field at the given ordinal position.
     */
    KinesisColumnHandle getColumnHandle(int index, boolean hidden)
    {
        return new KinesisColumnHandle(
                index,
                getColumnName(),
                getType(),
                null,
                null,
                null,
                false,
                hidden);
    }

    /**
     * Builds the {@link ColumnMetadata} shown to the engine for this field.
     */
    ColumnMetadata getColumnMetadata(boolean hidden)
    {
        return new ColumnMetadata(columnName, type, comment, hidden);
    }
}
package io.prestosql.plugin.kinesis;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import io.airlift.log.Logger;
import io.prestosql.decoder.dummy.DummyRowDecoder;
import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.ColumnMetadata;
import io.prestosql.spi.connector.ConnectorMetadata;
import io.prestosql.spi.connector.ConnectorSession;
import io.prestosql.spi.connector.ConnectorTableHandle;
import io.prestosql.spi.connector.ConnectorTableMetadata;
import io.prestosql.spi.connector.ConnectorTableProperties;
import io.prestosql.spi.connector.SchemaTableName;
import io.prestosql.spi.connector.SchemaTablePrefix;
import io.prestosql.spi.connector.TableNotFoundException;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;

/**
 * Connector metadata for Kinesis: table and column information is driven by
 * JSON stream-description files exposed through {@code tableDescriptionSupplier},
 * plus the fixed set of internal (per-record metadata) columns.
 */
public class KinesisMetadata
        implements ConnectorMetadata
{
    private static final Logger log = Logger.get(KinesisMetadata.class);

    private final KinesisHandleResolver handleResolver;

    private final Supplier<Map<SchemaTableName, KinesisStreamDescription>> tableDescriptionSupplier;
    private final Set<KinesisInternalFieldDescription> internalFieldDescriptions;
    private final boolean isHideInternalColumns;

    @Inject
    public KinesisMetadata(
            KinesisConfig kinesisConfig,
            KinesisHandleResolver handleResolver,
            Supplier<Map<SchemaTableName, KinesisStreamDescription>> tableDescriptionSupplier,
            Set<KinesisInternalFieldDescription> internalFieldDescriptions)
    {
        requireNonNull(kinesisConfig, "kinesisConfig is null");
        isHideInternalColumns = kinesisConfig.isHideInternalColumns();
        this.handleResolver = requireNonNull(handleResolver, "handleResolver is null");
        this.tableDescriptionSupplier = requireNonNull(tableDescriptionSupplier, "tableDescriptionSupplier is null");
        this.internalFieldDescriptions = requireNonNull(internalFieldDescriptions, "internalFieldDescriptions is null");
    }

    @Override
    public List<String> listSchemaNames(ConnectorSession session)
    {
        // distinct(): several tables may share a schema and the listing should not repeat it
        return tableDescriptionSupplier.get().keySet().stream()
                .map(SchemaTableName::getSchemaName)
                .distinct()
                .collect(toImmutableList());
    }

    @Override
    public KinesisTableHandle getTableHandle(ConnectorSession session, SchemaTableName schemaTableName)
    {
        KinesisStreamDescription table = tableDescriptionSupplier.get().get(schemaTableName);
        if (table == null) {
            throw new TableNotFoundException(schemaTableName);
        }

        return new KinesisTableHandle(
                schemaTableName.getSchemaName(),
                schemaTableName.getTableName(),
                table.getStreamName(),
                getDataFormat(table.getMessage()));
    }

    @Override
    public ConnectorTableMetadata getTableMetadata(ConnectorSession connectorSession, ConnectorTableHandle tableHandle)
    {
        return getTableMetadata(((KinesisTableHandle) tableHandle).toSchemaTableName());
    }

    @Override
    public boolean usesLegacyTableLayouts()
    {
        return false;
    }

    @Override
    public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table)
    {
        return new ConnectorTableProperties();
    }

    @Override
    public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> schemaName)
    {
        ImmutableList.Builder<SchemaTableName> builder = ImmutableList.builder();
        for (SchemaTableName tableName : tableDescriptionSupplier.get().keySet()) {
            if (!schemaName.isPresent() || tableName.getSchemaName().equals(schemaName.get())) {
                builder.add(tableName);
            }
        }

        return builder.build();
    }

    @Override
    public Map<String, ColumnHandle> getColumnHandles(ConnectorSession connectorSession, ConnectorTableHandle tableHandle)
    {
        KinesisTableHandle kinesisTableHandle = (KinesisTableHandle) tableHandle;

        KinesisStreamDescription kinesisStreamDescription = tableDescriptionSupplier.get().get(kinesisTableHandle.toSchemaTableName());
        if (kinesisStreamDescription == null) {
            throw new TableNotFoundException(kinesisTableHandle.toSchemaTableName());
        }

        ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder();

        int index = 0;
        // Note: partition key and related fields are handled by internalFieldDescriptions below
        KinesisStreamFieldGroup message = kinesisStreamDescription.getMessage();
        if (message != null) {
            List<KinesisStreamFieldDescription> fields = message.getFields();
            if (fields != null) {
                for (KinesisStreamFieldDescription kinesisStreamFieldDescription : fields) {
                    columnHandles.put(kinesisStreamFieldDescription.getName(), kinesisStreamFieldDescription.getColumnHandle(index++));
                }
            }
        }

        // Internal columns come last and honor the hide-internal-columns setting
        for (KinesisInternalFieldDescription kinesisInternalFieldDescription : internalFieldDescriptions) {
            columnHandles.put(kinesisInternalFieldDescription.getColumnName(), kinesisInternalFieldDescription.getColumnHandle(index++, isHideInternalColumns));
        }

        return columnHandles.build();
    }

    @Override
    public ColumnMetadata getColumnMetadata(ConnectorSession connectorSession, ConnectorTableHandle tableHandle, ColumnHandle columnHandle)
    {
        return ((KinesisColumnHandle) columnHandle).getColumnMetadata();
    }

    @Override
    public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
    {
        requireNonNull(prefix, "prefix is null");
        ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columns = ImmutableMap.builder();

        List<SchemaTableName> tableNames;
        if (prefix.getSchema().isPresent() && prefix.getTable().isPresent()) {
            tableNames = ImmutableList.of(new SchemaTableName(prefix.getSchema().get(), prefix.getTable().get()));
        }
        else {
            // BUG FIX: previously passed null here, which made listTables throw NPE on
            // schemaName.isPresent(); Optional.empty() means "all schemas".
            tableNames = listTables(session, Optional.empty());
        }

        for (SchemaTableName tableName : tableNames) {
            try {
                columns.put(tableName, getTableMetadata(tableName).getColumns());
            }
            catch (TableNotFoundException e) {
                // Table disappeared between listing and lookup, or the prefix named a
                // non-existent table — skip it. (The old "!= null" check was dead code
                // because getTableMetadata throws rather than returning null.)
            }
        }
        return columns.build();
    }

    /**
     * Falls back to the dummy (no-op) decoder when no message format is configured.
     */
    private static String getDataFormat(KinesisStreamFieldGroup fieldGroup)
    {
        return (fieldGroup == null) ? DummyRowDecoder.NAME : fieldGroup.getDataFormat();
    }

    private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName)
    {
        KinesisStreamDescription kinesisStreamDescription = tableDescriptionSupplier.get().get(schemaTableName);
        if (kinesisStreamDescription == null) {
            throw new TableNotFoundException(schemaTableName);
        }

        ImmutableList.Builder<ColumnMetadata> builder = ImmutableList.builder();

        KinesisStreamFieldGroup message = kinesisStreamDescription.getMessage();
        if (message != null) {
            List<KinesisStreamFieldDescription> fields = message.getFields();
            if (fields != null) {
                for (KinesisStreamFieldDescription fieldDescription : fields) {
                    builder.add(fieldDescription.getColumnMetadata());
                }
            }
        }

        for (KinesisInternalFieldDescription fieldDescription : internalFieldDescriptions) {
            builder.add(fieldDescription.getColumnMetadata(isHideInternalColumns));
        }

        return new ConnectorTableMetadata(schemaTableName, builder.build());
    }
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer; +import com.google.inject.Binder; +import com.google.inject.Module; +import com.google.inject.Scopes; +import com.google.inject.multibindings.Multibinder; +import io.prestosql.decoder.DecoderModule; +import io.prestosql.plugin.kinesis.s3config.S3TableConfigClient; +import io.prestosql.spi.type.Type; +import io.prestosql.spi.type.TypeManager; + +import javax.inject.Inject; + +import static com.google.common.base.Preconditions.checkArgument; +import static io.airlift.configuration.ConfigBinder.configBinder; +import static io.airlift.json.JsonBinder.jsonBinder; +import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; +import static io.prestosql.spi.type.TypeSignature.parseTypeSignature; +import static java.util.Objects.requireNonNull; + +public class KinesisModule + implements Module +{ + @Override + public void configure(Binder binder) + { + // Note: handle resolver handled separately, along with several other classes. 
+ binder.bind(KinesisConnector.class).in(Scopes.SINGLETON); + + binder.bind(KinesisMetadata.class).in(Scopes.SINGLETON); + binder.bind(KinesisSplitManager.class).in(Scopes.SINGLETON); + binder.bind(KinesisRecordSetProvider.class).in(Scopes.SINGLETON); + binder.bind(S3TableConfigClient.class).in(Scopes.SINGLETON); + binder.bind(KinesisSessionProperties.class).in(Scopes.SINGLETON); + + configBinder(binder).bindConfig(KinesisConfig.class); + + jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class); + jsonCodecBinder(binder).bindJsonCodec(KinesisStreamDescription.class); + + binder.install(new DecoderModule()); + + for (KinesisInternalFieldDescription internalFieldDescription : KinesisInternalFieldDescription.values()) { + bindInternalColumn(binder, internalFieldDescription); + } + } + + private static void bindInternalColumn(Binder binder, KinesisInternalFieldDescription fieldDescription) + { + Multibinder fieldDescriptionBinder = Multibinder.newSetBinder(binder, KinesisInternalFieldDescription.class); + fieldDescriptionBinder.addBinding().toInstance(fieldDescription); + } + + public static final class TypeDeserializer + extends FromStringDeserializer + { + private final TypeManager typeManager; + + @Inject + public TypeDeserializer(TypeManager typeManager) + { + super(Type.class); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); + } + + @Override + protected Type _deserialize(String value, DeserializationContext context) + { + Type type = typeManager.getType(parseTypeSignature(value)); + checkArgument(type != null, "Unknown type %s", value); + return type; + } + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisPlugin.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisPlugin.java new file mode 100644 index 000000000000..2bbffa1a5be4 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisPlugin.java @@ -0,0 +1,40 @@ +/* + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.google.common.collect.ImmutableList; +import io.prestosql.spi.Plugin; +import io.prestosql.spi.connector.ConnectorFactory; + +public class KinesisPlugin + implements Plugin +{ + private KinesisConnectorFactory factory; + + public KinesisPlugin() {} + + public KinesisPlugin(KinesisConnectorFactory factory) + { + this.factory = factory; + } + + @Override + public synchronized Iterable getConnectorFactories() + { + if (factory == null) { + this.factory = new KinesisConnectorFactory(); + } + return ImmutableList.of(this.factory); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisRecordSet.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisRecordSet.java new file mode 100644 index 000000000000..8da564a4dc78 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisRecordSet.java @@ -0,0 +1,456 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package io.prestosql.plugin.kinesis;

import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
import com.amazonaws.services.kinesis.model.GetRecordsRequest;
import com.amazonaws.services.kinesis.model.GetRecordsResult;
import com.amazonaws.services.kinesis.model.GetShardIteratorRequest;
import com.amazonaws.services.kinesis.model.GetShardIteratorResult;
import com.amazonaws.services.kinesis.model.Record;
import com.amazonaws.services.kinesis.model.ResourceNotFoundException;
import com.google.common.collect.ImmutableList;
import io.airlift.log.Logger;
import io.airlift.slice.Slice;
import io.airlift.units.Duration;
import io.prestosql.decoder.DecoderColumnHandle;
import io.prestosql.decoder.FieldValueProvider;
import io.prestosql.decoder.RowDecoder;
import io.prestosql.spi.block.Block;
import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.ConnectorSession;
import io.prestosql.spi.connector.RecordCursor;
import io.prestosql.spi.connector.RecordSet;
import io.prestosql.spi.type.Type;

import java.nio.ByteBuffer;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.units.Duration.nanosSince;
import static io.prestosql.decoder.FieldValueProviders.booleanValueProvider;
import static io.prestosql.decoder.FieldValueProviders.bytesValueProvider;
import static io.prestosql.decoder.FieldValueProviders.longValueProvider;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.getBatchSize;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.getCheckpointLogicalName;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.getIterationNumber;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.getIteratorOffsetSeconds;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.getIteratorStartTimestamp;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.getMaxBatches;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.isCheckpointEnabled;
import static io.prestosql.plugin.kinesis.KinesisSessionProperties.isIteratorFromTimestamp;
import static java.util.Objects.requireNonNull;

/**
 * Record set over one Kinesis shard (one split). The cursor pages through the
 * shard with getRecords batches, decodes each record with the configured row
 * decoder, and optionally checkpoints progress to DynamoDB.
 */
public class KinesisRecordSet
        implements RecordSet
{
    /**
     * Indicates how close to current we want to be before stopping the fetch of records in a query.
     */
    public static final int MILLIS_BEHIND_LIMIT = 10000;

    private static final Logger log = Logger.get(KinesisRecordSet.class);

    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

    private final KinesisSplit split;
    private final ConnectorSession session;
    private final KinesisClientProvider clientManager;

    private final RowDecoder messageDecoder;

    private final List<KinesisColumnHandle> columnHandles;
    private final List<Type> columnTypes;

    private final int batchSize;
    private final int maxBatches;
    private final int fetchAttempts;
    private final long sleepTime;

    private final boolean isLogBatches;

    // for checkpointing
    private final boolean checkpointEnabled;
    private String lastReadSequenceNumber;
    private KinesisShardCheckpointer shardCheckpointer;

    KinesisRecordSet(
            KinesisSplit split,
            ConnectorSession session,
            KinesisClientProvider clientManager,
            List<KinesisColumnHandle> columnHandles,
            RowDecoder messageDecoder,
            KinesisConfig kinesisConfig)
    {
        this.split = requireNonNull(split, "split is null");
        this.session = requireNonNull(session, "session is null");
        requireNonNull(kinesisConfig, "kinesisConfig is null");
        long dynamoReadCapacity = kinesisConfig.getDynamoReadCapacity();
        long dynamoWriteCapacity = kinesisConfig.getDynamoWriteCapacity();
        long checkPointIntervalMillis = kinesisConfig.getCheckpointInterval().toMillis();
        this.isLogBatches = kinesisConfig.isLogBatches();

        this.clientManager = requireNonNull(clientManager, "clientManager is null");

        this.columnHandles = requireNonNull(columnHandles, "columnHandles is null");
        this.messageDecoder = messageDecoder;

        ImmutableList.Builder<Type> typeBuilder = ImmutableList.builder();
        for (KinesisColumnHandle handle : columnHandles) {
            typeBuilder.add(handle.getType());
        }
        this.columnTypes = typeBuilder.build();

        // Note: these default to what is in the configuration if not given in the session
        this.batchSize = getBatchSize(session);
        this.maxBatches = getMaxBatches(this.session);

        this.fetchAttempts = kinesisConfig.getFetchAttempts();
        this.sleepTime = kinesisConfig.getSleepTime().toMillis();

        this.checkpointEnabled = isCheckpointEnabled(session);
        this.lastReadSequenceNumber = null;

        // Initialize checkpoint related state: resume from the last sequence number
        // recorded in DynamoDB for this shard/logical process, if any.
        if (checkpointEnabled) {
            AmazonDynamoDBClient dynamoDBClient = clientManager.getDynamoDbClient();
            String dynamoDBTable = split.getStreamName();
            int curIterationNumber = getIterationNumber(session);
            String sessionLogicalName = getCheckpointLogicalName(session);

            String logicalProcessName = null;
            if (sessionLogicalName != null) {
                logicalProcessName = sessionLogicalName;
            }

            this.shardCheckpointer = new KinesisShardCheckpointer(
                    dynamoDBClient,
                    dynamoDBTable,
                    split,
                    logicalProcessName,
                    curIterationNumber,
                    checkPointIntervalMillis,
                    dynamoReadCapacity,
                    dynamoWriteCapacity);

            lastReadSequenceNumber = shardCheckpointer.getLastReadSeqNumber();
        }
    }

    @Override
    public List<Type> getColumnTypes()
    {
        return columnTypes;
    }

    @Override
    public RecordCursor cursor()
    {
        return new KinesisRecordCursor();
    }

    public class KinesisRecordCursor
            implements RecordCursor
    {
        private final FieldValueProvider[] currentRowValues = new FieldValueProvider[columnHandles.size()];
        // TODO: total bytes here only includes records we iterate through, not total read from Kinesis.
        // This may not be an issue, but if total vs. completed is an important signal to Presto then
        // the implementation below could be a problem. Need to investigate.
        private long batchesRead;
        private long messagesRead;
        private long totalBytes;
        private long totalMessages;
        private long lastReadTime;
        private String shardIterator;
        private List<Record> kinesisRecords;
        private Iterator<Record> listIterator;
        private GetRecordsRequest getRecordsRequest;
        private GetRecordsResult getRecordsResult;

        @Override
        public long getCompletedBytes()
        {
            return totalBytes;
        }

        @Override
        public long getReadTimeNanos()
        {
            return 0;
        }

        @Override
        public Type getType(int field)
        {
            checkArgument(field < columnHandles.size(), "Invalid field index");
            return columnHandles.get(field).getType();
        }

        /**
         * Advances the cursor by one position, retrieving more records from Kinesis if needed.
         * <p>
         * We retrieve records from Kinesis in batches, using the getRecordsRequest. After a
         * getRecordsRequest we keep iterating through that list of records until we run out. Then
         * we will get another batch unless we've hit the limit or have caught up.
         */
        @Override
        public boolean advanceNextPosition()
        {
            if (shardIterator == null && getRecordsRequest == null) {
                getIterator(); // first shard iterator
                log.debug("Starting read. Retrieved first shard iterator from AWS Kinesis.");
            }

            if (getRecordsRequest == null || (!listIterator.hasNext() && shouldGetMoreRecords())) {
                getKinesisRecords();
            }

            if (listIterator.hasNext()) {
                return nextRow();
            }
            log.debug("Read all of the records from the shard: %d batches and %d messages and %d total bytes.", batchesRead, totalMessages, totalBytes);
            return false;
        }

        private boolean shouldGetMoreRecords()
        {
            return shardIterator != null && batchesRead < maxBatches && getMillisBehindLatest() > MILLIS_BEHIND_LIMIT;
        }

        /**
         * Retrieves the next batch of records from Kinesis using the shard iterator.
         * <p>
         * Most of the time this results in one getRecords call. However we allow for
         * a call to return an empty list, and we'll try again if we are far enough
         * away from the latest record.
         */
        private void getKinesisRecords()
                throws ResourceNotFoundException
        {
            // Normally this loop will execute once, but we have to allow for the odd Kinesis
            // behavior, per the docs:
            // A single call to getRecords might return an empty record list, even when the shard contains
            // more records at later sequence numbers
            boolean fetchedRecords = false;
            int attempts = 0;
            while (!fetchedRecords && attempts < fetchAttempts) {
                Duration duration = nanosSince(lastReadTime);
                if (duration.toMillis() <= sleepTime) {
                    // NOTE(review): this sleeps for the *elapsed* time, not the remaining
                    // (sleepTime - elapsed) back-off — preserved as-is; confirm intent.
                    try {
                        Thread.sleep(duration.toMillis());
                    }
                    catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        throw new RuntimeException("thread interrupted");
                    }
                }
                getRecordsRequest = new GetRecordsRequest();
                getRecordsRequest.setShardIterator(shardIterator);
                getRecordsRequest.setLimit(batchSize);

                getRecordsResult = clientManager.getClient().getRecords(getRecordsRequest);
                lastReadTime = System.nanoTime();

                shardIterator = getRecordsResult.getNextShardIterator();
                kinesisRecords = getRecordsResult.getRecords();
                if (isLogBatches) {
                    log.info("Fetched %d records from Kinesis. MillisBehindLatest=%d", kinesisRecords.size(), getRecordsResult.getMillisBehindLatest());
                }

                fetchedRecords = (kinesisRecords.size() > 0 || getMillisBehindLatest() <= MILLIS_BEHIND_LIMIT);
                attempts++;
            }

            listIterator = kinesisRecords.iterator();
            batchesRead++;
            messagesRead += kinesisRecords.size();
        }

        /**
         * Decodes the next record into currentRowValues, filling internal metadata
         * columns directly and data columns via the row decoder.
         */
        private boolean nextRow()
        {
            Record currentRecord = listIterator.next();
            String partitionKey = currentRecord.getPartitionKey();
            log.debug("Reading record with partition key %s", partitionKey);

            byte[] messageData = EMPTY_BYTE_ARRAY;
            ByteBuffer message = currentRecord.getData();
            if (message != null) {
                messageData = new byte[message.remaining()];
                message.get(messageData);
            }
            totalBytes += messageData.length;
            totalMessages++;

            log.debug("Fetching %d bytes from current record. %d messages read so far", messageData.length, totalMessages);

            Optional<Map<DecoderColumnHandle, FieldValueProvider>> decodedValue = messageDecoder.decodeRow(messageData, null);

            Map<DecoderColumnHandle, FieldValueProvider> currentRowValuesMap = new HashMap<>();
            for (DecoderColumnHandle columnHandle : columnHandles) {
                if (columnHandle.isInternal()) {
                    KinesisInternalFieldDescription fieldDescription = KinesisInternalFieldDescription.forColumnName(columnHandle.getName());
                    switch (fieldDescription) {
                        case SHARD_ID_FIELD:
                            currentRowValuesMap.put(columnHandle, bytesValueProvider(split.getShardId().getBytes()));
                            break;
                        case SEGMENT_START_FIELD:
                            currentRowValuesMap.put(columnHandle, bytesValueProvider(split.getStart().getBytes()));
                            break;
                        case SEGMENT_COUNT_FIELD:
                            currentRowValuesMap.put(columnHandle, longValueProvider(totalMessages));
                            break;
                        case SHARD_SEQUENCE_ID_FIELD:
                            currentRowValuesMap.put(columnHandle, bytesValueProvider(currentRecord.getSequenceNumber().getBytes()));
                            break;
                        case MESSAGE_FIELD:
                            currentRowValuesMap.put(columnHandle, bytesValueProvider(messageData));
                            break;
                        case MESSAGE_LENGTH_FIELD:
                            currentRowValuesMap.put(columnHandle, longValueProvider(messageData.length));
                            break;
                        case MESSAGE_VALID_FIELD:
                            // NOTE(review): "!isPresent" means valid==true when decoding produced
                            // nothing — this looks inverted relative to the column's "data is valid"
                            // comment; preserved as-is, confirm against decoder semantics.
                            currentRowValuesMap.put(columnHandle, booleanValueProvider(!decodedValue.isPresent()));
                            break;
                        case MESSAGE_TIMESTAMP: {
                            // BUG FIX: this internal column previously fell through to the default
                            // throw. Approximate arrival time as epoch millis (TimestampType).
                            Date arrival = currentRecord.getApproximateArrivalTimestamp();
                            currentRowValuesMap.put(columnHandle, longValueProvider(arrival == null ? 0 : arrival.getTime()));
                            break;
                        }
                        case PARTITION_KEY_FIELD:
                            // BUG FIX: selecting _partition_key previously threw IllegalArgumentException.
                            currentRowValuesMap.put(columnHandle, bytesValueProvider(partitionKey.getBytes()));
                            break;
                        default:
                            throw new IllegalArgumentException("unknown internal field " + fieldDescription);
                    }
                }
            }

            decodedValue.ifPresent(currentRowValuesMap::putAll);
            for (int i = 0; i < columnHandles.size(); i++) {
                ColumnHandle columnHandle = columnHandles.get(i);
                currentRowValues[i] = currentRowValuesMap.get(columnHandle);
            }

            // BUG FIX: track the last consumed sequence number; without this, close()
            // could only ever re-checkpoint the resume position, never new progress.
            if (checkpointEnabled) {
                lastReadSequenceNumber = currentRecord.getSequenceNumber();
            }
            return true;
        }

        /**
         * Protect against possibly null values if this isn't set (not expected)
         */
        private long getMillisBehindLatest()
        {
            if (getRecordsResult != null && getRecordsResult.getMillisBehindLatest() != null) {
                return getRecordsResult.getMillisBehindLatest();
            }
            return MILLIS_BEHIND_LIMIT + 1;
        }

        @Override
        public boolean getBoolean(int field)
        {
            return getFieldValueProvider(field, boolean.class).getBoolean();
        }

        @Override
        public long getLong(int field)
        {
            return getFieldValueProvider(field, long.class).getLong();
        }

        @Override
        public double getDouble(int field)
        {
            return getFieldValueProvider(field, double.class).getDouble();
        }

        @Override
        public Slice getSlice(int field)
        {
            return getFieldValueProvider(field, Slice.class).getSlice();
        }

        @Override
        public Object getObject(int field)
        {
            return getFieldValueProvider(field, Block.class).getBlock();
        }

        @Override
        public boolean isNull(int field)
        {
            checkArgument(field < columnHandles.size(), "Invalid field index");
            return currentRowValues[field] == null || currentRowValues[field].isNull();
        }

        private FieldValueProvider getFieldValueProvider(int field, Class<?> expectedType)
        {
            checkArgument(field < columnHandles.size(), "Invalid field index");
            checkFieldType(field, expectedType);
            return currentRowValues[field];
        }

        @Override
        public void close()
        {
            log.info("Closing cursor - read complete. Total read: %d batches %d messages, processed: %d messages and %d bytes.",
                    batchesRead, messagesRead, totalMessages, totalBytes);
            if (checkpointEnabled && lastReadSequenceNumber != null) {
                shardCheckpointer.checkpoint(lastReadSequenceNumber);
            }
        }

        private void checkFieldType(int field, Class<?> expected)
        {
            Class<?> actual = getType(field).getJavaType();
            checkArgument(actual == expected, "Expected field %s to be type %s but is %s", field, expected, actual);
        }

        /**
         * Obtains a shard iterator: resume after the checkpointed sequence number when
         * available; otherwise start at a timestamp or the trim horizon per session config.
         */
        private void getIterator()
                throws ResourceNotFoundException
        {
            GetShardIteratorRequest getShardIteratorRequest = new GetShardIteratorRequest();
            getShardIteratorRequest.setStreamName(split.getStreamName());
            getShardIteratorRequest.setShardId(split.getShardId());

            // Explanation: when we have a sequence number from a prior read or checkpoint, always use it.
            // Otherwise, decide if starting at a timestamp or the trim horizon based on configuration.
            // If starting at a timestamp, use the session variable STARTING_TIMESTAMP when given, otherwise
            // fallback on starting at STARTING_OFFSET_SECONDS from timestamp.
            if (lastReadSequenceNumber == null) {
                if (isIteratorFromTimestamp(session)) {
                    getShardIteratorRequest.setShardIteratorType("AT_TIMESTAMP");
                    long iteratorStartTimestamp = getIteratorStartTimestamp(session);
                    if (iteratorStartTimestamp == 0) {
                        long startTimestamp = System.currentTimeMillis() - (getIteratorOffsetSeconds(session) * 1000);
                        getShardIteratorRequest.setTimestamp(new Date(startTimestamp));
                    }
                    else {
                        getShardIteratorRequest.setTimestamp(new Date(iteratorStartTimestamp));
                    }
                }
                else {
                    getShardIteratorRequest.setShardIteratorType("TRIM_HORIZON");
                }
            }
            else {
                getShardIteratorRequest.setShardIteratorType("AFTER_SEQUENCE_NUMBER");
                getShardIteratorRequest.setStartingSequenceNumber(lastReadSequenceNumber);
            }

            GetShardIteratorResult getShardIteratorResult = clientManager.getClient().getShardIterator(getShardIteratorRequest);
            shardIterator = getShardIteratorResult.getShardIterator();
        }
    }
}
+ */ +package io.prestosql.plugin.kinesis; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; +import io.prestosql.decoder.DispatchingRowDecoderFactory; +import io.prestosql.decoder.RowDecoder; +import io.prestosql.spi.connector.ColumnHandle; +import io.prestosql.spi.connector.ConnectorRecordSetProvider; +import io.prestosql.spi.connector.ConnectorSession; +import io.prestosql.spi.connector.ConnectorSplit; +import io.prestosql.spi.connector.ConnectorTransactionHandle; +import io.prestosql.spi.connector.RecordSet; + +import java.util.HashMap; +import java.util.List; + +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static java.util.Objects.requireNonNull; + +public class KinesisRecordSetProvider + implements ConnectorRecordSetProvider +{ + private final KinesisClientProvider clientManager; + private final KinesisConfig kinesisConfig; + private final DispatchingRowDecoderFactory decoderFactory; + + @Inject + public KinesisRecordSetProvider( + DispatchingRowDecoderFactory decoderFactory, + KinesisClientProvider clientManager, + KinesisConfig kinesisConfig) + { + this.decoderFactory = requireNonNull(decoderFactory, "decoderFactory is null"); + this.clientManager = requireNonNull(clientManager, "clientManager is null"); + this.kinesisConfig = requireNonNull(kinesisConfig, "kinesisConfig is null"); + } + + @Override + public RecordSet getRecordSet( + ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorSplit split, + List columns) + { + KinesisSplit kinesisSplit = (KinesisSplit) split; + List kinesisColumns = columns.stream() + .map(x -> (KinesisColumnHandle) x) + .collect(ImmutableList.toImmutableList()); + + ImmutableList.Builder handleBuilder = ImmutableList.builder(); + + RowDecoder messageDecoder = decoderFactory.create( + kinesisSplit.getMessageDataFormat(), + new HashMap<>(), + kinesisColumns.stream() + .filter(column -> !column.isInternal()) + 
.collect(toImmutableSet())); + + for (ColumnHandle handle : columns) { + KinesisColumnHandle columnHandle = (KinesisColumnHandle) handle; + handleBuilder.add(columnHandle); + } + return new KinesisRecordSet(kinesisSplit, session, clientManager, handleBuilder.build(), messageDecoder, kinesisConfig); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSessionProperties.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSessionProperties.java new file mode 100644 index 000000000000..c58706096373 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSessionProperties.java @@ -0,0 +1,157 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.plugin.kinesis; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; +import io.prestosql.spi.connector.ConnectorSession; +import io.prestosql.spi.session.PropertyMetadata; + +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.TimeZone; + +import static io.prestosql.spi.session.PropertyMetadata.booleanProperty; +import static io.prestosql.spi.session.PropertyMetadata.integerProperty; +import static io.prestosql.spi.session.PropertyMetadata.stringProperty; + +public final class KinesisSessionProperties +{ + private static final String PRESTO_TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; + private static final String UNSET_TIMESTAMP = "2000-01-01 00:00:00.000"; + + private static final String CHECKPOINT_ENABLED = "checkpoint_enabled"; + private static final String ITERATION_NUMBER = "iteration_number"; + private static final String CHECKPOINT_LOGICAL_NAME = "checkpoint_logical_name"; + private static final String MAX_BATCHES = "max_batches"; + private static final String BATCH_SIZE = "batch_size"; + private static final String START_FROM_TIMESTAMP = "start_from_timestamp"; + private static final String STARTING_OFFSET_SECONDS = "starting_offset_seconds"; + private static final String STARTING_TIMESTAMP = "starting_timestamp"; + + private final List> sessionProperties; + + @Inject + public KinesisSessionProperties(KinesisConfig config) + { + sessionProperties = ImmutableList.>builder() + .add(booleanProperty( + CHECKPOINT_ENABLED, + "Are checkpoints used in this session?", + config.isCheckpointEnabled(), + false)) + .add(integerProperty( + ITERATION_NUMBER, + "Checkpoint iteration number", + config.getIteratorNumber(), + false)) + .add(stringProperty( + CHECKPOINT_LOGICAL_NAME, + "checkpoint logical name", + config.getLogicalProcessName(), + false)) + .add(PropertyMetadata.integerProperty( + MAX_BATCHES, + "max number of calls 
to Kinesis per query", + config.getMaxBatches(), + false)) + .add(PropertyMetadata.integerProperty( + BATCH_SIZE, + "Record limit in calls to Kinesis", + config.getBatchSize(), + false)) + .add(PropertyMetadata.booleanProperty( + START_FROM_TIMESTAMP, + "Start from timestamp not trim horizon", + config.isIteratorFromTimestamp(), + false)) + .add(PropertyMetadata.longProperty( + STARTING_OFFSET_SECONDS, + "Seconds before current time to start iterator", + config.getIteratorOffsetSeconds(), + false)) + .add(PropertyMetadata.stringProperty( + STARTING_TIMESTAMP, + "Timestamp in Presto format to start iterator", + UNSET_TIMESTAMP, + false)) + .build(); + } + + public List> getSessionProperties() + { + return sessionProperties; + } + + public static boolean isCheckpointEnabled(ConnectorSession session) + { + return session.getProperty(CHECKPOINT_ENABLED, Boolean.class); + } + + public static int getIterationNumber(ConnectorSession session) + { + return session.getProperty(ITERATION_NUMBER, Integer.class); + } + + public static String getCheckpointLogicalName(ConnectorSession session) + { + return session.getProperty(CHECKPOINT_LOGICAL_NAME, String.class); + } + + public static int getMaxBatches(ConnectorSession session) + { + return session.getProperty(MAX_BATCHES, Integer.class); + } + + public static int getBatchSize(ConnectorSession session) + { + return session.getProperty(BATCH_SIZE, Integer.class); + } + + public static boolean isIteratorFromTimestamp(ConnectorSession session) + { + return session.getProperty(START_FROM_TIMESTAMP, Boolean.class); + } + + public static long getIteratorOffsetSeconds(ConnectorSession session) + { + return session.getProperty(STARTING_OFFSET_SECONDS, Long.class); + } + + public static long getIteratorStartTimestamp(ConnectorSession session) + { + String value = session.getProperty(STARTING_TIMESTAMP, String.class); + if (value.equals(UNSET_TIMESTAMP)) { + return 0; + } + return getTimestampAsMillis(value, session); + } + + public 
static long getTimestampAsMillis(String timestampValue, ConnectorSession session) + { + // Parse this as a date and return the long timestamp value (2016-07-10 17:03:56.124). + // They will be entering timestamps in their session's timezone. Use session.getTimeZoneKey(). + SimpleDateFormat format = new SimpleDateFormat(PRESTO_TIMESTAMP_FORMAT); + + if (!session.getTimeZoneKey().getId().equals(TimeZone.getDefault().getID())) { + TimeZone sessionTimeZone = TimeZone.getTimeZone(session.getTimeZoneKey().getId()); + format.setTimeZone(sessionTimeZone); + } + + Date result = format.parse(timestampValue, new ParsePosition(0)); + return result.getTime(); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisShardCheckpointer.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisShardCheckpointer.java new file mode 100644 index 000000000000..036d1d223cde --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisShardCheckpointer.java @@ -0,0 +1,160 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.plugin.kinesis; + +import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; +import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber; +import com.amazonaws.services.kinesis.leases.exceptions.DependencyException; +import com.amazonaws.services.kinesis.leases.exceptions.InvalidStateException; +import com.amazonaws.services.kinesis.leases.exceptions.ProvisionedThroughputException; +import com.amazonaws.services.kinesis.leases.impl.KinesisClientLease; +import com.amazonaws.services.kinesis.leases.impl.KinesisClientLeaseManager; +import io.airlift.log.Logger; + +import static com.google.common.base.Throwables.throwIfUnchecked; + +public class KinesisShardCheckpointer +{ + private static final Logger log = Logger.get(KinesisShardCheckpointer.class); + private KinesisClientLeaseManager leaseManager; + private KinesisSplit kinesisSplit; + private String logicalProcessName; + private int currentIterationNumber; + private KinesisClientLease kinesisClientLease; + private long checkpointIntervalMillis; + private long nextCheckpointTimeMillis; + + public KinesisShardCheckpointer( + AmazonDynamoDB dynamoDBClient, + String dynamoDBTable, + KinesisSplit kinesisSplit, + String logicalProcessName, + int currentIterationNumber, + long checkpointIntervalMS, + long dynamoReadCapacity, + long dynamoWriteCapacity) + { + this(new KinesisClientLeaseManager(dynamoDBTable, dynamoDBClient), + kinesisSplit, + logicalProcessName, + currentIterationNumber, + checkpointIntervalMS, + dynamoReadCapacity, + dynamoWriteCapacity); + } + + public KinesisShardCheckpointer( + KinesisClientLeaseManager leaseManager, + KinesisSplit kinesisSplit, + String logicalProcessName, + int currentIterationNumber, + long checkpointIntervalMS, + long dynamoReadCapacity, + long dynamoWriteCapacity) + { + this.leaseManager = leaseManager; + this.kinesisSplit = kinesisSplit; + this.logicalProcessName = logicalProcessName; + this.currentIterationNumber = 
currentIterationNumber; + this.checkpointIntervalMillis = checkpointIntervalMS; + + try { + this.leaseManager.createLeaseTableIfNotExists(dynamoReadCapacity, dynamoWriteCapacity); + + KinesisClientLease oldLease = this.leaseManager.getLease(createCheckpointKey(currentIterationNumber)); + if (oldLease != null) { + this.kinesisClientLease = oldLease; + } + else { + this.kinesisClientLease = new KinesisClientLease(); + this.kinesisClientLease.setLeaseKey(createCheckpointKey(currentIterationNumber)); + } + } + catch (ProvisionedThroughputException | InvalidStateException | DependencyException e) { + throwIfUnchecked(e); + throw new RuntimeException(e); + } + resetNextCheckpointTime(); + } + + private void resetNextCheckpointTime() + { + nextCheckpointTimeMillis = System.nanoTime() + checkpointIntervalMillis * 1_000_000; + } + + private String createCheckpointKey(int iterationNo) + { + return new StringBuilder(this.logicalProcessName) + .append("_") + .append(this.kinesisSplit.getStreamName()) + .append("_") + .append(this.kinesisSplit.getShardId()) + .append("_") + .append(String.valueOf(iterationNo)) + .toString(); + } + + // storing last read sequence no. 
in dynamodb table + public void checkpoint(String lastReadSequenceNumber) + { + log.info("Trying to checkpoint at " + lastReadSequenceNumber); + try { + ExtendedSequenceNumber esn = new ExtendedSequenceNumber(lastReadSequenceNumber); + kinesisClientLease.setCheckpoint(esn); + leaseManager.createLeaseIfNotExists(kinesisClientLease); + if (!leaseManager.updateLease(kinesisClientLease)) { + log.warn("Checkpointing unsuccessful"); + } + } + catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) { + throwIfUnchecked(e); + throw new RuntimeException(e); + } + resetNextCheckpointTime(); + } + + //return checkpoint of previous iteration if found + public String getLastReadSeqNumber() + { + String lastReadSeqNumber = null; + KinesisClientLease oldLease = null; + if (currentIterationNumber > 0) { + try { + oldLease = leaseManager.getLease(createCheckpointKey(currentIterationNumber - 1)); + } + catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) { + throwIfUnchecked(e); + throw new RuntimeException(e); + } + if (oldLease != null) { + // ExtendedSequenceNumber type in latest API: + lastReadSeqNumber = oldLease.getCheckpoint().toString(); + } + } + if (lastReadSeqNumber == null) { + log.info("Previous checkpoint not found. 
Starting from beginning of shard"); + } + else { + log.info("Resuming from " + lastReadSeqNumber); + } + return lastReadSeqNumber; + } + + public void checkpointIfTimeUp(String lastReadSeqNo) + { + if (System.nanoTime() >= nextCheckpointTimeMillis) { + checkpoint(lastReadSeqNo); + } + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplit.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplit.java new file mode 100644 index 000000000000..123593be2481 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplit.java @@ -0,0 +1,114 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import io.prestosql.spi.HostAddress; +import io.prestosql.spi.connector.ConnectorSplit; + +import java.util.List; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +/** + * Kinesis vertion of ConnectorSplit. KinesisConnector fetch the data from kinesis stream and splits the big chunk to multiple split. + * By default, one shard data is one KinesisSplit. 
+ */ +public class KinesisSplit + implements ConnectorSplit +{ + private final String streamName; + private final String messageDataFormat; + private final String shardId; + private final String start; + private final String end; + + @JsonCreator + public KinesisSplit( + @JsonProperty("streamName") String streamName, + @JsonProperty("messageDataFormat") String messageDataFormat, + @JsonProperty("shardId") String shardId, + @JsonProperty("start") String start, + @JsonProperty("end") String end) + { + this.streamName = requireNonNull(streamName, "streamName is null"); + this.messageDataFormat = requireNonNull(messageDataFormat, "messageDataFormat is null"); + this.shardId = shardId; + this.start = start; + this.end = end; + } + + @JsonProperty + public String getStart() + { + return start; + } + + @JsonProperty + public String getEnd() + { + return end; + } + + @JsonProperty + public String getStreamName() + { + return streamName; + } + + @JsonProperty + public String getMessageDataFormat() + { + return messageDataFormat; + } + + @JsonProperty + public String getShardId() + { + return shardId; + } + + @Override + public boolean isRemotelyAccessible() + { + return true; + } + + @Override + public List getAddresses() + { + return ImmutableList.of(); + } + + @Override + public Object getInfo() + { + return this; + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("streamName", streamName) + .add("messageDataFormat", messageDataFormat) + .add("shardId", shardId) + .add("start", start) + .add("end", end) + .toString(); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplitManager.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplitManager.java new file mode 100644 index 000000000000..0feecbdaaa05 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisSplitManager.java @@ -0,0 +1,168 @@ +/* + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.amazonaws.services.kinesis.model.DescribeStreamRequest; +import com.amazonaws.services.kinesis.model.DescribeStreamResult; +import com.amazonaws.services.kinesis.model.ResourceNotFoundException; +import com.amazonaws.services.kinesis.model.Shard; +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; +import io.airlift.units.Duration; +import io.prestosql.spi.connector.ConnectorSession; +import io.prestosql.spi.connector.ConnectorSplit; +import io.prestosql.spi.connector.ConnectorSplitManager; +import io.prestosql.spi.connector.ConnectorSplitSource; +import io.prestosql.spi.connector.ConnectorTableHandle; +import io.prestosql.spi.connector.ConnectorTransactionHandle; +import io.prestosql.spi.connector.FixedSplitSource; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.DAYS; + +/** + * Split data chunk from kinesis Stream to multiple small chunks for parallelization and distribution to multiple Presto workers. 
+ * By default, each shard of Kinesis Stream forms one Kinesis Split + */ +public class KinesisSplitManager + implements ConnectorSplitManager +{ + public static final long MAX_CACHE_AGE_MILLIS = new Duration(1, DAYS).toMillis(); + + private final KinesisHandleResolver handleResolver; + private final KinesisClientProvider clientManager; + + private Map streamMap = Collections.synchronizedMap(new HashMap()); + + /** + * Cache the result of a Kinesis describe stream call so we don't need to retrieve + * the shards for every single query. + */ + public static class InternalStreamDescription + { + private final String streamName; + private final List shards = new ArrayList<>(); + private final long createTimeStamp; + + public InternalStreamDescription(String streamName) + { + this.streamName = requireNonNull(streamName); + this.createTimeStamp = System.currentTimeMillis(); + } + + public long getCreateTimeStamp() + { + return createTimeStamp; + } + + public String getStreamName() + { + return streamName; + } + + public List getShards() + { + return shards; + } + + public void addShard(Shard shard) + { + this.shards.add(shard); + } + + public void addAllShards(List shards) + { + this.shards.addAll(shards); + } + } + + @Inject + public KinesisSplitManager( + KinesisHandleResolver handleResolver, + KinesisClientProvider clientManager) + { + this.handleResolver = handleResolver; + this.clientManager = clientManager; + } + + @Override + public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableHandle table, ConnectorSplitManager.SplitSchedulingStrategy splitSchedulingStrategy) + { + KinesisTableHandle kinesisTableHandle = (KinesisTableHandle) table; + + InternalStreamDescription description = this.getStreamDescription(kinesisTableHandle.getStreamName()); + + ImmutableList.Builder builder = ImmutableList.builder(); + for (Shard shard : description.getShards()) { + KinesisSplit split = new KinesisSplit( + 
kinesisTableHandle.getStreamName(), + kinesisTableHandle.getMessageDataFormat(), + shard.getShardId(), + shard.getSequenceNumberRange().getStartingSequenceNumber(), + shard.getSequenceNumberRange().getEndingSequenceNumber()); + builder.add(split); + } + + return new FixedSplitSource(builder.build()); + } + + /** + * Internal method to retrieve the stream description and get the shards from AWS. + *

+ * Gets from the internal cache unless not yet created or too old. + */ + protected InternalStreamDescription getStreamDescription(String streamName) + { + InternalStreamDescription internalStreamDescription = this.streamMap.get(streamName); + if (internalStreamDescription == null || System.currentTimeMillis() - internalStreamDescription.getCreateTimeStamp() >= MAX_CACHE_AGE_MILLIS) { + internalStreamDescription = new InternalStreamDescription(streamName); + + DescribeStreamRequest describeStreamRequest = new DescribeStreamRequest(); + describeStreamRequest.setStreamName(streamName); + + // Collect shards from Kinesis + String exclusiveStartShardId = null; + List shards = new ArrayList<>(); + do { + describeStreamRequest.setExclusiveStartShardId(exclusiveStartShardId); + DescribeStreamResult describeStreamResult = clientManager.getClient().describeStream(describeStreamRequest); + + String streamStatus = describeStreamResult.getStreamDescription().getStreamStatus(); + if (!streamStatus.equals("ACTIVE") && !streamStatus.equals("UPDATING")) { + throw new ResourceNotFoundException("Stream not Active"); + } + + internalStreamDescription.addAllShards(describeStreamResult.getStreamDescription().getShards()); + + if (describeStreamResult.getStreamDescription().getHasMoreShards() && (shards.size() > 0)) { + exclusiveStartShardId = shards.get(shards.size() - 1).getShardId(); + } + else { + exclusiveStartShardId = null; + } + } + while (exclusiveStartShardId != null); + + this.streamMap.put(streamName, internalStreamDescription); + } + + return internalStreamDescription; + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamDescription.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamDescription.java new file mode 100644 index 000000000000..6b9d6f8525a7 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamDescription.java @@ -0,0 +1,82 @@ +/* + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Strings.isNullOrEmpty; +import static java.util.Objects.requireNonNull; + +/** + * This Class maintains all the details of Kinesis stream like name, fields of data, Presto table stream is mapping to, tables's schema name + */ +public class KinesisStreamDescription +{ + private final String tableName; + private final String streamName; + private final String schemaName; + private final KinesisStreamFieldGroup message; + + @JsonCreator + public KinesisStreamDescription( + @JsonProperty("tableName") String tableName, + @JsonProperty("schemaName") String schemaName, + @JsonProperty("streamName") String streamName, + @JsonProperty("message") KinesisStreamFieldGroup message) + { + checkArgument(!isNullOrEmpty(tableName), "tableName is null or is empty"); + this.tableName = tableName; + this.streamName = requireNonNull(streamName, "topicName is null"); + this.schemaName = schemaName; + this.message = message; + } + + @JsonProperty + public String getTableName() + { + return tableName; + } + + @JsonProperty + public String getStreamName() + { + return streamName; + } + + @JsonProperty + public String getSchemaName() + { + return 
schemaName; + } + + @JsonProperty + public KinesisStreamFieldGroup getMessage() + { + return message; + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("tableName", tableName) + .add("streamName", streamName) + .add("schemaName", schemaName) + .add("message", message) + .toString(); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldDescription.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldDescription.java new file mode 100644 index 000000000000..72adaba8ea55 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldDescription.java @@ -0,0 +1,155 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.plugin.kinesis; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.prestosql.spi.connector.ColumnMetadata; +import io.prestosql.spi.type.Type; + +import java.util.Objects; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Strings.isNullOrEmpty; +import static java.util.Objects.requireNonNull; + +public class KinesisStreamFieldDescription +{ + private final String name; + private final Type type; + private final String mapping; + private final String comment; + private final String dataFormat; + private final String formatHint; + private final boolean hidden; + + @JsonCreator + public KinesisStreamFieldDescription( + @JsonProperty("name") String name, + @JsonProperty("type") Type type, + @JsonProperty("mapping") String mapping, + @JsonProperty("comment") String comment, + @JsonProperty("dataFormat") String dataFormat, + @JsonProperty("formatHint") String formatHint, + @JsonProperty("hidden") boolean hidden) + { + checkArgument(!isNullOrEmpty(name), "name is null or is empty"); + this.name = name; + this.type = requireNonNull(type, "type is null"); + this.mapping = mapping; + this.comment = comment; + this.dataFormat = dataFormat; + this.formatHint = formatHint; + this.hidden = hidden; + } + + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public Type getType() + { + return type; + } + + @JsonProperty + public String getMapping() + { + return mapping; + } + + @JsonProperty + public String getComment() + { + return comment; + } + + @JsonProperty + public String getDataFormat() + { + return dataFormat; + } + + @JsonProperty + public String getFormatHint() + { + return formatHint; + } + + @JsonProperty + public boolean isHidden() + { + return hidden; + } + + KinesisColumnHandle getColumnHandle(int index) + { + return 
new KinesisColumnHandle( + index, + getName(), + getType(), + getMapping(), + getDataFormat(), + getFormatHint(), + isHidden(), + false); + } + + ColumnMetadata getColumnMetadata() + { + return new ColumnMetadata(getName(), getType(), getComment(), isHidden()); + } + + @Override + public int hashCode() + { + return Objects.hash(name, type, mapping, dataFormat, formatHint, hidden); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + + KinesisStreamFieldDescription other = (KinesisStreamFieldDescription) obj; + return Objects.equals(this.name, other.name) && + Objects.equals(this.type, other.type) && + Objects.equals(this.mapping, other.mapping) && + Objects.equals(this.dataFormat, other.dataFormat) && + Objects.equals(this.formatHint, other.formatHint) && + Objects.equals(this.hidden, other.hidden); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("name", name) + .add("type", type) + .add("mapping", mapping) + .add("dataFormat", dataFormat) + .add("formatHint", formatHint) + .add("hidden", hidden) + .toString(); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldGroup.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldGroup.java new file mode 100644 index 000000000000..10a340ee5ede --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisStreamFieldGroup.java @@ -0,0 +1,59 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class KinesisStreamFieldGroup +{ + private final String dataFormat; + private final List fields; + + @JsonCreator + public KinesisStreamFieldGroup( + @JsonProperty("dataFormat") String dataFormat, + @JsonProperty("fields") List fields) + { + this.dataFormat = requireNonNull(dataFormat, "dataFormat is null"); + this.fields = ImmutableList.copyOf(requireNonNull(fields, "fields is null")); + } + + @JsonProperty + public String getDataFormat() + { + return dataFormat; + } + + @JsonProperty + public List getFields() + { + return fields; + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("dataFormat", dataFormat) + .add("fields", fields) + .toString(); + } +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisTableDescriptionSupplier.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisTableDescriptionSupplier.java new file mode 100644 index 000000000000..6f0a09bc6567 --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/KinesisTableDescriptionSupplier.java @@ -0,0 +1,127 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import io.airlift.json.JsonCodec;
import io.airlift.log.Logger;
import io.prestosql.plugin.kinesis.s3config.S3TableConfigClient;
import io.prestosql.spi.PrestoException;
import io.prestosql.spi.connector.SchemaTableName;

import javax.annotation.PreDestroy;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static com.google.common.base.MoreObjects.firstNonNull;
import static java.util.Objects.requireNonNull;

/**
 * Supplies the table definitions for the connector: either from JSON files in the
 * configured table-description directory, or from S3 when an {@code s3://} location
 * is configured (see {@link S3TableConfigClient}).
 */
public class KinesisTableDescriptionSupplier
        implements Supplier<Map<SchemaTableName, KinesisStreamDescription>>
{
    private static final Logger log = Logger.get(KinesisTableDescriptionSupplier.class);

    private final KinesisConfig kinesisConfig;
    private final JsonCodec<KinesisStreamDescription> streamDescriptionCodec;
    private final S3TableConfigClient s3TableConfigClient;

    @Inject
    public KinesisTableDescriptionSupplier(
            KinesisConfig kinesisConfig,
            JsonCodec<KinesisStreamDescription> streamDescriptionCodec,
            S3TableConfigClient s3TableConfigClient)
    {
        this.kinesisConfig = requireNonNull(kinesisConfig, "kinesisConfig is null");
        this.streamDescriptionCodec = requireNonNull(streamDescriptionCodec, "streamDescriptionCodec is null");
        this.s3TableConfigClient = requireNonNull(s3TableConfigClient, "S3 table config client is null");
    }

    @Override
    public Map<SchemaTableName, KinesisStreamDescription> get()
    {
        // S3-backed definitions take precedence when an s3:// location is configured
        if (s3TableConfigClient.isUsingS3()) {
            return s3TableConfigClient.getTablesFromS3();
        }
        return getTablesFromPath();
    }

    /**
     * Reads every JSON table-description file from the local directory given by
     * {@link KinesisConfig#getTableDescriptionLocation()}.
     *
     * @return map from schema/table name to its stream description
     * @throws PrestoException if the configured location is not a directory
     */
    public Map<SchemaTableName, KinesisStreamDescription> getTablesFromPath()
    {
        ImmutableMap.Builder<SchemaTableName, KinesisStreamDescription> builder = ImmutableMap.builder();
        try {
            for (Path file : listFiles(Paths.get(kinesisConfig.getTableDescriptionLocation()))) {
                // NOTE(review): matches any file name ending in "json", not only ".json" — confirm intended
                if (Files.isRegularFile(file) && file.getFileName().toString().endsWith("json")) {
                    KinesisStreamDescription table = streamDescriptionCodec.fromJson(Files.readAllBytes(file));
                    // Fall back to the connector's default schema when the file omits one
                    String schemaName = firstNonNull(table.getSchemaName(), kinesisConfig.getDefaultSchema());
                    log.debug("Kinesis table %s %s %s", schemaName, table.getTableName(), table);
                    builder.put(new SchemaTableName(schemaName, table.getTableName()), table);
                }
            }

            Map<SchemaTableName, KinesisStreamDescription> tableDefinitions = builder.build();
            log.debug("Loaded table definitions: %s", tableDefinitions.keySet());
            return tableDefinitions;
        }
        catch (IOException e) {
            // IOException is checked, so the former throwIfUnchecked(e) was a no-op;
            // wrap explicitly instead
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Shuts down any periodic update job.
     */
    @PreDestroy
    public void shutdown()
    {
        // Despite its name, S3TableConfigClient.run() cancels the scheduled S3 refresh task
        s3TableConfigClient.run();
    }

    // Lists all entries of the given directory; fails fast if it is not a directory.
    private static List<Path> listFiles(Path path)
    {
        if (path == null || !Files.isDirectory(path)) {
            throw new PrestoException(KinesisErrorCode.KINESIS_METADATA_EXCEPTION, "Table description location does not exist or is not a directory");
        }
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(path)) {
            // DirectoryIteratorException is unchecked and propagates unchanged,
            // matching the previous throwIfUnchecked behavior
            return ImmutableList.copyOf(stream);
        }
        catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.prestosql.spi.connector.ConnectorTableHandle;
import io.prestosql.spi.connector.SchemaTableName;

import java.util.Objects;

import static com.google.common.base.MoreObjects.toStringHelper;
import static java.util.Objects.requireNonNull;

/**
 * Immutable handle identifying one Presto table backed by a Kinesis stream.
 */
public class KinesisTableHandle
        implements ConnectorTableHandle
{
    /**
     * The schema name for this table. Is set through configuration and read
     * using {@link KinesisConfig#getDefaultSchema()}. Usually 'default'.
     */
    private final String schemaName;

    /**
     * The table name used by presto.
     */
    private final String tableName;

    /**
     * The stream name that is read from Kinesis.
     */
    private final String streamName;

    /**
     * Data format of the messages in the stream (e.g. "json").
     */
    private final String messageDataFormat;

    @JsonCreator
    public KinesisTableHandle(
            @JsonProperty("schemaName") String schemaName,
            @JsonProperty("tableName") String tableName,
            @JsonProperty("streamName") String streamName,
            @JsonProperty("messageDataFormat") String messageDataFormat)
    {
        this.schemaName = requireNonNull(schemaName, "schemaName is null");
        this.tableName = requireNonNull(tableName, "tableName is null");
        // Fixed: message previously said "topicName is null" (copied from the Kafka connector)
        this.streamName = requireNonNull(streamName, "streamName is null");
        this.messageDataFormat = requireNonNull(messageDataFormat, "messageDataFormat is null");
    }

    @JsonProperty
    public String getSchemaName()
    {
        return schemaName;
    }

    @JsonProperty
    public String getTableName()
    {
        return tableName;
    }

    @JsonProperty
    public String getStreamName()
    {
        return streamName;
    }

    @JsonProperty
    public String getMessageDataFormat()
    {
        return messageDataFormat;
    }

    public SchemaTableName toSchemaTableName()
    {
        return new SchemaTableName(schemaName, tableName);
    }

    @Override
    public int hashCode()
    {
        return Objects.hash(schemaName, tableName, streamName, messageDataFormat);
    }

    @Override
    public boolean equals(Object obj)
    {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }

        KinesisTableHandle other = (KinesisTableHandle) obj;
        return Objects.equals(this.schemaName, other.schemaName)
                && Objects.equals(this.tableName, other.tableName)
                && Objects.equals(this.streamName, other.streamName)
                && Objects.equals(this.messageDataFormat, other.messageDataFormat);
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("schemaName", schemaName)
                .add("tableName", tableName)
                .add("streamName", streamName)
                .add("messageDataFormat", messageDataFormat)
                .toString();
    }
}
+ */ +package io.prestosql.plugin.kinesis; + +import io.prestosql.spi.connector.ConnectorTransactionHandle; + +public enum KinesisTransactionHandle + implements ConnectorTransactionHandle +{ + INSTANCE +} diff --git a/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/s3config/S3TableConfigClient.java b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/s3config/S3TableConfigClient.java new file mode 100644 index 000000000000..645336597bdf --- /dev/null +++ b/presto-kinesis/src/main/java/io/prestosql/plugin/kinesis/s3config/S3TableConfigClient.java @@ -0,0 +1,214 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis.s3config;

import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3URI;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.CharStreams;
import com.google.inject.Inject;
import io.airlift.json.JsonCodec;
import io.airlift.log.Logger;
import io.prestosql.plugin.kinesis.KinesisClientProvider;
import io.prestosql.plugin.kinesis.KinesisConfig;
import io.prestosql.plugin.kinesis.KinesisStreamDescription;
import io.prestosql.spi.connector.SchemaTableName;

import javax.annotation.PostConstruct;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import static java.util.Objects.requireNonNull;

/**
 * Utility class to retrieve table definitions from a common place on Amazon S3.
 * <p>
 * This is so that we can add new tables in a central "metastore" location without
 * having to update every single node with the files.
 * <p>
 * This makes calls to Amazon AWS using the S3 client.
 */
public class S3TableConfigClient
        implements Runnable
{
    private static final Logger log = Logger.get(S3TableConfigClient.class);

    // TODO(review): consider making this private; kept public for backward compatibility
    public final KinesisConfig kinesisConfig;
    private final KinesisClientProvider clientManager;
    private final JsonCodec<KinesisStreamDescription> streamDescriptionCodec;

    // Present only when the configured table-description location is an s3:// URL
    private final Optional<String> bucketUrl;
    // Timestamp of the last refresh, used to detect modified S3 objects
    private volatile long lastCheck;
    private ScheduledExecutorService updateExecutor;
    private ScheduledFuture<?> updateTaskHandle;

    private final Map<String, KinesisStreamDescription> internalMap =
            Collections.synchronizedMap(new HashMap<>());

    @Inject
    public S3TableConfigClient(
            KinesisConfig connectorConfig,
            KinesisClientProvider clientManager,
            JsonCodec<KinesisStreamDescription> jsonCodec)
    {
        this.kinesisConfig = requireNonNull(connectorConfig, "connector configuration object is null");
        this.clientManager = requireNonNull(clientManager, "client manager object is null");
        this.streamDescriptionCodec = requireNonNull(jsonCodec, "JSON codec object is null");

        // If using S3, start a thread that periodically looks for updates
        if (kinesisConfig.getTableDescriptionLocation().startsWith("s3://")) {
            this.bucketUrl = Optional.of(kinesisConfig.getTableDescriptionLocation());
        }
        else {
            this.bucketUrl = Optional.empty();
        }
    }

    @PostConstruct
    protected void startS3Updates()
    {
        //TODO: if required make the update interval configurable
        if (this.bucketUrl.isPresent()) {
            updateExecutor = Executors.newSingleThreadScheduledExecutor();
            updateTaskHandle = updateExecutor.scheduleAtFixedRate(this::updateTablesFromS3, 5, 600, TimeUnit.SECONDS);
        }
    }

    /**
     * Indicates this class is being used and actively reading table definitions from S3.
     */
    public boolean isUsingS3()
    {
        return bucketUrl.isPresent() && (bucketUrl.get().startsWith("s3://"));
    }

    /**
     * Main entry point to get table definitions from S3 using bucket and object directory
     * given in the configuration.
     * <p>
     * For safety, an immutable copy built from the internal map is returned.
     */
    public Map<SchemaTableName, KinesisStreamDescription> getTablesFromS3()
    {
        updateTablesFromS3();
        Collection<KinesisStreamDescription> streamValues = this.internalMap.values();
        ImmutableMap.Builder<SchemaTableName, KinesisStreamDescription> builder = ImmutableMap.builder();
        for (KinesisStreamDescription stream : streamValues) {
            builder.put(new SchemaTableName(stream.getSchemaName(), stream.getTableName()), stream);
        }
        return builder.build();
    }

    /**
     * Shutdown hook (invoked via {@code Runnable}): cancels the periodic S3 refresh
     * task and stops its executor thread.
     */
    @Override
    public void run()
    {
        if (isUsingS3() && updateTaskHandle != null) {
            updateTaskHandle.cancel(true);
        }
        if (updateExecutor != null) {
            // Previously only the task was cancelled, leaking the scheduler thread
            updateExecutor.shutdownNow();
        }
    }

    /**
     * Call S3 to get the most recent object list.
     * <p>
     * This is an object list request to AWS in the given "directory".
     */
    protected List<S3ObjectSummary> getObjectSummaries()
    {
        AmazonS3Client s3client = this.clientManager.getS3Client();
        AmazonS3URI directoryURI = new AmazonS3URI(this.bucketUrl.get());

        List<S3ObjectSummary> result = new ArrayList<>();
        try {
            log.info("Getting the listing of objects in the S3 table config directory: bucket %s prefix %s :", directoryURI.getBucket(), directoryURI.getKey());
            ListObjectsRequest request = new ListObjectsRequest()
                    .withBucketName(directoryURI.getBucket())
                    .withPrefix(directoryURI.getKey() + "/")
                    .withDelimiter("/")
                    .withMaxKeys(25);
            ObjectListing response;

            // Page through the listing; each response carries the marker for the next page
            do {
                response = s3client.listObjects(request);

                result.addAll(response.getObjectSummaries());
                request.setMarker(response.getNextMarker());
            }
            while (response.isTruncated());

            log.info("Completed getting S3 object listing.");
        }
        catch (AmazonClientException e) {
            // Best effort: keep serving the last known definitions on AWS errors
            log.error(e, "Skipping update as faced error fetching table descriptions from S3");
        }
        return result;
    }

    /**
     * Connect to S3 directory to look for new or updated table definitions and then
     * update the map.
     */
    protected void updateTablesFromS3()
    {
        long now = System.currentTimeMillis();

        List<S3ObjectSummary> summaries = this.getObjectSummaries();
        AmazonS3Client s3client = clientManager.getS3Client();

        for (S3ObjectSummary summary : summaries) {
            // Skip "directory" placeholder keys
            if (summary.getKey().endsWith("/")) {
                continue;
            }
            // New or updated file, so we must read from AWS
            if (!this.internalMap.containsKey(summary.getKey()) || summary.getLastModified().getTime() >= this.lastCheck) {
                log.info("Getting : %s - %s", summary.getBucketName(), summary.getKey());
                S3Object object = s3client.getObject(new GetObjectRequest(summary.getBucketName(), summary.getKey()));

                // UTF-8 explicitly: the previous code used the platform-default charset
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(object.getObjectContent(), StandardCharsets.UTF_8))) {
                    KinesisStreamDescription table = streamDescriptionCodec.fromJson(CharStreams.toString(reader));
                    internalMap.put(summary.getKey(), table);
                    log.info("Put table description into the map from %s", summary.getKey());
                }
                catch (IOException iox) {
                    log.error("Problem reading input stream from object.", iox);
                    // IOException is checked, so throwIfUnchecked was a no-op here
                    throw new UncheckedIOException(iox);
                }
            }
        }

        log.info("Completed updating table definitions from S3.");
        this.lastCheck = now;
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis;

import com.google.common.collect.ImmutableMap;
import io.airlift.units.Duration;
import org.testng.annotations.Test;

import java.util.Map;
import java.util.concurrent.TimeUnit;

import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping;
import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults;
import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults;

/**
 * Verifies the default values and the property-name-to-setter mapping of
 * {@link KinesisConfig}.
 */
public class TestKinesisConfig
{
    @Test
    public void testDefaults()
    {
        assertRecordedDefaults(recordDefaults(KinesisConfig.class)
                .setDefaultSchema("default")
                .setHideInternalColumns(true)
                .setTableDescriptionLocation("etc/kinesis/")
                .setAccessKey(null)
                .setSecretKey(null)
                .setAwsRegion("us-east-1")
                .setSleepTime(new Duration(1000, TimeUnit.MILLISECONDS))
                .setFetchAttempts(2)
                .setMaxBatches(600)
                .setBatchSize(10000)
                .setLogBatches(true)
                .setIteratorFromTimestamp(true)
                .setIteratorOffsetSeconds(86400)
                .setCheckpointEnabled(false)
                .setDynamoReadCapacity(50)
                .setDynamoWriteCapacity(10)
                .setCheckpointInterval(new Duration(60000, TimeUnit.MILLISECONDS))
                .setLogicalProcessName("process1")
                .setIteratorNumber(0));
    }

    @Test
    public void testExplicitPropertyMappings()
    {
        Map<String, String> properties = ImmutableMap.<String, String>builder()
                .put("kinesis.table-description-location", "/var/lib/kinesis")
                .put("kinesis.default-schema", "kinesis")
                .put("kinesis.hide-internal-columns", "false")
                .put("kinesis.access-key", "kinesis.accessKey")
                .put("kinesis.secret-key", "kinesis.secretKey")
                .put("kinesis.fetch-attempts", "3")
                .put("kinesis.max-batches", "500")
                .put("kinesis.aws-region", "us-west-1")
                .put("kinesis.sleep-time", "100ms")
                .put("kinesis.batch-size", "9000")
                .put("kinesis.log-batches", "false")
                .put("kinesis.iterator-from-timestamp", "false")
                .put("kinesis.iterator-offset-seconds", "36000")
                .put("kinesis.checkpoint-enabled", "true")
                .put("kinesis.dynamo-read-capacity", "100")
                .put("kinesis.dynamo-write-capacity", "20")
                .put("kinesis.checkpoint-interval", "50000ms")
                .put("kinesis.checkpoint-logical-name", "process")
                .put("kinesis.iterator-number", "1")
                .build();

        KinesisConfig expected = new KinesisConfig()
                .setTableDescriptionLocation("/var/lib/kinesis")
                .setDefaultSchema("kinesis")
                .setHideInternalColumns(false)
                .setAccessKey("kinesis.accessKey")
                .setSecretKey("kinesis.secretKey")
                .setAwsRegion("us-west-1")
                .setFetchAttempts(3)
                .setMaxBatches(500)
                .setSleepTime(new Duration(100, TimeUnit.MILLISECONDS))
                .setBatchSize(9000)
                .setLogBatches(false)
                .setIteratorFromTimestamp(false)
                .setIteratorOffsetSeconds(36000)
                .setCheckpointEnabled(true)
                .setDynamoReadCapacity(100)
                .setDynamoWriteCapacity(20)
                .setCheckpointInterval(new Duration(50000, TimeUnit.MILLISECONDS))
                .setLogicalProcessName("process")
                .setIteratorNumber(1);

        assertFullMapping(properties, expected);
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis;

import com.google.common.collect.ImmutableMap;
import io.prestosql.plugin.kinesis.util.TestUtils;
import io.prestosql.spi.connector.Connector;
import io.prestosql.spi.connector.ConnectorFactory;
import io.prestosql.spi.connector.ConnectorMetadata;
import io.prestosql.spi.connector.ConnectorTransactionHandle;
import io.prestosql.testing.TestingConnectorContext;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.List;

import static io.prestosql.spi.transaction.IsolationLevel.READ_COMMITTED;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;

/**
 * Test that the plug in API is satisfied and all of the required objects can be created.
 * <p>
 * This will not make any calls to AWS, it merely checks that all of the Plug in SPI
 * objects are in place.
 */
public class TestKinesisPlugin
{
    /**
     * Locates the single connector factory exposed by the plugin, asserting along the way.
     * <p>
     * Kept as a private helper because TestNG silently skips {@code @Test} methods that
     * return a value — the previous version of {@code testConnectorExists} returned the
     * factory and therefore never ran as a test.
     */
    private static ConnectorFactory getOnlyConnectorFactory()
    {
        KinesisPlugin plugin = new KinesisPlugin();

        // Create factory manually to double check everything is done right
        List<ConnectorFactory> factories = new ArrayList<>();
        for (ConnectorFactory connectorFactory : plugin.getConnectorFactories()) {
            factories.add(connectorFactory);
        }
        assertEquals(factories.size(), 1);
        ConnectorFactory factory = factories.get(0);
        assertNotNull(factory);
        return factory;
    }

    @Test
    public void testConnectorExists()
    {
        getOnlyConnectorFactory();
    }

    @Test
    public void testSpinUp()
    {
        String accessKey = "kinesis.accessKey";
        String secretKey = "kinesis.secretKey";
        ConnectorFactory factory = getOnlyConnectorFactory();
        // Important: this has to be created before we setup the injector in the factory:
        assertNotNull(factory.getHandleResolver());

        Connector c = factory.create("kinesis.test-connector", ImmutableMap.<String, String>builder()
                .put("kinesis.hide-internal-columns", "false")
                .put("kinesis.access-key", TestUtils.noneToBlank(accessKey))
                .put("kinesis.secret-key", TestUtils.noneToBlank(secretKey))
                .build(), new TestingConnectorContext() {});
        assertNotNull(c);

        // Verify that the key objects have been created on the connector
        assertNotNull(c.getRecordSetProvider());
        assertNotNull(c.getSplitManager());
        ConnectorMetadata md = c.getMetadata(KinesisTransactionHandle.INSTANCE);
        assertNotNull(md);

        ConnectorTransactionHandle handle = c.beginTransaction(READ_COMMITTED, true);
        // instanceof already implies non-null, so the former null check was redundant
        assertTrue(handle instanceof KinesisTransactionHandle);
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis;

import com.google.common.collect.ImmutableMap;
import io.prestosql.plugin.kinesis.util.KinesisTestClientManager;
import io.prestosql.plugin.kinesis.util.MockKinesisClient;
import io.prestosql.plugin.kinesis.util.TestUtils;
import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.ColumnMetadata;
import io.prestosql.spi.connector.ConnectorTableMetadata;
import io.prestosql.spi.connector.ConnectorTransactionHandle;
import io.prestosql.spi.connector.SchemaTableName;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.util.List;
import java.util.Map;

import static io.prestosql.testing.TestingConnectorSession.SESSION;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;

/**
 * Unit test for the TableDescriptionSupplier and related classes
 */
public class TestKinesisTableDescriptionSupplier
{
    private KinesisPlugin kinesisPlugin;
    private KinesisConnector connector;

    @BeforeClass
    public void start()
    {
        // Minimal configuration needed for this test
        Map<String, String> properties = ImmutableMap.<String, String>builder()
                .put("kinesis.table-description-location", "etc/kinesis")
                .put("kinesis.default-schema", "kinesis")
                .put("kinesis.hide-internal-columns", "true")
                .build();

        // Back the connector with a mock Kinesis client so no AWS calls are made
        KinesisTestClientManager clientManager = new KinesisTestClientManager();
        MockKinesisClient mockClient = (MockKinesisClient) clientManager.getClient();
        mockClient.createStream("test123", 2);
        mockClient.createStream("sampleTable", 2);

        kinesisPlugin = new KinesisPlugin(new TestingKinesisConnectorFactory(clientManager));
        connector = TestUtils.createConnector(kinesisPlugin, properties, true);
    }

    @Test
    public void testTableDefinition()
    {
        KinesisMetadata metadata = (KinesisMetadata) connector.getMetadata(new ConnectorTransactionHandle() {});
        assertNotNull(metadata);

        KinesisTableHandle tableHandle = metadata.getTableHandle(SESSION, new SchemaTableName("prod", "test_table"));
        SchemaTableName tableSchemaName = tableHandle.toSchemaTableName();
        assertEquals(tableSchemaName.getSchemaName(), "prod");
        assertEquals(tableSchemaName.getTableName(), "test_table");
        assertEquals(tableHandle.getStreamName(), "test_kinesis_stream");
        assertEquals(tableHandle.getMessageDataFormat(), "json");

        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(SESSION, tableHandle);
        assertEquals(columnHandles.size(), 14);
        long internalColumns = columnHandles.values().stream()
                .filter(handle -> ((KinesisColumnHandle) handle).isInternal())
                .count();
        assertEquals(internalColumns, 10);
    }

    @Test
    public void testRelatedObjects()
    {
        KinesisMetadata metadata = (KinesisMetadata) connector.getMetadata(new ConnectorTransactionHandle() {});
        assertNotNull(metadata);

        SchemaTableName schemaTableName = new SchemaTableName("prod", "test_table");
        // NOTE(review): null session is passed here, as in the original test — confirm acceptable
        List<String> schemas = metadata.listSchemaNames(null);
        assertEquals(schemas.size(), 1);
        assertEquals(schemas.get(0), "prod");

        KinesisTableHandle tableHandle = metadata.getTableHandle(null, schemaTableName);
        assertNotNull(tableHandle);
        assertEquals(tableHandle.getSchemaName(), "prod");
        assertEquals(tableHandle.getTableName(), "test_table");
        assertEquals(tableHandle.getStreamName(), "test_kinesis_stream");
        assertEquals(tableHandle.getMessageDataFormat(), "json");

        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(null, tableHandle);
        assertNotNull(tableMetadata);
        assertEquals(tableMetadata.getTable().getSchemaName(), "prod");
        assertEquals(tableMetadata.getTable().getTableName(), "test_table");
        List<ColumnMetadata> columns = tableMetadata.getColumns();
        assertNotNull(columns);

        // Check a user-defined column and an internal column by display type
        ColumnMetadata serviceType = columns.stream()
                .filter(column -> column.getName().equals("service_type"))
                .findFirst()
                .orElse(null);
        assertTrue(serviceType != null);
        assertEquals(serviceType.getType().getDisplayName(), "varchar(20)");

        ColumnMetadata partitionKey = columns.stream()
                .filter(column -> column.getName().equals("_partition_key"))
                .findFirst()
                .orElse(null);
        assertTrue(partitionKey != null);
        assertEquals(partitionKey.getType().getDisplayName(), "varchar");
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis;

import com.amazonaws.services.kinesis.model.PutRecordsRequest;
import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry;
import io.airlift.log.Logger;
import io.prestosql.Session;
import io.prestosql.metadata.QualifiedObjectName;
import io.prestosql.metadata.SessionPropertyManager;
import io.prestosql.metadata.TableHandle;
import io.prestosql.plugin.kinesis.util.EmbeddedKinesisStream;
import io.prestosql.plugin.kinesis.util.TestUtils;
import io.prestosql.security.AllowAllAccessControl;
import io.prestosql.spi.QueryId;
import io.prestosql.spi.security.Identity;
import io.prestosql.spi.type.BigintType;
import io.prestosql.testing.MaterializedResult;
import io.prestosql.tests.StandaloneQueryRunner;
import org.testng.annotations.AfterClass;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;

import java.io.File;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static io.prestosql.spi.type.TimeZoneKey.UTC_KEY;
import static io.prestosql.testing.assertions.Assert.assertEquals;
import static io.prestosql.transaction.TransactionBuilder.transaction;
import static java.util.Locale.ENGLISH;
import static org.testng.Assert.assertTrue;

/**
 * Note: this is an integration test that connects to AWS Kinesis.
 * <p>
 * Only run if you have an account setup where you can create streams and put/get records.
 * You may incur AWS charges if you run this test. You probably want to setup an IAM
 * user for your CI server to use.
 */
@Test(singleThreaded = true)
public class TestMinimalFunctionality
{
    private static final Logger log = Logger.get(TestMinimalFunctionality.class);

    public static final Session SESSION = Session.builder(new SessionPropertyManager())
            .setIdentity(new Identity("user", Optional.empty()))
            .setSource("source")
            .setCatalog("kinesis")
            .setSchema("default")
            .setTimeZoneKey(UTC_KEY)
            .setLocale(ENGLISH)
            .setQueryId(new QueryId("dummy"))
            .build();

    private EmbeddedKinesisStream embeddedKinesisStream;
    private String streamName;
    private StandaloneQueryRunner queryRunner;

    @Parameters({
            "kinesis.awsAccessKey",
            "kinesis.awsSecretKey"
    })
    @BeforeClass
    public void start(String accessKey, String secretKey)
            throws Exception
    {
        embeddedKinesisStream = new EmbeddedKinesisStream(TestUtils.noneToBlank(accessKey), TestUtils.noneToBlank(secretKey));
    }

    @AfterClass
    public void stop()
            throws Exception
    {
        embeddedKinesisStream.close();
    }

    @Parameters({
            "kinesis.awsAccessKey",
            "kinesis.awsSecretKey"
    })
    @BeforeMethod
    public void spinUp(String accessKey, String secretKey)
            throws Exception
    {
        System.setProperty("com.amazonaws.sdk.disableCbor", "true");
        streamName = "test_" + UUID.randomUUID().toString().replace("-", "_");

        embeddedKinesisStream.createStream(2, streamName);
        this.queryRunner = new StandaloneQueryRunner(SESSION);

        // Build a per-stream table description by substituting the stream name
        // into the template file
        Path tempDir = Files.createTempDirectory("tempdir");
        File baseFile = new File("src/test/resources/tableDescriptions/EmptyTable.json");
        File file = new File(tempDir.toAbsolutePath().toString() + "/" + streamName + ".json");

        try (Stream<String> lines = Files.lines(baseFile.toPath())) {
            // Plain literal substitution, so replace() suffices (no regex needed)
            List<String> replaced = lines
                    .map(line -> line.replace("TABLE_NAME", streamName))
                    .map(line -> line.replace("STREAM_NAME", streamName))
                    .collect(Collectors.toList());
            Files.write(file.toPath(), replaced);
        }
        TestUtils.installKinesisPlugin(queryRunner, tempDir.toAbsolutePath().toString(),
                TestUtils.noneToBlank(accessKey), TestUtils.noneToBlank(secretKey));
    }

    /**
     * Puts {@code count} randomly-keyed records into the stream.
     */
    private void createMessages(String streamName, long count)
            throws Exception
    {
        PutRecordsRequest putRecordsRequest = new PutRecordsRequest();
        putRecordsRequest.setStreamName(streamName);
        List<PutRecordsRequestEntry> putRecordsRequestEntryList = new ArrayList<>();
        for (int i = 0; i < count; i++) {
            PutRecordsRequestEntry putRecordsRequestEntry = new PutRecordsRequestEntry();
            // UTF-8 explicitly: getBytes() without a charset uses the platform default
            putRecordsRequestEntry.setData(ByteBuffer.wrap(UUID.randomUUID().toString().getBytes(StandardCharsets.UTF_8)));
            putRecordsRequestEntry.setPartitionKey(Long.toString(i));
            putRecordsRequestEntryList.add(putRecordsRequestEntry);
        }

        putRecordsRequest.setRecords(putRecordsRequestEntryList);
        // Result intentionally unused; failures surface as thrown exceptions
        embeddedKinesisStream.getKinesisClient().putRecords(putRecordsRequest);
    }

    @Test
    public void testStreamExists()
            throws Exception
    {
        QualifiedObjectName name = new QualifiedObjectName("kinesis", "default", streamName);

        transaction(queryRunner.getTransactionManager(), new AllowAllAccessControl())
                .singleStatement()
                .execute(SESSION, session -> {
                    Optional<TableHandle> handle = queryRunner.getServer().getMetadata().getTableHandle(session, name);
                    assertTrue(handle.isPresent());
                });
    }

    @Test
    public void testStreamHasData()
            throws Exception
    {
        MaterializedResult result = queryRunner.execute("SELECT COUNT(1) FROM " + streamName);
        MaterializedResult expected = MaterializedResult.resultBuilder(SESSION, BigintType.BIGINT)
                .row(0L)
                .build();
        assertEquals(result, expected);

        long count = 500L;
        createMessages(streamName, count);

        // Wait for the records to become visible BEFORE re-querying. The previous
        // version slept after the query result was already captured, so the sleep
        // had no effect on the assertion.
        Thread.sleep(5000);

        result = queryRunner.execute("SELECT COUNT(1) FROM " + streamName);
        expected = MaterializedResult.resultBuilder(SESSION, BigintType.BIGINT)
                .row(count)
                .build();
        assertEquals(result, expected);
    }

    @AfterMethod
    public void tearDown()
            throws Exception
    {
        // NOTE(review): "delteStream" is a typo in EmbeddedKinesisStream's API;
        // fix there and update this call together
        embeddedKinesisStream.delteStream(streamName);
        queryRunner.close();
    }
}
+ */ +package io.prestosql.plugin.kinesis; + +import com.amazonaws.services.kinesis.model.PutRecordsRequest; +import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry; +import io.airlift.log.Logger; +import io.prestosql.Session; +import io.prestosql.metadata.QualifiedObjectName; +import io.prestosql.metadata.TableHandle; +import io.prestosql.plugin.kinesis.util.MockKinesisClient; +import io.prestosql.plugin.kinesis.util.TestUtils; +import io.prestosql.security.AllowAllAccessControl; +import io.prestosql.spi.type.BigintType; +import io.prestosql.spi.type.Type; +import io.prestosql.testing.MaterializedResult; +import io.prestosql.testing.MaterializedRow; +import io.prestosql.tests.StandaloneQueryRunner; +import org.testng.annotations.AfterClass; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.UUID; + +import static io.prestosql.testing.TestingSession.testSessionBuilder; +import static io.prestosql.transaction.TransactionBuilder.transaction; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +/** + * Test record access and querying along with all associated setup. + *

+ * This is a lighter weight integration test that exercises more parts of + * the plug in without requiring an actual Kinesis connection. It uses the mock + * kinesis client so no AWS activity will occur. + */ +@Test(singleThreaded = true) +public class TestRecordAccess +{ + private static final Logger log = Logger.get(TestRecordAccess.class); + + private static final Session SESSION = testSessionBuilder() + .setCatalog("kinesis") + .setSchema("default") + .build(); + + private String dummyStreamName; + private String jsonStreamName; + private StandaloneQueryRunner queryRunner; + private MockKinesisClient mockClient; + + @BeforeClass + public void start() + throws Exception + { + dummyStreamName = "test123"; + jsonStreamName = "sampleTable"; + this.queryRunner = new StandaloneQueryRunner(SESSION); + mockClient = TestUtils.installKinesisPlugin(queryRunner); + } + + @AfterClass + public void stop() + throws Exception + { + queryRunner.close(); + } + + private void createDummyMessages(String streamName, int count) + throws Exception + { + PutRecordsRequest putRecordsRequest = new PutRecordsRequest(); + putRecordsRequest.setStreamName(streamName); + List putRecordsRequestEntryList = new ArrayList<>(); + for (int i = 0; i < count; i++) { + PutRecordsRequestEntry putRecordsRequestEntry = new PutRecordsRequestEntry(); + putRecordsRequestEntry.setData(ByteBuffer.wrap(UUID.randomUUID().toString().getBytes())); + putRecordsRequestEntry.setPartitionKey(Long.toString(i)); + putRecordsRequestEntryList.add(putRecordsRequestEntry); + } + + putRecordsRequest.setRecords(putRecordsRequestEntryList); + mockClient.putRecords(putRecordsRequest); + } + + private void createJsonMessages(String streamName, int count, int idStart) + throws Exception + { + String jsonFormat = "{\"id\" : %d, \"name\" : \"%s\"}"; + PutRecordsRequest putRecordsRequest = new PutRecordsRequest(); + putRecordsRequest.setStreamName(streamName); + List putRecordsRequestEntryList = new ArrayList<>(); + for (int i = 
0; i < count; i++) { + PutRecordsRequestEntry putRecordsRequestEntry = new PutRecordsRequestEntry(); + long id = idStart + i; + String name = UUID.randomUUID().toString(); + String jsonVal = String.format(jsonFormat, id, name); + + // ? with StandardCharsets.UTF_8 + putRecordsRequestEntry.setData(ByteBuffer.wrap(jsonVal.getBytes())); + putRecordsRequestEntry.setPartitionKey(Long.toString(id)); + putRecordsRequestEntryList.add(putRecordsRequestEntry); + } + + putRecordsRequest.setRecords(putRecordsRequestEntryList); + mockClient.putRecords(putRecordsRequest); + } + + @Test + public void testStreamExists() + throws Exception + { + QualifiedObjectName name = new QualifiedObjectName("kinesis", "default", dummyStreamName); + + transaction(queryRunner.getTransactionManager(), new AllowAllAccessControl()) + .singleStatement() + .execute(SESSION, session -> { + Optional handle = queryRunner.getServer().getMetadata().getTableHandle(session, name); + assertTrue(handle.isPresent()); + }); + log.info("Completed first test (access table handle)"); + } + + @Test + public void testStreamHasData() + throws Exception + { + MaterializedResult result = queryRunner.execute("Select count(1) from " + dummyStreamName); + MaterializedResult expected = MaterializedResult.resultBuilder(SESSION, BigintType.BIGINT) + .row(0) + .build(); + + assertEquals(result.getRowCount(), expected.getRowCount()); + + int count = 500; + createDummyMessages(dummyStreamName, count); + + result = queryRunner.execute("SELECT count(1) from " + dummyStreamName); + + expected = MaterializedResult.resultBuilder(SESSION, BigintType.BIGINT) + .row(count) + .build(); + + assertEquals(result.getRowCount(), expected.getRowCount()); + log.info("Completed second test (select counts)"); + } + + @Test + public void testJsonStream() + throws Exception + { + // Simple case: add a few specific items, query object and internal fields: + createJsonMessages(jsonStreamName, 4, 100); + + MaterializedResult result = 
queryRunner.execute("Select id, name, _shard_id, _message_length, _message from " + jsonStreamName + " where _message_length >= 1"); + assertEquals(result.getRowCount(), 4); + + List types = result.getTypes(); + assertEquals(types.size(), 5); + assertEquals(types.get(0).toString(), "bigint"); + assertEquals(types.get(1).toString(), "varchar"); + log.info("Types : " + types.toString()); + + List rows = result.getMaterializedRows(); + assertEquals(rows.size(), 4); + for (MaterializedRow row : rows) { + assertEquals(row.getFieldCount(), 5); + log.info("ROW: " + row.toString()); + } + } + + @AfterMethod + public void tearDown() + throws Exception + { + // If desired clear messages or streams depending on the test being conducted! + } +} diff --git a/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestingKinesisConnectorFactory.java b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestingKinesisConnectorFactory.java new file mode 100644 index 000000000000..b1c83f3ff707 --- /dev/null +++ b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/TestingKinesisConnectorFactory.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.prestosql.plugin.kinesis; + +import com.google.inject.Injector; +import com.google.inject.Scopes; +import com.google.inject.TypeLiteral; +import io.airlift.bootstrap.Bootstrap; +import io.airlift.json.JsonModule; +import io.prestosql.spi.NodeManager; +import io.prestosql.spi.connector.Connector; +import io.prestosql.spi.connector.ConnectorContext; +import io.prestosql.spi.connector.SchemaTableName; +import io.prestosql.spi.type.TypeManager; + +import java.util.Map; +import java.util.function.Supplier; + +import static com.google.common.base.Throwables.throwIfUnchecked; +import static java.util.Objects.requireNonNull; + +public class TestingKinesisConnectorFactory + extends KinesisConnectorFactory +{ + KinesisClientProvider kinesisClientProvider; + + public TestingKinesisConnectorFactory(KinesisClientProvider kinesisClientProvider) + { + super(); + this.kinesisClientProvider = kinesisClientProvider; + } + + @Override + public Connector create(String catalogName, Map config, ConnectorContext context) + { + requireNonNull(catalogName, "connectorId is null"); + requireNonNull(config, "config is null"); + + try { + Bootstrap app = new Bootstrap( + new JsonModule(), + new KinesisModule(), + binder -> { + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); + binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + binder.bind(KinesisHandleResolver.class).toInstance(new KinesisHandleResolver()); + binder.bind(KinesisClientProvider.class).toInstance(kinesisClientProvider); + binder.bind(new TypeLiteral>>() {}).to(KinesisTableDescriptionSupplier.class).in(Scopes.SINGLETON); + }); + + Injector injector = app.strictConfig() + .doNotInitializeLogging() + .setRequiredConfigurationProperties(config) + .initialize(); + + KinesisConnector connector = injector.getInstance(KinesisConnector.class); + return connector; + } + catch (Exception e) { + throwIfUnchecked(e); + throw new RuntimeException(e); + } + } +} diff --git 
a/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/s3config/TestS3TableConfigClient.java b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/s3config/TestS3TableConfigClient.java new file mode 100644 index 000000000000..4f14abb91691 --- /dev/null +++ b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/s3config/TestS3TableConfigClient.java @@ -0,0 +1,123 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis.s3config; + +import com.amazonaws.services.s3.AmazonS3URI; +import com.google.common.collect.ImmutableMap; +import io.airlift.log.Logger; +import io.prestosql.plugin.kinesis.KinesisConnector; +import io.prestosql.plugin.kinesis.KinesisMetadata; +import io.prestosql.plugin.kinesis.KinesisPlugin; +import io.prestosql.plugin.kinesis.KinesisTableHandle; +import io.prestosql.plugin.kinesis.util.TestUtils; +import io.prestosql.spi.connector.ColumnHandle; +import io.prestosql.spi.connector.ConnectorTransactionHandle; +import io.prestosql.spi.connector.SchemaTableName; +import org.testng.annotations.Parameters; +import org.testng.annotations.Test; + +import java.util.Map; + +import static io.prestosql.testing.TestingConnectorSession.SESSION; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class TestS3TableConfigClient +{ + private static final Logger log = 
Logger.get(TestS3TableConfigClient.class); + + @Test + public void testS3URIValues() + { + // Verify that S3URI values will work: + AmazonS3URI uri1 = new AmazonS3URI("s3://our.data.warehouse/prod/client_actions"); + assertNotNull(uri1.getKey()); + assertNotNull(uri1.getBucket()); + + assertEquals(uri1.toString(), "s3://our.data.warehouse/prod/client_actions"); + assertEquals(uri1.getBucket(), "our.data.warehouse"); + assertEquals(uri1.getKey(), "prod/client_actions"); + assertTrue(uri1.getRegion() == null); + + // show info: + log.info("Tested out URI1 : " + uri1.toString()); + + AmazonS3URI uri2 = new AmazonS3URI("s3://some.big.bucket/long/complex/path"); + assertNotNull(uri2.getKey()); + assertNotNull(uri2.getBucket()); + + assertEquals(uri2.toString(), "s3://some.big.bucket/long/complex/path"); + assertEquals(uri2.getBucket(), "some.big.bucket"); + assertEquals(uri2.getKey(), "long/complex/path"); + assertTrue(uri2.getRegion() == null); + + // info: + log.info("Tested out URI2 : " + uri2.toString()); + + AmazonS3URI uri3 = new AmazonS3URI("s3://presto.kinesis.config/unit-test/presto-kinesis"); + assertNotNull(uri3.getKey()); + assertNotNull(uri3.getBucket()); + + assertEquals(uri3.toString(), "s3://presto.kinesis.config/unit-test/presto-kinesis"); + assertEquals(uri3.getBucket(), "presto.kinesis.config"); + assertEquals(uri3.getKey(), "unit-test/presto-kinesis"); + } + + @Parameters({ + "kinesis.test-table-description-location", + "kinesis.awsAccessKey", + "kinesis.awsSecretKey" + }) + @Test + public void testTableReading(String tableDescriptionS3, String accessKey, String secretKey) + { + // To run this test: setup an S3 bucket with a folder for unit testing, and put + // MinimalTable.json in that folder. 
+ + // Create dependent objects, including the minimal config needed for this test + Map properties = new ImmutableMap.Builder() + .put("kinesis.table-description-location", tableDescriptionS3) + .put("kinesis.default-schema", "kinesis") + .put("kinesis.hide-internal-columns", "false") + .put("kinesis.access-key", TestUtils.noneToBlank(accessKey)) + .put("kinesis.secret-key", TestUtils.noneToBlank(secretKey)) + .build(); + + KinesisPlugin kinesisPlugin = new KinesisPlugin(); + KinesisConnector kinesisConnector = TestUtils.createConnector(kinesisPlugin, properties, false); + + // Sleep for 10 seconds to ensure that we've loaded the tables: + try { + Thread.sleep(10000); + log.info("done sleeping, will now try to read the tables."); + } + catch (InterruptedException e) { + log.error("interrupted ..."); + } + + KinesisMetadata metadata = (KinesisMetadata) kinesisConnector.getMetadata(new ConnectorTransactionHandle() {}); + SchemaTableName tblName = new SchemaTableName("default", "test123"); + KinesisTableHandle tableHandle = metadata.getTableHandle(SESSION, tblName); + assertNotNull(metadata); + SchemaTableName tableSchemaName = tableHandle.toSchemaTableName(); + assertEquals(tableSchemaName.getSchemaName(), "default"); + assertEquals(tableSchemaName.getTableName(), "test123"); + assertEquals(tableHandle.getStreamName(), "test123"); + assertEquals(tableHandle.getMessageDataFormat(), "json"); + Map columnHandles = metadata.getColumnHandles(SESSION, tableHandle); + assertEquals(columnHandles.size(), 12); + } +} diff --git a/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/EmbeddedKinesisStream.java b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/EmbeddedKinesisStream.java new file mode 100644 index 000000000000..e8c3b9989c81 --- /dev/null +++ b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/EmbeddedKinesisStream.java @@ -0,0 +1,102 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis.util; + +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.kinesis.model.CreateStreamRequest; +import com.amazonaws.services.kinesis.model.DeleteStreamRequest; +import com.amazonaws.services.kinesis.model.DescribeStreamRequest; +import com.amazonaws.services.kinesis.model.StreamDescription; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static java.util.concurrent.TimeUnit.MILLISECONDS; + +public class EmbeddedKinesisStream + implements Closeable +{ + private BasicAWSCredentials awsCredentials; + private AmazonKinesisClient amazonKinesisClient; + private List streamsCreated = new ArrayList(); + + public EmbeddedKinesisStream(String accessKey, String secretKey) + { + this.awsCredentials = new BasicAWSCredentials(accessKey, secretKey); + this.amazonKinesisClient = new AmazonKinesisClient(awsCredentials); + } + + @Override + public void close() + throws IOException + { + } + + public void createStreams(String... streamNames) + { + createStreams(2, streamNames); + } + + public void createStreams(int shardCount, String... 
streamNames) + { + for (String streamName : streamNames) { + createStream(shardCount, streamName); + } + } + + private String checkStreamStatus(String streamName) + { + DescribeStreamRequest describeStreamRequest = new DescribeStreamRequest(); + describeStreamRequest.setStreamName(streamName); + + StreamDescription streamDescription = amazonKinesisClient.describeStream(describeStreamRequest).getStreamDescription(); + return streamDescription.getStreamStatus(); + } + + public void createStream(int shardCount, String streamName) + { + CreateStreamRequest createStreamRequest = new CreateStreamRequest(); + createStreamRequest.setStreamName(streamName); + createStreamRequest.setShardCount(shardCount); + + amazonKinesisClient.createStream(createStreamRequest); + try { + while (checkStreamStatus(streamName).equals("ACTIVE") == false) { + MILLISECONDS.sleep(1000); + } + } + catch (Exception e) { + } + + streamsCreated.add(streamName); + } + + public AmazonKinesisClient getKinesisClient() + { + return amazonKinesisClient; + } + + public void delteStream(String streamName) + { + DeleteStreamRequest deleteStreamRequest = new DeleteStreamRequest(); + deleteStreamRequest.setStreamName(streamName); + amazonKinesisClient.deleteStream(deleteStreamRequest); + if (streamsCreated.contains(streamName)) { + streamsCreated.remove(streamName); + } + } +} diff --git a/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/KinesisTestClientManager.java b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/KinesisTestClientManager.java new file mode 100644 index 000000000000..aaa44ce0b9e6 --- /dev/null +++ b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/KinesisTestClientManager.java @@ -0,0 +1,54 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis.util; + +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.s3.AmazonS3Client; +import io.prestosql.plugin.kinesis.KinesisClientProvider; + +/** + * Test implementation of KinesisClientProvider that incorporates a mock Kinesis client. + */ +public class KinesisTestClientManager + implements KinesisClientProvider +{ + private AmazonKinesisClient client = new MockKinesisClient(); + private final AmazonDynamoDBClient dynamoDBClient; + private final AmazonS3Client amazonS3Client; + + public KinesisTestClientManager() + { + this.dynamoDBClient = new AmazonDynamoDBClient(); + this.amazonS3Client = new AmazonS3Client(); + } + + @Override + public AmazonKinesisClient getClient() + { + return client; + } + + @Override + public AmazonDynamoDBClient getDynamoDbClient() + { + return this.dynamoDBClient; + } + + @Override + public AmazonS3Client getS3Client() + { + return amazonS3Client; + } +} diff --git a/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/MockKinesisClient.java b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/MockKinesisClient.java new file mode 100644 index 000000000000..fb57a6d180c3 --- /dev/null +++ b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/MockKinesisClient.java @@ -0,0 +1,561 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.kinesis.util; + +import com.amazonaws.AmazonClientException; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.AmazonWebServiceRequest; +import com.amazonaws.ResponseMetadata; +import com.amazonaws.services.kinesis.AmazonKinesisClient; +import com.amazonaws.services.kinesis.model.CreateStreamRequest; +import com.amazonaws.services.kinesis.model.CreateStreamResult; +import com.amazonaws.services.kinesis.model.DescribeStreamRequest; +import com.amazonaws.services.kinesis.model.DescribeStreamResult; +import com.amazonaws.services.kinesis.model.GetRecordsRequest; +import com.amazonaws.services.kinesis.model.GetRecordsResult; +import com.amazonaws.services.kinesis.model.GetShardIteratorRequest; +import com.amazonaws.services.kinesis.model.GetShardIteratorResult; +import com.amazonaws.services.kinesis.model.ListStreamsRequest; +import com.amazonaws.services.kinesis.model.ListStreamsResult; +import com.amazonaws.services.kinesis.model.ListTagsForStreamRequest; +import com.amazonaws.services.kinesis.model.ListTagsForStreamResult; +import com.amazonaws.services.kinesis.model.PutRecordRequest; +import com.amazonaws.services.kinesis.model.PutRecordResult; +import com.amazonaws.services.kinesis.model.PutRecordsRequest; +import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry; +import com.amazonaws.services.kinesis.model.PutRecordsResult; +import com.amazonaws.services.kinesis.model.PutRecordsResultEntry; +import com.amazonaws.services.kinesis.model.Record; +import 
com.amazonaws.services.kinesis.model.SequenceNumberRange; +import com.amazonaws.services.kinesis.model.Shard; +import com.amazonaws.services.kinesis.model.StreamDescription; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +/** + * Mock kinesis client for testing that is primarily used for reading from the + * stream as we do here in Presto. + *

+ * This is to help prove that the API is being used correctly and debug any + * issues that arise without incurring AWS load and charges. It is far from a complete + * implementation of Kinesis. + *

+ */ +public class MockKinesisClient + extends AmazonKinesisClient +{ + private List streams = new ArrayList(); + + public static class InternalShard + extends Shard + { + private List recs = new ArrayList<>(); + private String streamName = ""; + private int index; + + public InternalShard(String streamName, int index) + { + super(); + this.streamName = streamName; + this.index = index; + this.setShardId(this.streamName + "_" + this.index); + } + + public List getRecords() + { + return recs; + } + + public List getRecordsFrom(ShardIterator iterator) + { + List returnRecords = new ArrayList(); + + for (Record record : this.recs) { + if (Integer.valueOf(record.getSequenceNumber()) >= iterator.recordIndex) { + returnRecords.add(record); + } + } + + return returnRecords; + } + + public String getStreamName() + { + return streamName; + } + + public int getIndex() + { + return index; + } + + public void addRecord(Record rec) + { + recs.add(rec); + } + + public void clearRecords() + { + recs.clear(); + } + } + + public static class InternalStream + { + private String streamName = ""; + private String streamAmazonResourceName = ""; + private String streamStatus = "CREATING"; + private int retentionPeriodHours = 24; + private List shards = new ArrayList(); + private int sequenceNo = 100; + private int nextShard; + + public InternalStream(String streamName, int shardCount, boolean isActive) + { + this.streamName = streamName; + this.streamAmazonResourceName = "local:fake.stream:" + streamName; + if (isActive) { + this.streamStatus = "ACTIVE"; + } + + for (int i = 0; i < shardCount; i++) { + InternalShard newShard = new InternalShard(this.streamName, i); + newShard.setSequenceNumberRange((new SequenceNumberRange()).withStartingSequenceNumber("100").withEndingSequenceNumber("999")); + this.shards.add(newShard); + } + } + + public String getStreamName() + { + return streamName; + } + + public String getStreamAmazonResourceName() + { + return streamAmazonResourceName; + } + + 
public String getStreamStatus() + { + return streamStatus; + } + + public List getShards() + { + return shards; + } + + public List getShardsFrom(String afterShardId) + { + String[] comps = afterShardId.split("_"); + if (comps.length == 2) { + List returnArray = new ArrayList(); + int afterIndex = Integer.parseInt(comps[1]); + if (shards.size() > afterIndex + 1) { + for (InternalShard shard : shards) { + if (shard.getIndex() > afterIndex) { + returnArray.add(shard); + } + } + } + + return returnArray; + } + else { + return new ArrayList(); + } + } + + public void activate() + { + this.streamStatus = "ACTIVE"; + } + + public PutRecordResult putRecord(ByteBuffer data, String partitionKey) + { + // Create record and insert into the shards. Initially just do it + // on a round robin basis. + long timestamp = System.currentTimeMillis() - 50000; + Record record = new Record(); + record = record.withData(data).withPartitionKey(partitionKey).withSequenceNumber(String.valueOf(sequenceNo)); + record.setApproximateArrivalTimestamp(new Date(timestamp)); + + if (nextShard == shards.size()) { + nextShard = 0; + } + InternalShard shard = shards.get(nextShard); + shard.addRecord(record); + + PutRecordResult result = new PutRecordResult(); + result.setSequenceNumber(String.valueOf(sequenceNo)); + result.setShardId(shard.getShardId()); + + nextShard++; + sequenceNo++; + + return result; + } + + public void clearRecords() + { + for (InternalShard shard : this.shards) { + shard.clearRecords(); + } + } + } + + public static class ShardIterator + { + public String streamId = ""; + public int shardIndex; + public int recordIndex; + + public ShardIterator(String streamId, int shardIndex, int recordIndex) + { + this.streamId = streamId; + this.shardIndex = shardIndex; + this.recordIndex = recordIndex; + } + + public String makeString() + { + return this.streamId + "_" + this.shardIndex + "_" + this.recordIndex; + } + + public static ShardIterator fromStreamAndShard(String streamName, 
String shardId) + { + ShardIterator newInst = null; + String[] comps = shardId.split("_"); + if (streamName.equals(comps[0]) && comps[1].matches("[0-9]+")) { + newInst = new ShardIterator(comps[0], Integer.parseInt(comps[1]), 0); + } + + return newInst; + } + + public static ShardIterator fromString(String input) + { + ShardIterator newInst = null; + String[] comps = input.split("_"); + if (comps.length == 3) { + if (comps[1].matches("[0-9]+") && comps[2].matches("[0-9]+")) { + newInst = new ShardIterator(comps[0], Integer.parseInt(comps[1]), Integer.parseInt(comps[2])); + } + } + + return newInst; + } + } + + public MockKinesisClient() + { + super(); + } + + protected InternalStream getStream(String name) + { + InternalStream foundStream = null; + for (InternalStream stream : this.streams) { + if (stream.getStreamName().equals(name)) { + foundStream = stream; + break; + } + } + return foundStream; + } + + protected List getShards(InternalStream theStream) + { + List externalList = new ArrayList(); + for (InternalShard intshard : theStream.getShards()) { + externalList.add(intshard); + } + + return externalList; + } + + protected List getShards(InternalStream theStream, String fromShardId) + { + List externalList = new ArrayList(); + for (InternalShard intshard : theStream.getShardsFrom(fromShardId)) { + externalList.add(intshard); + } + + return externalList; + } + + @Override + public PutRecordResult putRecord(PutRecordRequest putRecordRequest) + throws AmazonClientException + { + // Setup method to add a new record: + InternalStream theStream = this.getStream(putRecordRequest.getStreamName()); + if (theStream != null) { + return theStream.putRecord(putRecordRequest.getData(), putRecordRequest.getPartitionKey()); + } + else { + throw new AmazonClientException("This stream does not exist!"); + } + } + + @Override + public CreateStreamResult createStream(CreateStreamRequest createStreamRequest) + throws AmazonClientException + { + // Setup method to create a new 
stream: + InternalStream stream = new InternalStream(createStreamRequest.getStreamName(), createStreamRequest.getShardCount(), true); + this.streams.add(stream); + return new CreateStreamResult(); + } + + @Override + public CreateStreamResult createStream(String streamName, Integer integer) + throws AmazonClientException + { + return this.createStream((new CreateStreamRequest()).withStreamName(streamName).withShardCount(integer)); + } + + @Override + public PutRecordsResult putRecords(PutRecordsRequest putRecordsRequest) + throws AmazonClientException + { + // Setup method to add a batch of new records: + InternalStream theStream = this.getStream(putRecordsRequest.getStreamName()); + if (theStream != null) { + PutRecordsResult result = new PutRecordsResult(); + List resultList = new ArrayList(); + for (PutRecordsRequestEntry entry : putRecordsRequest.getRecords()) { + PutRecordResult putResult = theStream.putRecord(entry.getData(), entry.getPartitionKey()); + resultList.add((new PutRecordsResultEntry()).withShardId(putResult.getShardId()).withSequenceNumber(putResult.getSequenceNumber())); + } + + result.setRecords(resultList); + return result; + } + else { + throw new AmazonClientException("This stream does not exist!"); + } + } + + @Override + public DescribeStreamResult describeStream(DescribeStreamRequest describeStreamRequest) + throws AmazonClientException + { + InternalStream theStream = this.getStream(describeStreamRequest.getStreamName()); + if (theStream != null) { + StreamDescription desc = new StreamDescription(); + desc = desc.withStreamName(theStream.getStreamName()).withStreamStatus(theStream.getStreamStatus()).withStreamARN(theStream.getStreamAmazonResourceName()); + + if (describeStreamRequest.getExclusiveStartShardId() == null || describeStreamRequest.getExclusiveStartShardId().isEmpty()) { + desc.setShards(this.getShards(theStream)); + desc.setHasMoreShards(false); + } + else { + // Filter from given shard Id, or may not have any more + String 
startId = describeStreamRequest.getExclusiveStartShardId(); + desc.setShards(this.getShards(theStream, startId)); + desc.setHasMoreShards(false); + } + + DescribeStreamResult result = new DescribeStreamResult(); + result = result.withStreamDescription(desc); + return result; + } + else { + throw new AmazonClientException("This stream does not exist!"); + } + } + + @Override + public GetShardIteratorResult getShardIterator(GetShardIteratorRequest getShardIteratorRequest) + throws AmazonClientException + { + ShardIterator iter = ShardIterator.fromStreamAndShard(getShardIteratorRequest.getStreamName(), getShardIteratorRequest.getShardId()); + if (iter != null) { + InternalStream theStream = this.getStream(iter.streamId); + if (theStream != null) { + String seqAsString = getShardIteratorRequest.getStartingSequenceNumber(); + if (seqAsString != null && !seqAsString.isEmpty() && getShardIteratorRequest.getShardIteratorType().equals("AFTER_SEQUENCE_NUMBER")) { + int sequence = Integer.parseInt(seqAsString); + iter.recordIndex = sequence + 1; + } + else { + iter.recordIndex = 100; + } + + GetShardIteratorResult result = new GetShardIteratorResult(); + return result.withShardIterator(iter.makeString()); + } + else { + throw new AmazonClientException("Unknown stream or bad shard iterator!"); + } + } + else { + throw new AmazonClientException("Bad stream or shard iterator!"); + } + } + + @Override + public GetRecordsResult getRecords(GetRecordsRequest getRecordsRequest) + throws AmazonClientException + { + ShardIterator iterator = ShardIterator.fromString(getRecordsRequest.getShardIterator()); + if (iterator == null) { + throw new AmazonClientException("Bad shard iterator."); + } + + // TODO: incorporate maximum batch size (getRecordsRequest.getLimit) + GetRecordsResult result = null; + InternalStream stream = this.getStream(iterator.streamId); + if (stream != null) { + InternalShard shard = stream.getShards().get(iterator.shardIndex); + + if (iterator.recordIndex == 100) { + 
result = new GetRecordsResult(); + List recs = shard.getRecords(); + result.setRecords(recs); // NOTE: getting all for now + result.setNextShardIterator(getNextShardIterator(iterator, recs).makeString()); + result.setMillisBehindLatest(100L); + } + else { + result = new GetRecordsResult(); + List recs = shard.getRecordsFrom(iterator); + result.setRecords(recs); // may be empty + result.setNextShardIterator(getNextShardIterator(iterator, recs).makeString()); + result.setMillisBehindLatest(100L); + } + } + else { + throw new AmazonClientException("Unknown stream or bad shard iterator."); + } + + return result; + } + + protected ShardIterator getNextShardIterator(ShardIterator previousIter, List records) + { + ShardIterator newIter = null; + if (records.size() == 0) { + newIter = previousIter; + } + else { + Record rec = records.get(records.size() - 1); + int lastSeq = Integer.valueOf(rec.getSequenceNumber()); + newIter = new ShardIterator(previousIter.streamId, previousIter.shardIndex, lastSeq + 1); + } + + return newIter; + } + + //// Unsupported methods + + @Override + public ListTagsForStreamResult listTagsForStream(ListTagsForStreamRequest listTagsForStreamRequest) + throws AmazonClientException + { + return null; + } + + @Override + public ListStreamsResult listStreams(ListStreamsRequest listStreamsRequest) + throws AmazonClientException + { + return null; + } + + @Override + public ListStreamsResult listStreams() + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public PutRecordResult putRecord(String s, ByteBuffer byteBuffer, String s1) + throws AmazonServiceException, AmazonClientException + { + throw new UnsupportedOperationException("MockKinesisClient doesn't support this."); + } + + @Override + public PutRecordResult putRecord(String s, ByteBuffer byteBuffer, String s1, String s2) + throws AmazonServiceException, AmazonClientException + { + throw new UnsupportedOperationException("MockKinesisClient doesn't 
support this."); + } + + @Override + public DescribeStreamResult describeStream(String streamName) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public DescribeStreamResult describeStream(String streamName, String exclusiveStartShardId) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public DescribeStreamResult describeStream(String streamName, Integer integer, String exclusiveStartShardId) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public GetShardIteratorResult getShardIterator(String streamName, String shardId, String shardIteratorType) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public GetShardIteratorResult getShardIterator(String streamName, String shardId, String shardIteratorType, String startingSequenceNumber) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public ListStreamsResult listStreams(String exclusiveStartStreamName) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public ListStreamsResult listStreams(Integer limit, String exclusiveStartStreamName) + throws AmazonServiceException, AmazonClientException + { + return null; + } + + @Override + public void shutdown() + { + return; // Nothing to shutdown here + } + + @Override + public ResponseMetadata getCachedResponseMetadata(AmazonWebServiceRequest amazonWebServiceRequest) + { + return null; + } +} diff --git a/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/TestUtils.java b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/TestUtils.java new file mode 100644 index 000000000000..2dcc950ee6bb --- /dev/null +++ b/presto-kinesis/src/test/java/io/prestosql/plugin/kinesis/util/TestUtils.java @@ -0,0 +1,143 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.kinesis.util;

import com.google.common.collect.ImmutableMap;
import io.prestosql.plugin.kinesis.KinesisClientProvider;
import io.prestosql.plugin.kinesis.KinesisConnector;
import io.prestosql.plugin.kinesis.KinesisConnectorFactory;
import io.prestosql.plugin.kinesis.KinesisPlugin;
import io.prestosql.plugin.kinesis.KinesisStreamDescription;
import io.prestosql.plugin.kinesis.KinesisStreamFieldDescription;
import io.prestosql.plugin.kinesis.KinesisStreamFieldGroup;
import io.prestosql.plugin.kinesis.TestingKinesisConnectorFactory;
import io.prestosql.spi.connector.Connector;
import io.prestosql.spi.connector.ConnectorFactory;
import io.prestosql.spi.connector.SchemaTableName;
import io.prestosql.spi.type.BigintType;
import io.prestosql.spi.type.VarcharType;
import io.prestosql.testing.QueryRunner;
import io.prestosql.testing.TestingConnectorContext;

import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static java.util.Objects.requireNonNull;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;

/**
 * Static helpers shared by the Kinesis connector tests.
 */
public class TestUtils
{
    public static final String NONE_KEY = "NONE";

    private TestUtils() {}

    /**
     * Creates a Kinesis connector from the first factory exposed by the plugin.
     *
     * @param plugin plugin supplying the connector factory
     * @param properties catalog configuration properties (may be empty)
     * @param withMockClient NOTE(review): currently unused — the client is fixed by
     *        the factory already installed in the plugin; confirm and remove
     * @return the created connector, cast to {@link KinesisConnector}
     */
    public static KinesisConnector createConnector(KinesisPlugin plugin, Map<String, String> properties, boolean withMockClient)
    {
        requireNonNull(plugin, "Plugin instance should not be null");
        requireNonNull(properties, "Properties map should not be null (can be empty)");
        ConnectorFactory factory = plugin.getConnectorFactories().iterator().next();
        assertNotNull(factory);

        Connector connector = factory.create("kinesis", properties, new TestingConnectorContext() {});
        return (KinesisConnector) connector;
    }

    /**
     * Installs the plugin into the given query runner, using the mock client and
     * the table descriptions under {@code src/test/resources/tableDescriptions}.
     *
     * @param queryRunner runner to install the "kinesis" catalog into
     * @return the mock client backing the installed connector, pre-populated with
     *         the "test123" and "sampleTable" streams (2 shards each)
     */
    public static MockKinesisClient installKinesisPlugin(QueryRunner queryRunner)
    {
        KinesisTestClientManager kinesisTestClientManager = new KinesisTestClientManager();
        MockKinesisClient mockClient = (MockKinesisClient) kinesisTestClientManager.getClient();
        mockClient.createStream("test123", 2);
        mockClient.createStream("sampleTable", 2);
        KinesisConnectorFactory kinesisConnectorFactory = new TestingKinesisConnectorFactory(kinesisTestClientManager);

        KinesisPlugin kinesisPlugin = new KinesisPlugin(kinesisConnectorFactory);
        queryRunner.installPlugin(kinesisPlugin);

        Map<String, String> kinesisConfig = ImmutableMap.of(
                "kinesis.default-schema", "default",
                "kinesis.access-key", "",
                "kinesis.secret-key", "",
                "kinesis.table-description-location", "src/test/resources/tableDescriptions");
        queryRunner.createCatalog("kinesis", "kinesis", kinesisConfig);

        return mockClient;
    }

    /**
     * Installs the plugin into the given query runner, using the normal setup but
     * with the given table descriptions.
     * <p>
     * Note that this uses the actual client and will incur charges from AWS when
     * run. Mainly for full integration tests.
     *
     * @param queryRunner runner to install the "kinesis" catalog into
     * @param tableDescriptionLocation location of the table description files
     * @param accessKey AWS access key
     * @param secretKey AWS secret key
     */
    public static void installKinesisPlugin(QueryRunner queryRunner, String tableDescriptionLocation, String accessKey, String secretKey)
    {
        KinesisPlugin kinesisPlugin = new KinesisPlugin();
        queryRunner.installPlugin(kinesisPlugin);

        Map<String, String> kinesisConfig = ImmutableMap.of(
                "kinesis.default-schema", "default",
                "kinesis.access-key", accessKey,
                "kinesis.secret-key", secretKey,
                "kinesis.table-description-location", tableDescriptionLocation);

        queryRunner.createCatalog("kinesis", "kinesis", kinesisConfig);
    }

    /** Builds a table-description entry with no message field group. */
    public static Map.Entry<SchemaTableName, KinesisStreamDescription> createEmptyStreamDescription(String streamName, SchemaTableName schemaTableName)
    {
        return new AbstractMap.SimpleImmutableEntry<>(
                schemaTableName,
                new KinesisStreamDescription(schemaTableName.getTableName(), schemaTableName.getSchemaName(), streamName, null));
    }

    /**
     * Builds a table-description entry for JSON messages of the form
     * {@code {"id" : 1324, "name" : "some string"}}.
     */
    public static Map.Entry<SchemaTableName, KinesisStreamDescription> createSimpleJsonStreamDescription(String streamName, SchemaTableName schemaTableName)
    {
        List<KinesisStreamFieldDescription> fieldList = new ArrayList<>();
        fieldList.add(new KinesisStreamFieldDescription("id", BigintType.BIGINT, "id", "comment", null, null, false));
        fieldList.add(new KinesisStreamFieldDescription("name", VarcharType.VARCHAR, "name", "comment", null, null, false));
        KinesisStreamFieldGroup group = new KinesisStreamFieldGroup("json", fieldList);

        KinesisStreamDescription streamDescription = new KinesisStreamDescription(schemaTableName.getTableName(), schemaTableName.getSchemaName(), streamName, group);
        return new AbstractMap.SimpleImmutableEntry<>(schemaTableName, streamDescription);
    }

    /**
     * Maps the sentinel value "NONE" to an empty string; any other value passes
     * through unchanged. Null-safe: the constant is compared against the argument.
     */
    public static String noneToBlank(String awsValue)
    {
        return NONE_KEY.equals(awsValue) ? "" : awsValue;
    }

    /** Asserts the provider is the test client manager and returns it downcast. */
    public static KinesisTestClientManager getTestClientManager(KinesisClientProvider kinesisClientProvider)
    {
        requireNonNull(kinesisClientProvider, "Injector is missing in getTestClientManager");
        assertTrue(kinesisClientProvider instanceof KinesisTestClientManager);
        return (KinesisTestClientManager) kinesisClientProvider;
    }
}
http://t.co/F7iz6APFTW", + "source": "twitterfeed", + "truncated": false, + "in_reply_to_status_id": null, + "in_reply_to_status_id_str": null, + "in_reply_to_user_id": null, + "in_reply_to_user_id_str": null, + "in_reply_to_screen_name": null, + "user": { + "id": 98247748, + "id_str": "98247748", + "name": "Eastern KY News", + "screen_name": "EKentuckyNews", + "location": "Eastern Kentucky", + "url": null, + "description": "Your Eastern Kentucky News Source.", + "protected": false, + "verified": false, + "followers_count": 305, + "friends_count": 186, + "listed_count": 8, + "favourites_count": 0, + "statuses_count": 7630, + "created_at": "Mon Dec 21 01:17:22 +0000 2009", + "utc_offset": -14400, + "time_zone": "Eastern Time (US & Canada)", + "geo_enabled": true, + "lang": "en", + "contributors_enabled": false, + "is_translator": false, + "profile_background_color": "C6E2EE", + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", + "profile_background_tile": false, + "profile_link_color": "1F98C7", + "profile_sidebar_border_color": "C6E2EE", + "profile_sidebar_fill_color": "DAECF4", + "profile_text_color": "8F433C", + "profile_use_background_image": true, + "profile_image_url": "http://pbs.twimg.com/profile_images/1297233295/Kentucky_at_Work_logo_normal.jpeg", + "profile_image_url_https": "https://pbs.twimg.com/profile_images/1297233295/Kentucky_at_Work_logo_normal.jpeg", + "default_profile": false, + "default_profile_image": false, + "following": null, + "follow_request_sent": null, + "notifications": null + }, + "geo": null, + "coordinates": null, + "place": null, + "contributors": null, + "retweet_count": 0, + "favorite_count": 0, + "entities": { + "hashtags": [], + "trends": [], + "urls": [ + { + "url": "http://t.co/F7iz6APFTW", + "expanded_url": "http://bit.ly/1rTEYRM", + "display_url": "bit.ly/1rTEYRM", + "indices": [ + 114, + 136 + ] + } 
+ ], + "user_mentions": [], + "symbols": [] + }, + "favorited": false, + "retweeted": false, + "possibly_sensitive": false, + "filter_level": "medium", + "lang": "en" +} diff --git a/presto-kinesis/src/test/resources/tableDescriptions/EmptyTable.json b/presto-kinesis/src/test/resources/tableDescriptions/EmptyTable.json new file mode 100644 index 000000000000..e1d61dd1bc8e --- /dev/null +++ b/presto-kinesis/src/test/resources/tableDescriptions/EmptyTable.json @@ -0,0 +1,5 @@ +{ + "tableName": "TABLE_NAME", + "schemaName": "default", + "streamName": "STREAM_NAME" +} \ No newline at end of file diff --git a/presto-kinesis/src/test/resources/tableDescriptions/MinimalTable.json b/presto-kinesis/src/test/resources/tableDescriptions/MinimalTable.json new file mode 100644 index 000000000000..700a0ddc60b5 --- /dev/null +++ b/presto-kinesis/src/test/resources/tableDescriptions/MinimalTable.json @@ -0,0 +1,23 @@ +{ + "tableName": "test123", + "schemaName": "default", + "streamName": "test123", + "message": { + "dataFormat": "json", + "fields": [{ + "name": "id", + "type": "BIGINT", + "mapping": "id", + "comment": "comment", + "hidden": "false" + }, + { + "name": "name", + "type": "VARCHAR", + "mapping": "name", + "comment": "comment", + "hidden": "false" + } + ] + } +} \ No newline at end of file diff --git a/presto-kinesis/src/test/resources/tableDescriptions/SampleTable.json b/presto-kinesis/src/test/resources/tableDescriptions/SampleTable.json new file mode 100644 index 000000000000..5ad525a2bb85 --- /dev/null +++ b/presto-kinesis/src/test/resources/tableDescriptions/SampleTable.json @@ -0,0 +1,23 @@ +{ + "tableName": "sampleTable", + "schemaName": "default", + "streamName": "sampleTable", + "message": { + "dataFormat": "json", + "fields": [{ + "name": "id", + "type": "BIGINT", + "mapping": "id", + "comment": "comment", + "hidden": "false" + }, + { + "name": "name", + "type": "VARCHAR", + "mapping": "name", + "comment": "comment", + "hidden": "false" + } + ] + } +} \ No 
newline at end of file diff --git a/presto-kinesis/src/test/resources/tableDescriptions/TableWithMessage.json b/presto-kinesis/src/test/resources/tableDescriptions/TableWithMessage.json new file mode 100644 index 000000000000..6d7b6ab7eb42 --- /dev/null +++ b/presto-kinesis/src/test/resources/tableDescriptions/TableWithMessage.json @@ -0,0 +1,23 @@ +{ + "tableName": "test_table", + "schemaName": "prod", + "streamName": "test_kinesis_stream", + "message": { + "dataFormat": "json", + "fields": [{ + "name": "id", + "type": "BIGINT", + "mapping": "id", + "comment": "comment", + "hidden": "false" + }, + { + "name": "name", + "type": "VARCHAR", + "mapping": "name", + "comment": "comment", + "hidden": "false" + } + ] + } +} \ No newline at end of file diff --git a/presto-server/src/main/provisio/presto.xml b/presto-server/src/main/provisio/presto.xml index a3e9249104b8..197701deff9c 100644 --- a/presto-server/src/main/provisio/presto.xml +++ b/presto-server/src/main/provisio/presto.xml @@ -211,4 +211,10 @@ + + + + + +