From aa25176e77c7643d8c7ca5c6ded797d9fe315aee Mon Sep 17 00:00:00 2001
From: Josep Prat
Date: Fri, 21 May 2021 17:27:42 +0200
Subject: [PATCH] MINOR: Kafka Streams code samples formating unification (#10651)

Code samples are now unified and correctly formatted. Samples under Streams consistently use the prism library.

Reviewers: Bruno Cadonna
---
 .../developer-guide/app-reset-tool.html       |   28 +-
 .../developer-guide/config-streams.html       |  299 ++-
 docs/streams/developer-guide/datatypes.html   |   62 +-
 docs/streams/developer-guide/dsl-api.html     | 1896 ++++++++---------
 .../developer-guide/dsl-topology-naming.html  |  186 +-
 .../developer-guide/interactive-queries.html  |  426 ++--
 docs/streams/developer-guide/memory-mgmt.html |   99 +-
 .../developer-guide/processor-api.html        |  208 +-
 docs/streams/developer-guide/running-app.html |    7 +-
 docs/streams/developer-guide/security.html    |   63 +-
 docs/streams/developer-guide/testing.html     |   42 +-
 .../developer-guide/write-streams.html        |  140 +-
 docs/streams/index.html                       |  172 +-
 docs/streams/tutorial.html                    |  491 +++--
 14 files changed, 1994 insertions(+), 2125 deletions(-)

diff --git a/docs/streams/developer-guide/app-reset-tool.html b/docs/streams/developer-guide/app-reset-tool.html
index d6d07c27c97f2..597b662a5296f 100644
--- a/docs/streams/developer-guide/app-reset-tool.html
+++ b/docs/streams/developer-guide/app-reset-tool.html
@@ -78,44 +78,43 @@

Step 1: Run the application reset tool

Invoke the application reset tool from the command line

Warning! This tool makes irreversible changes to your application. It is strongly recommended that you run this once with --dry-run to preview your changes before making them.

-
<path-to-kafka>/bin/kafka-streams-application-reset
-
+
<path-to-kafka>/bin/kafka-streams-application-reset

The tool accepts the following parameters:

-
Option (* = required)                 Description
+            
Option (* = required)                 Description
 ---------------------                 -----------
 * --application-id <String: id>       The Kafka Streams application ID
-                                        (application.id).
+                                        (application.id).
 --bootstrap-servers <String: urls>    Comma-separated list of broker urls with
                                         format: HOST1:PORT1,HOST2:PORT2
-                                        (default: localhost:9092)
---by-duration <String: urls>      Reset offsets to offset by duration from
-                                        current timestamp. Format: 'PnDTnHnMnS'
+                                        (default: localhost:9092)
+--by-duration <String: urls>          Reset offsets to offset by duration from
+                                        current timestamp. Format: 'PnDTnHnMnS'
 --config-file <String: file name>     Property file containing configs to be
                                         passed to admin clients and embedded
                                         consumer.
 --dry-run                             Display the actions that would be
                                         performed without executing the reset
                                         commands.
---from-file <String: urls>        Reset offsets to values defined in CSV
+--from-file <String: urls>            Reset offsets to values defined in CSV
                                         file.
 --input-topics <String: list>         Comma-separated list of user input
                                         topics. For these topics, the tool will
                                         reset the offset to the earliest
                                         available offset.
 --intermediate-topics <String: list>  Comma-separated list of intermediate user
-                                        topics (topics used in the through()
-                                        method). For these topics, the tool
+                                        topics (topics used in the through()
+                                        method). For these topics, the tool
                                         will skip to the end.
 --internal-topics <String: list>      Comma-separated list of internal topics
                                         to delete. Must be a subset of the
                                         internal topics marked for deletion by
                                         the default behaviour (do a dry-run without
                                         this option to view these topics).
---shift-by <Long: number-of-offsets> Reset offsets shifting current offset by
-                                        'n', where 'n' can be positive or
+--shift-by <Long: number-of-offsets>  Reset offsets shifting current offset by
+                                        'n', where 'n' can be positive or
                                         negative
 --to-datetime <String>                Reset offsets to offset from datetime.
-                                        Format: 'YYYY-MM-DDTHH:mm:SS.sss'
+                                        Format: 'YYYY-MM-DDTHH:mm:SS.sss'
 --to-earliest                         Reset offsets to earliest offset.
 --to-latest                           Reset offsets to latest offset.
 --to-offset <Long>                    Reset offsets to a specific offset.
@@ -125,8 +124,7 @@ 

Create a java.util.Properties instance.

  • Set the parameters. For example:

    -
    import java.util.Properties;
    -import org.apache.kafka.streams.StreamsConfig;
    -
    -Properties settings = new Properties();
    -// Set a few key parameters
    -settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-first-streams-application");
    -settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker1:9092");
    -// Any further settings
    -settings.put(... , ...);
    -
    +
    import java.util.Properties;
    +import org.apache.kafka.streams.StreamsConfig;
    +
    +Properties settings = new Properties();
    +// Set a few key parameters
    +settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-first-streams-application");
    +settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker1:9092");
    +// Any further settings
    +settings.put(... , ...);
  • @@ -396,31 +395,31 @@

    acceptable.recovery.lag -
                  public class SendToDeadLetterQueueExceptionHandler implements DeserializationExceptionHandler {
    -                  KafkaProducer<byte[], byte[]> dlqProducer;
    -                  String dlqTopic;
    +              
    public class SendToDeadLetterQueueExceptionHandler implements DeserializationExceptionHandler {
    +    KafkaProducer<byte[], byte[]> dlqProducer;
    +    String dlqTopic;
     
    -                  @Override
    -                  public DeserializationHandlerResponse handle(final ProcessorContext context,
    -                                                               final ConsumerRecord<byte[], byte[]> record,
    -                                                               final Exception exception) {
    +    @Override
    +    public DeserializationHandlerResponse handle(final ProcessorContext context,
    +                                                 final ConsumerRecord<byte[], byte[]> record,
    +                                                 final Exception exception) {
     
    -                      log.warn("Exception caught during Deserialization, sending to the dead queue topic; " +
    -                          "taskId: {}, topic: {}, partition: {}, offset: {}",
    -                          context.taskId(), record.topic(), record.partition(), record.offset(),
    -                          exception);
    +        log.warn("Exception caught during Deserialization, sending to the dead queue topic; " +
    +            "taskId: {}, topic: {}, partition: {}, offset: {}",
    +            context.taskId(), record.topic(), record.partition(), record.offset(),
    +            exception);
     
    -                      dlqProducer.send(new ProducerRecord<>(dlqTopic, record.timestamp(), record.key(), record.value(), record.headers())).get();
    +        dlqProducer.send(new ProducerRecord<>(dlqTopic, record.timestamp(), record.key(), record.value(), record.headers())).get();
     
    -                      return DeserializationHandlerResponse.CONTINUE;
    -                  }
    +        return DeserializationHandlerResponse.CONTINUE;
    +    }
     
    -                  @Override
    -                  public void configure(final Map<String, ?> configs) {
    -                      dlqProducer = .. // get a producer from the configs map
    -                      dlqTopic = .. // get the topic name from the configs map
    -                  }
    -              }
+    @Override
+    public void configure(final Map<String, ?> configs) {
+        dlqProducer = .. // get a producer from the configs map
+        dlqTopic = .. // get the topic name from the configs map
+    }
+}

    @@ -434,32 +433,31 @@

    acceptable.recovery.lag

previousTimestamp (i.e., a Kafka Streams timestamp estimation). Here is an example of a custom TimestampExtractor implementation:

    -
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    -import org.apache.kafka.streams.processor.TimestampExtractor;
    -
    -// Extracts the embedded timestamp of a record (giving you "event-time" semantics).
    -public class MyEventTimeExtractor implements TimestampExtractor {
    -
    -  @Override
    -  public long extract(final ConsumerRecord<Object, Object> record, final long previousTimestamp) {
    -    // `Foo` is your own custom class, which we assume has a method that returns
    -    // the embedded timestamp (milliseconds since midnight, January 1, 1970 UTC).
    -    long timestamp = -1;
    -    final Foo myPojo = (Foo) record.value();
    -    if (myPojo != null) {
    -      timestamp = myPojo.getTimestampInMillis();
    -    }
    -    if (timestamp < 0) {
    -      // Invalid timestamp!  Attempt to estimate a new timestamp,
    -      // otherwise fall back to wall-clock time (processing-time).
    -      if (previousTimestamp >= 0) {
    -        return previousTimestamp;
    -      } else {
    -        return System.currentTimeMillis();
    -      }
    -    }
    -  }
    -
    -}
    -
    -
    +
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    +import org.apache.kafka.streams.processor.TimestampExtractor;
    +
    +// Extracts the embedded timestamp of a record (giving you "event-time" semantics).
    +public class MyEventTimeExtractor implements TimestampExtractor {
    +
    +  @Override
    +  public long extract(final ConsumerRecord<Object, Object> record, final long previousTimestamp) {
    +    // `Foo` is your own custom class, which we assume has a method that returns
    +    // the embedded timestamp (milliseconds since midnight, January 1, 1970 UTC).
    +    long timestamp = -1;
    +    final Foo myPojo = (Foo) record.value();
    +    if (myPojo != null) {
    +      timestamp = myPojo.getTimestampInMillis();
    +    }
    +    if (timestamp < 0) {
    +      // Invalid timestamp!  Attempt to estimate a new timestamp,
    +      // otherwise fall back to wall-clock time (processing-time).
    +      if (previousTimestamp >= 0) {
    +        return previousTimestamp;
    +      } else {
    +        return System.currentTimeMillis();
    +      }
    +    }
    +  }
    +
    +}

    You would then define the custom timestamp extractor in your Streams configuration as follows:

    -
    import java.util.Properties;
    -import org.apache.kafka.streams.StreamsConfig;
    +              
    import java.util.Properties;
    +import org.apache.kafka.streams.StreamsConfig;
     
    -Properties streamsConfiguration = new Properties();
    -streamsConfiguration.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MyEventTimeExtractor.class);
    -
+Properties streamsConfiguration = new Properties();
+streamsConfiguration.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MyEventTimeExtractor.class);

    @@ -707,38 +702,33 @@

    probing.rebalance.interval.ms

    The RocksDB configuration. Kafka Streams uses RocksDB as the default storage engine for persistent stores. To change the default configuration for RocksDB, you can implement RocksDBConfigSetter and provide your custom class via rocksdb.config.setter.

    Here is an example that adjusts the memory size consumed by RocksDB.

    -
    -
    -public static class CustomRocksDBConfig implements RocksDBConfigSetter {
    -    // This object should be a member variable so it can be closed in RocksDBConfigSetter#close.
    -    private org.rocksdb.Cache cache = new org.rocksdb.LRUCache(16 * 1024L * 1024L);
    -
    -    @Override
    -    public void setConfig(final String storeName, final Options options, final Map<String, Object> configs) {
    -        // See #1 below.
    -        BlockBasedTableConfig tableConfig = (BlockBasedTableConfig) options.tableFormatConfig();
    -        tableConfig.setBlockCache(cache);
    -        // See #2 below.
    -        tableConfig.setBlockSize(16 * 1024L);
    -        // See #3 below.
    -        tableConfig.setCacheIndexAndFilterBlocks(true);
    -        options.setTableFormatConfig(tableConfig);
    -        // See #4 below.
    -        options.setMaxWriteBufferNumber(2);
    -    }
    -
    -    @Override
    -    public void close(final String storeName, final Options options) {
    -        // See #5 below.
    -        cache.close();
    -    }
    -}
    -
    -Properties streamsSettings = new Properties();
    -streamsConfig.put(StreamsConfig.ROCKSDB_CONFIG_SETTER_CLASS_CONFIG, CustomRocksDBConfig.class);
    -
    -
    -
    +
    public static class CustomRocksDBConfig implements RocksDBConfigSetter {
    +    // This object should be a member variable so it can be closed in RocksDBConfigSetter#close.
    +    private org.rocksdb.Cache cache = new org.rocksdb.LRUCache(16 * 1024L * 1024L);
    +
    +    @Override
    +    public void setConfig(final String storeName, final Options options, final Map<String, Object> configs) {
    +        // See #1 below.
    +        BlockBasedTableConfig tableConfig = (BlockBasedTableConfig) options.tableFormatConfig();
    +        tableConfig.setBlockCache(cache);
    +        // See #2 below.
    +        tableConfig.setBlockSize(16 * 1024L);
    +        // See #3 below.
    +        tableConfig.setCacheIndexAndFilterBlocks(true);
    +        options.setTableFormatConfig(tableConfig);
    +        // See #4 below.
    +        options.setMaxWriteBufferNumber(2);
    +    }
    +
    +    @Override
    +    public void close(final String storeName, final Options options) {
    +        // See #5 below.
    +        cache.close();
    +    }
    +}
    +
    +Properties streamsSettings = new Properties();
    +streamsConfig.put(StreamsConfig.ROCKSDB_CONFIG_SETTER_CLASS_CONFIG, CustomRocksDBConfig.class);
    Notes for example:
      @@ -798,12 +788,12 @@

Kafka Streams uses Kafka consumers, producers, and admin clients internally. The consumer, producer, and admin client settings are defined by specifying parameters in a StreamsConfig instance.

      In this example, the Kafka consumer session timeout is configured to be 60000 milliseconds in the Streams settings:

      -
      Properties streamsSettings = new Properties();
      -// Example of a "normal" setting for Kafka Streams
      -streamsSettings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker-01:9092");
      -// Customize the Kafka consumer settings of your Streams application
      -streamsSettings.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 60000);
      -
      +
      Properties streamsSettings = new Properties();
      +// Example of a "normal" setting for Kafka Streams
      +streamsSettings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker-01:9092");
      +// Customize the Kafka consumer settings of your Streams application
      +streamsSettings.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 60000);
      +

      Naming

      @@ -811,18 +801,17 @@

send.buffer.bytes and receive.buffer.bytes are used to configure TCP buffers; request.timeout.ms and retry.backoff.ms control retries for client requests; retries are used to configure how many retries are allowed when handling retriable errors from broker request responses. You can avoid duplicate names by prefixing parameter names with consumer., producer., or admin. (e.g., consumer.send.buffer.bytes and producer.send.buffer.bytes).

      -
      Properties streamsSettings = new Properties();
      -// same value for consumer, producer, and admin client
      -streamsSettings.put("PARAMETER_NAME", "value");
      -// different values for consumer and producer
      -streamsSettings.put("consumer.PARAMETER_NAME", "consumer-value");
      -streamsSettings.put("producer.PARAMETER_NAME", "producer-value");
      -streamsSettings.put("admin.PARAMETER_NAME", "admin-value");
      -// alternatively, you can use
      -streamsSettings.put(StreamsConfig.consumerPrefix("PARAMETER_NAME"), "consumer-value");
      -streamsSettings.put(StreamsConfig.producerPrefix("PARAMETER_NAME"), "producer-value");
      -streamsSettings.put(StreamsConfig.adminClientPrefix("PARAMETER_NAME"), "admin-value");
      -
      +
      Properties streamsSettings = new Properties();
      +// same value for consumer, producer, and admin client
      +streamsSettings.put("PARAMETER_NAME", "value");
      +// different values for consumer and producer
      +streamsSettings.put("consumer.PARAMETER_NAME", "consumer-value");
      +streamsSettings.put("producer.PARAMETER_NAME", "producer-value");
      +streamsSettings.put("admin.PARAMETER_NAME", "admin-value");
      +// alternatively, you can use
      +streamsSettings.put(StreamsConfig.consumerPrefix("PARAMETER_NAME"), "consumer-value");
      +streamsSettings.put(StreamsConfig.producerPrefix("PARAMETER_NAME"), "producer-value");
      +streamsSettings.put(StreamsConfig.adminClientPrefix("PARAMETER_NAME"), "admin-value");

      You could further separate consumer configuration by adding different prefixes:

For example, if you only want to set the restore consumer's config without touching the other consumers' settings, you could simply use the restore.consumer. prefix to set the config.

      -
      Properties streamsSettings = new Properties();
      -// same config value for all consumer types
      -streamsSettings.put("consumer.PARAMETER_NAME", "general-consumer-value");
      -// set a different restore consumer config. This would make restore consumer take restore-consumer-value,
      -// while main consumer and global consumer stay with general-consumer-value
      -streamsSettings.put("restore.consumer.PARAMETER_NAME", "restore-consumer-value");
      -// alternatively, you can use
      -streamsSettings.put(StreamsConfig.restoreConsumerPrefix("PARAMETER_NAME"), "restore-consumer-value");
      -
      -
      +
      Properties streamsSettings = new Properties();
      +// same config value for all consumer types
      +streamsSettings.put("consumer.PARAMETER_NAME", "general-consumer-value");
      +// set a different restore consumer config. This would make restore consumer take restore-consumer-value,
      +// while main consumer and global consumer stay with general-consumer-value
      +streamsSettings.put("restore.consumer.PARAMETER_NAME", "restore-consumer-value");
      +// alternatively, you can use
      +streamsSettings.put(StreamsConfig.restoreConsumerPrefix("PARAMETER_NAME"), "restore-consumer-value");

The same applies to main.consumer. and global.consumer., if you only want to specify the configuration for one consumer type.
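For example, a minimal sketch (PARAMETER_NAME and the values are placeholders, as in the snippets above):

Properties streamsSettings = new Properties();
// applies only to the main consumer; restore and global consumers keep their own or default values
streamsSettings.put("main.consumer.PARAMETER_NAME", "main-consumer-value");
// applies only to the global consumer
streamsSettings.put("global.consumer.PARAMETER_NAME", "global-consumer-value");
// alternatively, you can use
streamsSettings.put(StreamsConfig.mainConsumerPrefix("PARAMETER_NAME"), "main-consumer-value");
streamsSettings.put(StreamsConfig.globalConsumerPrefix("PARAMETER_NAME"), "global-consumer-value");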

      Additionally, to configure the internal repartition/changelog topics, you could use the topic. prefix, followed by any of the standard topic configs.

      -
      Properties streamsSettings = new Properties();
      -// Override default for both changelog and repartition topics
      -streamsSettings.put("topic.PARAMETER_NAME", "topic-value");
      -// alternatively, you can use
      -streamsSettings.put(StreamsConfig.topicPrefix("PARAMETER_NAME"), "topic-value");
      -
      -
      +
      Properties streamsSettings = new Properties();
      +// Override default for both changelog and repartition topics
      +streamsSettings.put("topic.PARAMETER_NAME", "topic-value");
      +// alternatively, you can use
      +streamsSettings.put(StreamsConfig.topicPrefix("PARAMETER_NAME"), "topic-value");

      @@ -977,11 +962,11 @@

acks, replication.factor

      -
      Properties streamsSettings = new Properties();
      -streamsSettings.put(StreamsConfig.REPLICATION_FACTOR_CONFIG, 3);
      -streamsSettings.put(StreamsConfig.topicPrefix(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG), 2);
      -streamsSettings.put(StreamsConfig.producerPrefix(ProducerConfig.ACKS_CONFIG), "all");
      -
      +
      Properties streamsSettings = new Properties();
      +streamsSettings.put(StreamsConfig.REPLICATION_FACTOR_CONFIG, 3);
      +streamsSettings.put(StreamsConfig.topicPrefix(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG), 2);
      +streamsSettings.put(StreamsConfig.producerPrefix(ProducerConfig.ACKS_CONFIG), "all");
      +
diff --git a/docs/streams/developer-guide/datatypes.html b/docs/streams/developer-guide/datatypes.html
index 2201b5b69d35a..f527021dc4911 100644
--- a/docs/streams/developer-guide/datatypes.html
+++ b/docs/streams/developer-guide/datatypes.html
@@ -55,40 +55,37 @@

      Configuring SerDes

      SerDes specified in the Streams configuration are used as the default in your Kafka Streams application.

      -
      import org.apache.kafka.common.serialization.Serdes;
      -import org.apache.kafka.streams.StreamsConfig;
      -
      -Properties settings = new Properties();
      -// Default serde for keys of data records (here: built-in serde for String type)
      -settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
      -// Default serde for values of data records (here: built-in serde for Long type)
      -settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
      -
      +
      import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.streams.StreamsConfig;
      +
      +Properties settings = new Properties();
      +// Default serde for keys of data records (here: built-in serde for String type)
      +settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
      +// Default serde for values of data records (here: built-in serde for Long type)
      +settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());

      Overriding default SerDes

      You can also specify SerDes explicitly by passing them to the appropriate API methods, which overrides the default serde settings:

      -
      import org.apache.kafka.common.serialization.Serde;
      -import org.apache.kafka.common.serialization.Serdes;
      +      
      import org.apache.kafka.common.serialization.Serde;
      +import org.apache.kafka.common.serialization.Serdes;
       
      -final Serde<String> stringSerde = Serdes.String();
      -final Serde<Long> longSerde = Serdes.Long();
      +final Serde<String> stringSerde = Serdes.String();
      +final Serde<Long> longSerde = Serdes.Long();
       
      -// The stream userCountByRegion has type `String` for record keys (for region)
      -// and type `Long` for record values (for user counts).
      -KStream<String, Long> userCountByRegion = ...;
      -userCountByRegion.to("RegionCountsTopic", Produced.with(stringSerde, longSerde));
      -
+// The stream userCountByRegion has type `String` for record keys (for region)
+// and type `Long` for record values (for user counts).
+KStream<String, Long> userCountByRegion = ...;
+userCountByRegion.to("RegionCountsTopic", Produced.with(stringSerde, longSerde));

      If you want to override serdes selectively, i.e., keep the defaults for some fields, then don’t specify the serde whenever you want to leverage the default settings:

      -
      import org.apache.kafka.common.serialization.Serde;
      -import org.apache.kafka.common.serialization.Serdes;
      -
      -// Use the default serializer for record keys (here: region as String) by not specifying the key serde,
      -// but override the default serializer for record values (here: userCount as Long).
      -final Serde<Long> longSerde = Serdes.Long();
      -KStream<String, Long> userCountByRegion = ...;
      -userCountByRegion.to("RegionCountsTopic", Produced.valueSerde(Serdes.Long()));
      -
      +
      import org.apache.kafka.common.serialization.Serde;
      +import org.apache.kafka.common.serialization.Serdes;
      +
      +// Use the default serializer for record keys (here: region as String) by not specifying the key serde,
      +// but override the default serializer for record values (here: userCount as Long).
      +final Serde<Long> longSerde = Serdes.Long();
      +KStream<String, Long> userCountByRegion = ...;
      +userCountByRegion.to("RegionCountsTopic", Produced.valueSerde(Serdes.Long()));

If some of your incoming records are corrupted or ill-formatted, they will cause the deserializer class to report an error. Since 1.0.x we have introduced a DeserializationExceptionHandler interface which allows you to customize how to handle such records. The customized implementation of the interface can be specified via the StreamsConfig.
@@ -101,12 +98,11 @@
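For example, here is a minimal sketch of registering such a handler via the Streams configuration (shown with the built-in LogAndContinueExceptionHandler; a custom implementation such as the SendToDeadLetterQueueExceptionHandler above can be plugged in the same way):

import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;

Properties settings = new Properties();
// Log the corrupted record and continue processing instead of failing the application
settings.put(StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
             LogAndContinueExceptionHandler.class);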

      Overriding default SerDes

      Apache Kafka includes several built-in serde implementations for Java primitives and basic types such as byte[] in its kafka-clients Maven artifact:

      -
      <dependency>
      -    <groupId>org.apache.kafka</groupId>
      -    <artifactId>kafka-clients</artifactId>
      -    <version>{{fullDotVersion}}</version>
      -</dependency>
      -
      +
      <dependency>
      +    <groupId>org.apache.kafka</groupId>
      +    <artifactId>kafka-clients</artifactId>
      +    <version>2.8.0</version>
      +</dependency>

      This artifact provides the following serde implementations under the package org.apache.kafka.common.serialization, which you can leverage when e.g., defining default serializers in your Streams configuration.

diff --git a/docs/streams/developer-guide/dsl-api.html b/docs/streams/developer-guide/dsl-api.html
index 2add551d61137..d2bce047868c7 100644
--- a/docs/streams/developer-guide/dsl-api.html
+++ b/docs/streams/developer-guide/dsl-api.html
@@ -242,19 +242,18 @@

      import org.apache.kafka.common.serialization.Serdes;
      -import org.apache.kafka.streams.StreamsBuilder;
      -import org.apache.kafka.streams.kstream.KStream;
      -
      -StreamsBuilder builder = new StreamsBuilder();
      -
      -KStream<String, Long> wordCounts = builder.stream(
      -    "word-counts-input-topic", /* input topic */
      -    Consumed.with(
      -      Serdes.String(), /* key serde */
      -      Serdes.Long()   /* value serde */
      -    );
      - +
      import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.kstream.KStream;
      +
      +StreamsBuilder builder = new StreamsBuilder();
      +
      +KStream<String, Long> wordCounts = builder.stream(
      +    "word-counts-input-topic", /* input topic */
      +    Consumed.with(
      +      Serdes.String(), /* key serde */
      +      Serdes.Long()   /* value serde */
      +    );

      If you do not specify SerDes explicitly, the default SerDes from the configuration are used.

You must specify SerDes explicitly if the key or value types of the records in the Kafka input topics do not match the configured default SerDes.
@@ -303,20 +302,19 @@

      state store that backs the table). This is required for supporting interactive queries against the table. When a name is not provided the table will not be queryable and an internal name will be provided for the state store.

      -
      import org.apache.kafka.common.serialization.Serdes;
      -import org.apache.kafka.streams.StreamsBuilder;
      -import org.apache.kafka.streams.kstream.GlobalKTable;
      -
      -StreamsBuilder builder = new StreamsBuilder();
      -
      -GlobalKTable<String, Long> wordCounts = builder.globalTable(
      -    "word-counts-input-topic",
      -    Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as(
      -      "word-counts-global-store" /* table/store name */)
      -      .withKeySerde(Serdes.String()) /* key serde */
      -      .withValueSerde(Serdes.Long()) /* value serde */
      -    );
      -
      +
      import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.kstream.GlobalKTable;
      +
      +StreamsBuilder builder = new StreamsBuilder();
      +
      +GlobalKTable<String, Long> wordCounts = builder.globalTable(
      +    "word-counts-input-topic",
      +    Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as(
      +      "word-counts-global-store" /* table/store name */)
      +      .withKeySerde(Serdes.String()) /* key serde */
      +      .withValueSerde(Serdes.Long()) /* value serde */
      +    );

      You must specify SerDes explicitly if the key or value types of the records in the Kafka input topics do not match the configured default SerDes. For information about configuring default SerDes, available SerDes, and implementing your own custom SerDes see Data Types and Serialization.

      @@ -371,23 +369,21 @@

-KStream<String, Long> stream = ...;
-Map<String, KStream<String, Long>> branches =
-    stream.split(Named.as("Branch-"))
-        .branch((key, value) -> key.startsWith("A"),  /* first predicate  */
-             Branched.as("A"))
-        .branch((key, value) -> key.startsWith("B"),  /* second predicate */
-             Branched.as("B"))
-.defaultBranch(Branched.as("C"))
+
      KStream<String, Long> stream = ...;
      +Map<String, KStream<String, Long>> branches =
      +    stream.split(Named.as("Branch-"))
      +        .branch((key, value) -> key.startsWith("A"),  /* first predicate  */
      +             Branched.as("A"))
      +        .branch((key, value) -> key.startsWith("B"),  /* second predicate */
      +             Branched.as("B"))
      +.defaultBranch(Branched.as("C"))
       );
       
      -// KStream branches.get("Branch-A") contains all records whose keys start with "A"
      -// KStream branches.get("Branch-B") contains all records whose keys start with "B"
      -// KStream branches.get("Branch-C") contains all other records
      +// KStream branches.get("Branch-A") contains all records whose keys start with "A"
      +// KStream branches.get("Branch-B") contains all records whose keys start with "B"
      +// KStream branches.get("Branch-C") contains all other records
       
      -// Java 7 example: cf. `filter` for how to create `Predicate` instances     
      -                            
      +// Java 7 example: cf. `filter` for how to create `Predicate` instances

      @@ -861,9 +838,8 @@

The repartition() operation always triggers repartitioning of the stream; as a result, it can be used with embedded Processor API methods (like transform() et al.) that do not trigger auto-repartitioning when a key-changing operation is performed beforehand.
-
      KStream<byte[], String> stream = ... ;
      -KStream<byte[], String> repartitionedStream = stream.repartition(Repartitioned.numberOfPartitions(10));
      -
      +
      KStream<byte[], String> stream = ... ;
      +KStream<byte[], String> repartitionedStream = stream.repartition(Repartitioned.numberOfPartitions(10));

      @@ -898,45 +874,43 @@

      // Assume the record values represent lines of text.  For the sake of this example, you can ignore
      -// whatever may be stored in the record keys.
      -KStream<String, String> textLines = ...;
      -
      -KStream<String, Long> wordCounts = textLines
      -    // Split each text line, by whitespace, into words.  The text lines are the record
      -    // values, i.e. you can ignore whatever data is in the record keys and thus invoke
      -    // `flatMapValues` instead of the more generic `flatMap`.
      -    .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
      -    // Group the stream by word to ensure the key of the record is the word.
      -    .groupBy((key, word) -> word)
      -    // Count the occurrences of each word (record key).
      -    //
      -    // This will change the stream type from `KGroupedStream<String, String>` to
      -    // `KTable<String, Long>` (word -> count).
      -    .count()
      -    // Convert the `KTable<String, Long>` into a `KStream<String, Long>`.
      -    .toStream();
      - +
      // Assume the record values represent lines of text.  For the sake of this example, you can ignore
      +// whatever may be stored in the record keys.
      +KStream<String, String> textLines = ...;
      +
      +KStream<String, Long> wordCounts = textLines
      +    // Split each text line, by whitespace, into words.  The text lines are the record
      +    // values, i.e. you can ignore whatever data is in the record keys and thus invoke
      +    // `flatMapValues` instead of the more generic `flatMap`.
      +    .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
      +    // Group the stream by word to ensure the key of the record is the word.
      +    .groupBy((key, word) -> word)
      +    // Count the occurrences of each word (record key).
      +    //
      +    // This will change the stream type from `KGroupedStream<String, String>` to
      +    // `KTable<String, Long>` (word -> count).
      +    .count()
      +    // Convert the `KTable<String, Long>` into a `KStream<String, Long>`.
      +    .toStream();

      WordCount example in Java 7:

      -
      // Code below is equivalent to the previous Java 8+ example above.
      -KStream<String, String> textLines = ...;
      -
      -KStream<String, Long> wordCounts = textLines
      -    .flatMapValues(new ValueMapper<String, Iterable<String>>() {
      -        @Override
      -        public Iterable<String> apply(String value) {
      -            return Arrays.asList(value.toLowerCase().split("\\W+"));
      -        }
      -    })
      -    .groupBy(new KeyValueMapper<String, String, String>>() {
      -        @Override
      -        public String apply(String key, String word) {
      -            return word;
      -        }
      -    })
      -    .count()
      -    .toStream();
      -
      +
      // Code below is equivalent to the previous Java 8+ example above.
      +KStream<String, String> textLines = ...;
      +
      +KStream<String, Long> wordCounts = textLines
      +    .flatMapValues(new ValueMapper<String, Iterable<String>>() {
      +        @Override
      +        public Iterable<String> apply(String value) {
      +            return Arrays.asList(value.toLowerCase().split("\\W+"));
      +        }
      +    })
      +    .groupBy(new KeyValueMapper<String, String, String>>() {
      +        @Override
      +        public String apply(String key, String word) {
      +            return word;
      +        }
      +    })
      +    .count()
      +    .toStream();

      Aggregating

      After records are grouped by key via groupByKey or @@ -973,69 +947,68 @@

      aggValue = 0)

      Several variants of aggregate exist, see Javadocs for details.

      -
      KGroupedStream<byte[], String> groupedStream = ...;
      -KGroupedTable<byte[], String> groupedTable = ...;
      -
      -// Java 8+ examples, using lambda expressions
      -
      -// Aggregating a KGroupedStream (note how the value type changes from String to Long)
      -KTable<byte[], Long> aggregatedStream = groupedStream.aggregate(
      -    () -> 0L, /* initializer */
      -    (aggKey, newValue, aggValue) -> aggValue + newValue.length(), /* adder */
      -    Materialized.as("aggregated-stream-store") /* state store name */
      -        .withValueSerde(Serdes.Long()); /* serde for aggregate value */
      -
      -// Aggregating a KGroupedTable (note how the value type changes from String to Long)
      -KTable<byte[], Long> aggregatedTable = groupedTable.aggregate(
      -    () -> 0L, /* initializer */
      -    (aggKey, newValue, aggValue) -> aggValue + newValue.length(), /* adder */
      -    (aggKey, oldValue, aggValue) -> aggValue - oldValue.length(), /* subtractor */
      -    Materialized.as("aggregated-table-store") /* state store name */
      -	.withValueSerde(Serdes.Long()) /* serde for aggregate value */
      -
      -
      -// Java 7 examples
      -
      -// Aggregating a KGroupedStream (note how the value type changes from String to Long)
      -KTable<byte[], Long> aggregatedStream = groupedStream.aggregate(
      -    new Initializer<Long>() { /* initializer */
      -      @Override
      -      public Long apply() {
      -        return 0L;
      -      }
      -    },
      -    new Aggregator<byte[], String, Long>() { /* adder */
      -      @Override
      -      public Long apply(byte[] aggKey, String newValue, Long aggValue) {
      -        return aggValue + newValue.length();
      -      }
      -    },
      -    Materialized.as("aggregated-stream-store")
      -        .withValueSerde(Serdes.Long());
      -
      -// Aggregating a KGroupedTable (note how the value type changes from String to Long)
      -KTable<byte[], Long> aggregatedTable = groupedTable.aggregate(
      -    new Initializer<Long>() { /* initializer */
      -      @Override
      -      public Long apply() {
      -        return 0L;
      -      }
      -    },
      -    new Aggregator<byte[], String, Long>() { /* adder */
      -      @Override
      -      public Long apply(byte[] aggKey, String newValue, Long aggValue) {
      -        return aggValue + newValue.length();
      -      }
      -    },
      -    new Aggregator<byte[], String, Long>() { /* subtractor */
      -      @Override
      -      public Long apply(byte[] aggKey, String oldValue, Long aggValue) {
      -        return aggValue - oldValue.length();
      -      }
      -    },
      -    Materialized.as("aggregated-stream-store")
      -        .withValueSerde(Serdes.Long());
      -
      +
      KGroupedStream<byte[], String> groupedStream = ...;
      +KGroupedTable<byte[], String> groupedTable = ...;
      +
      +// Java 8+ examples, using lambda expressions
      +
      +// Aggregating a KGroupedStream (note how the value type changes from String to Long)
      +KTable<byte[], Long> aggregatedStream = groupedStream.aggregate(
      +    () -> 0L, /* initializer */
      +    (aggKey, newValue, aggValue) -> aggValue + newValue.length(), /* adder */
      +    Materialized.as("aggregated-stream-store") /* state store name */
      +        .withValueSerde(Serdes.Long()); /* serde for aggregate value */
      +
      +// Aggregating a KGroupedTable (note how the value type changes from String to Long)
      +KTable<byte[], Long> aggregatedTable = groupedTable.aggregate(
      +    () -> 0L, /* initializer */
      +    (aggKey, newValue, aggValue) -> aggValue + newValue.length(), /* adder */
      +    (aggKey, oldValue, aggValue) -> aggValue - oldValue.length(), /* subtractor */
      +    Materialized.as("aggregated-table-store") /* state store name */
      +	.withValueSerde(Serdes.Long()) /* serde for aggregate value */
      +
      +
      +// Java 7 examples
      +
      +// Aggregating a KGroupedStream (note how the value type changes from String to Long)
      +KTable<byte[], Long> aggregatedStream = groupedStream.aggregate(
      +    new Initializer<Long>() { /* initializer */
      +      @Override
      +      public Long apply() {
      +        return 0L;
      +      }
      +    },
      +    new Aggregator<byte[], String, Long>() { /* adder */
      +      @Override
      +      public Long apply(byte[] aggKey, String newValue, Long aggValue) {
      +        return aggValue + newValue.length();
      +      }
      +    },
      +    Materialized.as("aggregated-stream-store")
      +        .withValueSerde(Serdes.Long());
      +
      +// Aggregating a KGroupedTable (note how the value type changes from String to Long)
      +KTable<byte[], Long> aggregatedTable = groupedTable.aggregate(
      +    new Initializer<Long>() { /* initializer */
      +      @Override
      +      public Long apply() {
      +        return 0L;
      +      }
      +    },
      +    new Aggregator<byte[], String, Long>() { /* adder */
      +      @Override
      +      public Long apply(byte[] aggKey, String newValue, Long aggValue) {
      +        return aggValue + newValue.length();
      +      }
      +    },
      +    new Aggregator<byte[], String, Long>() { /* subtractor */
      +      @Override
      +      public Long apply(byte[] aggKey, String oldValue, Long aggValue) {
      +        return aggValue - oldValue.length();
      +      }
      +    },
      +    Materialized.as("aggregated-stream-store")
      +        .withValueSerde(Serdes.Long());

      Detailed behavior of KGroupedStream:

      Filter

      @@ -399,21 +395,20 @@

      Evaluates a boolean function for each element and retains those for which the function returns true. (KStream details, KTable details)

      -
      KStream<String, Long> stream = ...;
      -
      -// A filter that selects (keeps) only positive numbers
      -// Java 8+ example, using lambda expressions
      -KStream<String, Long> onlyPositives = stream.filter((key, value) -> value > 0);
      -
      -// Java 7 example
      -KStream<String, Long> onlyPositives = stream.filter(
      -    new Predicate<String, Long>() {
      -      @Override
      -      public boolean test(String key, Long value) {
      -        return value > 0;
      -      }
      -    });
      -
      +
      KStream<String, Long> stream = ...;
      +
      +// A filter that selects (keeps) only positive numbers
      +// Java 8+ example, using lambda expressions
      +KStream<String, Long> onlyPositives = stream.filter((key, value) -> value > 0);
      +
      +// Java 7 example
      +KStream<String, Long> onlyPositives = stream.filter(
      +    new Predicate<String, Long>() {
      +      @Override
      +      public boolean test(String key, Long value) {
      +        return value > 0;
      +      }
      +    });

      Inverse Filter

      @@ -425,21 +420,20 @@

      Evaluates a boolean function for each element and drops those for which the function returns true. (KStream details, KTable details)

      -
      KStream<String, Long> stream = ...;
      -
      -// An inverse filter that discards any negative numbers or zero
      -// Java 8+ example, using lambda expressions
      -KStream<String, Long> onlyPositives = stream.filterNot((key, value) -> value <= 0);
      -
      -// Java 7 example
      -KStream<String, Long> onlyPositives = stream.filterNot(
      -    new Predicate<String, Long>() {
      -      @Override
      -      public boolean test(String key, Long value) {
      -        return value <= 0;
      -      }
      -    });
      -
      +
      KStream<String, Long> stream = ...;
      +
      +// An inverse filter that discards any negative numbers or zero
      +// Java 8+ example, using lambda expressions
      +KStream<String, Long> onlyPositives = stream.filterNot((key, value) -> value <= 0);
      +
      +// Java 7 example
      +KStream<String, Long> onlyPositives = stream.filterNot(
      +    new Predicate<String, Long>() {
      +      @Override
      +      public boolean test(String key, Long value) {
      +        return value <= 0;
      +      }
      +    });

      FlatMap

      @@ -453,21 +447,20 @@

      flatMap will result in re-partitioning of the records. If possible use flatMapValues instead, which will not cause data re-partitioning.

      -
      KStream<Long, String> stream = ...;
      -KStream<String, Integer> transformed = stream.flatMap(
      -     // Here, we generate two output records for each input record.
      -     // We also change the key and value types.
      -     // Example: (345L, "Hello") -> ("HELLO", 1000), ("hello", 9000)
      -    (key, value) -> {
      -      List<KeyValue<String, Integer>> result = new LinkedList<>();
      -      result.add(KeyValue.pair(value.toUpperCase(), 1000));
      -      result.add(KeyValue.pair(value.toLowerCase(), 9000));
      -      return result;
      -    }
      -  );
      -
      -// Java 7 example: cf. `map` for how to create `KeyValueMapper` instances
      -
      +
      KStream<Long, String> stream = ...;
      +KStream<String, Integer> transformed = stream.flatMap(
      +     // Here, we generate two output records for each input record.
      +     // We also change the key and value types.
      +     // Example: (345L, "Hello") -> ("HELLO", 1000), ("hello", 9000)
      +    (key, value) -> {
      +      List<KeyValue<String, Integer>> result = new LinkedList<>();
      +      result.add(KeyValue.pair(value.toUpperCase(), 1000));
      +      result.add(KeyValue.pair(value.toLowerCase(), 9000));
      +      return result;
      +    }
      +  );
      +
      +// Java 7 example: cf. `map` for how to create `KeyValueMapper` instances

      FlatMap (values only)

      @@ -480,12 +473,11 @@

      details)

      flatMapValues is preferable to flatMap because it will not cause data re-partitioning. However, you cannot modify the key or key type like flatMap does.

      -
      // Split a sentence into words.
      -KStream<byte[], String> sentences = ...;
      -KStream<byte[], String> words = sentences.flatMapValues(value -> Arrays.asList(value.split("\\s+")));
      +                            
      // Split a sentence into words.
      +KStream<byte[], String> sentences = ...;
      +KStream<byte[], String> words = sentences.flatMapValues(value -> Arrays.asList(value.split("\\s+")));
       
      -// Java 7 example: cf. `mapValues` for how to create `ValueMapper` instances
      -
      +// Java 7 example: cf. `mapValues` for how to create `ValueMapper` instances

      Foreach

      @@ -501,21 +493,20 @@

      peek, which is not a terminal operation).

      Note on processing guarantees: Any side effects of an action (such as writing to external systems) are not trackable by Kafka, which means they will typically not benefit from Kafka’s processing guarantees.

      -
      KStream<String, Long> stream = ...;
      -
      -// Print the contents of the KStream to the local console.
      -// Java 8+ example, using lambda expressions
      -stream.foreach((key, value) -> System.out.println(key + " => " + value));
      -
      -// Java 7 example
      -stream.foreach(
      -    new ForeachAction<String, Long>() {
      -      @Override
      -      public void apply(String key, Long value) {
      -        System.out.println(key + " => " + value);
      -      }
      -    });
      -
      +
      KStream<String, Long> stream = ...;
      +
      +// Print the contents of the KStream to the local console.
      +// Java 8+ example, using lambda expressions
      +stream.foreach((key, value) -> System.out.println(key + " => " + value));
      +
      +// Java 7 example
      +stream.foreach(
      +    new ForeachAction<String, Long>() {
      +      @Override
      +      public void apply(String key, Long value) {
      +        System.out.println(key + " => " + value);
      +      }
      +    });

      GroupByKey

      @@ -543,20 +534,19 @@

      groupByKey is preferable to groupBy because it re-partitions data only if the stream was already marked for re-partitioning. However, groupByKey does not allow you to modify the key or key type like groupBy does.

      -
      KStream<byte[], String> stream = ...;
      -
      -// Group by the existing key, using the application's configured
      -// default serdes for keys and values.
      -KGroupedStream<byte[], String> groupedStream = stream.groupByKey();
      -
      -// When the key and/or value types do not match the configured
      -// default serdes, we must explicitly specify serdes.
      -KGroupedStream<byte[], String> groupedStream = stream.groupByKey(
      -    Grouped.with(
      -      Serdes.ByteArray(), /* key */
      -      Serdes.String())     /* value */
      -  );
      -
      +
      KStream<byte[], String> stream = ...;
      +
      +// Group by the existing key, using the application's configured
      +// default serdes for keys and values.
      +KGroupedStream<byte[], String> groupedStream = stream.groupByKey();
      +
      +// When the key and/or value types do not match the configured
      +// default serdes, we must explicitly specify serdes.
      +KGroupedStream<byte[], String> groupedStream = stream.groupByKey(
      +    Grouped.with(
      +      Serdes.ByteArray(), /* key */
      +      Serdes.String())     /* value */
      +  );

      GroupBy

      @@ -586,56 +576,55 @@

      groupBy always causes data re-partitioning. If possible use groupByKey instead, which will re-partition data only if required.

      -
      KStream<byte[], String> stream = ...;
      -KTable<byte[], String> table = ...;
      -
      -// Java 8+ examples, using lambda expressions
      -
      -// Group the stream by a new key and key type
      -KGroupedStream<String, String> groupedStream = stream.groupBy(
      -    (key, value) -> value,
      -    Grouped.with(
      -      Serdes.String(), /* key (note: type was modified) */
      -      Serdes.String())  /* value */
      -  );
      -
      -// Group the table by a new key and key type, and also modify the value and value type.
      -KGroupedTable<String, Integer> groupedTable = table.groupBy(
      -    (key, value) -> KeyValue.pair(value, value.length()),
      -    Grouped.with(
      -      Serdes.String(), /* key (note: type was modified) */
      -      Serdes.Integer()) /* value (note: type was modified) */
      -  );
      -
      -
      -// Java 7 examples
      -
      -// Group the stream by a new key and key type
      -KGroupedStream<String, String> groupedStream = stream.groupBy(
      -    new KeyValueMapper<byte[], String, String>>() {
      -      @Override
      -      public String apply(byte[] key, String value) {
      -        return value;
      -      }
      -    },
      -    Grouped.with(
      -      Serdes.String(), /* key (note: type was modified) */
      -      Serdes.String())  /* value */
      -  );
      -
      -// Group the table by a new key and key type, and also modify the value and value type.
      -KGroupedTable<String, Integer> groupedTable = table.groupBy(
      -    new KeyValueMapper<byte[], String, KeyValue<String, Integer>>() {
      -      @Override
      -      public KeyValue<String, Integer> apply(byte[] key, String value) {
      -        return KeyValue.pair(value, value.length());
      -      }
      -    },
      -    Grouped.with(
      -      Serdes.String(), /* key (note: type was modified) */
      -      Serdes.Integer()) /* value (note: type was modified) */
      -  );
      -
      +
      KStream<byte[], String> stream = ...;
      +KTable<byte[], String> table = ...;
      +
      +// Java 8+ examples, using lambda expressions
      +
      +// Group the stream by a new key and key type
      +KGroupedStream<String, String> groupedStream = stream.groupBy(
      +    (key, value) -> value,
      +    Grouped.with(
      +      Serdes.String(), /* key (note: type was modified) */
      +      Serdes.String())  /* value */
      +  );
      +
      +// Group the table by a new key and key type, and also modify the value and value type.
      +KGroupedTable<String, Integer> groupedTable = table.groupBy(
      +    (key, value) -> KeyValue.pair(value, value.length()),
      +    Grouped.with(
      +      Serdes.String(), /* key (note: type was modified) */
      +      Serdes.Integer()) /* value (note: type was modified) */
      +  );
      +
      +
      +// Java 7 examples
      +
      +// Group the stream by a new key and key type
      +KGroupedStream<String, String> groupedStream = stream.groupBy(
      +    new KeyValueMapper<byte[], String, String>>() {
      +      @Override
      +      public String apply(byte[] key, String value) {
      +        return value;
      +      }
      +    },
      +    Grouped.with(
      +      Serdes.String(), /* key (note: type was modified) */
      +      Serdes.String())  /* value */
      +  );
      +
      +// Group the table by a new key and key type, and also modify the value and value type.
      +KGroupedTable<String, Integer> groupedTable = table.groupBy(
      +    new KeyValueMapper<byte[], String, KeyValue<String, Integer>>() {
      +      @Override
      +      public KeyValue<String, Integer> apply(byte[] key, String value) {
      +        return KeyValue.pair(value, value.length());
      +      }
      +    },
      +    Grouped.with(
      +      Serdes.String(), /* key (note: type was modified) */
      +      Serdes.Integer()) /* value (note: type was modified) */
      +  );

      Cogroup

      @@ -650,19 +639,18 @@

      windowed before it is aggregated.

      Cogroup does not cause a repartition as it has the prerequisite that the input streams are grouped. In the process of creating these groups they will have already been repartitioned if the stream was already marked for repartitioning.

      -
      KStream<byte[], String> stream = ...;
      -                        KStream<byte[], String> stream2 = ...;
      +                            
      KStream<byte[], String> stream = ...;
      +                        KStream<byte[], String> stream2 = ...;
       
      -// Group by the existing key, using the application's configured
      -// default serdes for keys and values.
      -KGroupedStream<byte[], String> groupedStream = stream.groupByKey();
      -KGroupedStream<byte[], String> groupedStream2 = stream2.groupByKey();
      -CogroupedKStream<byte[], String> cogroupedStream = groupedStream.cogroup(aggregator1).cogroup(groupedStream2, aggregator2);
      +// Group by the existing key, using the application's configured
      +// default serdes for keys and values.
      +KGroupedStream<byte[], String> groupedStream = stream.groupByKey();
      +KGroupedStream<byte[], String> groupedStream2 = stream2.groupByKey();
      +CogroupedKStream<byte[], String> cogroupedStream = groupedStream.cogroup(aggregator1).cogroup(groupedStream2, aggregator2);
       
      -KTable<byte[], String> table = cogroupedStream.aggregate(initializer);
      +KTable<byte[], String> table = cogroupedStream.aggregate(initializer);
       
      -KTable<byte[], String> table2 = cogroupedStream.windowedBy(TimeWindows.duration(500ms)).aggregate(initializer);
      -
      +KTable<byte[], String> table2 = cogroupedStream.windowedBy(TimeWindows.duration(500ms)).aggregate(initializer);

      Map

      @@ -675,23 +663,22 @@

      map will result in re-partitioning of the records. If possible use mapValues instead, which will not cause data re-partitioning.

      -
      KStream<byte[], String> stream = ...;
      -
      -// Java 8+ example, using lambda expressions
      -// Note how we change the key and the key type (similar to `selectKey`)
      -// as well as the value and the value type.
      -KStream<String, Integer> transformed = stream.map(
      -    (key, value) -> KeyValue.pair(value.toLowerCase(), value.length()));
      -
      -// Java 7 example
      -KStream<String, Integer> transformed = stream.map(
      -    new KeyValueMapper<byte[], String, KeyValue<String, Integer>>() {
      -      @Override
      -      public KeyValue<String, Integer> apply(byte[] key, String value) {
      -        return new KeyValue<>(value.toLowerCase(), value.length());
      -      }
      -    });
      -
      +
      KStream<byte[], String> stream = ...;
      +
      +// Java 8+ example, using lambda expressions
      +// Note how we change the key and the key type (similar to `selectKey`)
      +// as well as the value and the value type.
      +KStream<String, Integer> transformed = stream.map(
      +    (key, value) -> KeyValue.pair(value.toLowerCase(), value.length()));
      +
      +// Java 7 example
      +KStream<String, Integer> transformed = stream.map(
      +    new KeyValueMapper<byte[], String, KeyValue<String, Integer>>() {
      +      @Override
      +      public KeyValue<String, Integer> apply(byte[] key, String value) {
      +        return new KeyValue<>(value.toLowerCase(), value.length());
      +      }
      +    });

      Map (values only)

      @@ -706,20 +693,19 @@

      KTable details)

      mapValues is preferable to map because it will not cause data re-partitioning. However, it does not allow you to modify the key or key type like map does.

      -
      KStream<byte[], String> stream = ...;
      -
      -// Java 8+ example, using lambda expressions
      -KStream<byte[], String> uppercased = stream.mapValues(value -> value.toUpperCase());
      -
      -// Java 7 example
      -KStream<byte[], String> uppercased = stream.mapValues(
      -    new ValueMapper<String>() {
      -      @Override
      -      public String apply(String s) {
      -        return s.toUpperCase();
      -      }
      -    });
      -
      +
      KStream<byte[], String> stream = ...;
      +
      +// Java 8+ example, using lambda expressions
      +KStream<byte[], String> uppercased = stream.mapValues(value -> value.toUpperCase());
      +
      +// Java 7 example
      +KStream<byte[], String> uppercased = stream.mapValues(
      +    new ValueMapper<String>() {
      +      @Override
      +      public String apply(String s) {
      +        return s.toUpperCase();
      +      }
      +    });

      Merge

      @@ -732,15 +718,11 @@

      details)

There is no ordering guarantee between records from different streams in the merged stream. Relative order is preserved within each input stream though (i.e., records within the same input stream are processed in order)

      -
      -
      -
      KStream<byte[], String> stream1 = ...;
      +                            
      KStream<byte[], String> stream1 = ...;
       
      -KStream<byte[], String> stream2 = ...;
      +KStream<byte[], String> stream2 = ...;
       
      -KStream<byte[], String> merged = stream1.merge(stream2);
      -
      -
      +KStream<byte[], String> merged = stream1.merge(stream2);

      Peek

      @@ -756,21 +738,20 @@

      peek is helpful for use cases such as logging or tracking metrics or for debugging and troubleshooting.

      Note on processing guarantees: Any side effects of an action (such as writing to external systems) are not trackable by Kafka, which means they will typically not benefit from Kafka’s processing guarantees.

      -
      KStream<byte[], String> stream = ...;
      -
      -// Java 8+ example, using lambda expressions
      -KStream<byte[], String> unmodifiedStream = stream.peek(
      -    (key, value) -> System.out.println("key=" + key + ", value=" + value));
      -
      -// Java 7 example
      -KStream<byte[], String> unmodifiedStream = stream.peek(
      -    new ForeachAction<byte[], String>() {
      -      @Override
      -      public void apply(byte[] key, String value) {
      -        System.out.println("key=" + key + ", value=" + value);
      -      }
      -    });
      -
      +
      KStream<byte[], String> stream = ...;
      +
      +// Java 8+ example, using lambda expressions
      +KStream<byte[], String> unmodifiedStream = stream.peek(
      +    (key, value) -> System.out.println("key=" + key + ", value=" + value));
      +
      +// Java 7 example
      +KStream<byte[], String> unmodifiedStream = stream.peek(
      +    new ForeachAction<byte[], String>() {
      +      @Override
      +      public void apply(byte[] key, String value) {
      +        System.out.println("key=" + key + ", value=" + value);
      +      }
      +    });

      Print

      @@ -783,13 +764,12 @@

      details)

      Calling print() is the same as calling foreach((key, value) -> System.out.println(key + ", " + value))

print is mainly for debugging/testing purposes, and it will try to flush on each record print. Hence, it should not be used in production if performance is a concern.

      -
      KStream<byte[], String> stream = ...;
      -// print to sysout
      -stream.print();
      +                            
      KStream<byte[], String> stream = ...;
      +// print to sysout
      +stream.print();
       
      -// print to file with a custom label
      -stream.print(Printed.toFile("streams.out").withLabel("streams"));
      -
+// print to file with a custom label
+stream.print(Printed.toFile("streams.out").withLabel("streams"));

      SelectKey

      @@ -802,21 +782,20 @@

      selectKey(mapper) is the same as calling map((key, value) -> mapper(key, value), value).

      Marks the stream for data re-partitioning: Applying a grouping or a join after selectKey will result in re-partitioning of the records.

      -
      KStream<byte[], String> stream = ...;
      -
      -// Derive a new record key from the record's value.  Note how the key type changes, too.
      -// Java 8+ example, using lambda expressions
      -KStream<String, String> rekeyed = stream.selectKey((key, value) -> value.split(" ")[0])
      -
      -// Java 7 example
      -KStream<String, String> rekeyed = stream.selectKey(
      -    new KeyValueMapper<byte[], String, String>() {
      -      @Override
      -      public String apply(byte[] key, String value) {
      -        return value.split(" ")[0];
      -      }
      -    });
      -
      +
      KStream<byte[], String> stream = ...;
      +
      +// Derive a new record key from the record's value.  Note how the key type changes, too.
      +// Java 8+ example, using lambda expressions
+KStream<String, String> rekeyed = stream.selectKey((key, value) -> value.split(" ")[0]);
      +
      +// Java 7 example
      +KStream<String, String> rekeyed = stream.selectKey(
      +    new KeyValueMapper<byte[], String, String>() {
      +      @Override
      +      public String apply(byte[] key, String value) {
      +        return value.split(" ")[0];
      +      }
      +    });

      Table to Stream

      @@ -826,12 +805,11 @@

      Get the changelog stream of this table. (details)

      -
      KTable<byte[], String> table = ...;
      +                            
      KTable<byte[], String> table = ...;
       
      -// Also, a variant of `toStream` exists that allows you
      -// to select a new key for the resulting stream.
      -KStream<byte[], String> stream = table.toStream();
      -
+// Also, a variant of `toStream` exists that allows you
+// to select a new key for the resulting stream.
+KStream<byte[], String> stream = table.toStream();
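For illustration, here is a minimal sketch of that key-selecting variant; the mapper below (which simply promotes the value to the new key) is an invented example, not part of the original sample:

KTable<byte[], String> table = ...;

// Derive the new key from the record's value; the key and value types shown are illustrative.
KStream<String, String> rekeyedStream = table.toStream((key, value) -> value);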

      Stream to Table

      @@ -841,10 +819,9 @@

Convert an event stream into a table, in other words, a changelog stream. (details)

      -
      KStream<byte[], String> stream = ...;
      +                            
      KStream<byte[], String> stream = ...;
       
      -KTable<byte[], String> table = stream.toTable();
      -
      +KTable<byte[], String> table = stream.toTable();
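Likewise, a minimal sketch of the variant that materializes the resulting table under an explicit store name; the store name "stream-as-table" is only a placeholder for this example:

KStream<byte[], String> stream = ...;

// Convert the stream to a table and materialize it as a queryable state store (name is illustrative).
KTable<byte[], String> table = stream.toTable(Materialized.as("stream-as-table"));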

      @@ -1827,35 +1792,34 @@

      co-partitioned.

      Causes data re-partitioning of a stream if and only if the stream was marked for re-partitioning (if both are marked, both are re-partitioned).

Several variants of join exist; see the Javadocs for details.

      -
      import java.time.Duration;
      -KStream<String, Long> left = ...;
      -KStream<String, Double> right = ...;
      -
      -// Java 8+ example, using lambda expressions
      -KStream<String, String> joined = left.join(right,
      -    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
      -    JoinWindows.of(Duration.ofMinutes(5)),
      -    Joined.with(
      -      Serdes.String(), /* key */
      -      Serdes.Long(),   /* left value */
      -      Serdes.Double())  /* right value */
      -  );
      -
      -// Java 7 example
      -KStream<String, String> joined = left.join(right,
      -    new ValueJoiner<Long, Double, String>() {
      -      @Override
      -      public String apply(Long leftValue, Double rightValue) {
      -        return "left=" + leftValue + ", right=" + rightValue;
      -      }
      -    },
      -    JoinWindows.of(Duration.ofMinutes(5)),
      -    Joined.with(
      -      Serdes.String(), /* key */
      -      Serdes.Long(),   /* left value */
      -      Serdes.Double())  /* right value */
      -  );
      -
      +
      import java.time.Duration;
      +KStream<String, Long> left = ...;
      +KStream<String, Double> right = ...;
      +
      +// Java 8+ example, using lambda expressions
      +KStream<String, String> joined = left.join(right,
      +    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
      +    JoinWindows.of(Duration.ofMinutes(5)),
      +    Joined.with(
      +      Serdes.String(), /* key */
      +      Serdes.Long(),   /* left value */
      +      Serdes.Double())  /* right value */
      +  );
      +
      +// Java 7 example
      +KStream<String, String> joined = left.join(right,
      +    new ValueJoiner<Long, Double, String>() {
      +      @Override
      +      public String apply(Long leftValue, Double rightValue) {
      +        return "left=" + leftValue + ", right=" + rightValue;
      +      }
      +    },
      +    JoinWindows.of(Duration.ofMinutes(5)),
      +    Joined.with(
      +      Serdes.String(), /* key */
      +      Serdes.Long(),   /* left value */
      +      Serdes.Double())  /* right value */
      +  );
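To make the windowed, key-based semantics concrete before the detailed behavior below, here is an invented illustration (the keys, values, and timestamps are made up for this sketch):

// With the 5-minute JoinWindows configured above:
//   left  ("alice", 1L)   at 10:00
//   right ("alice", 2.5)  at 10:03  -> joined: ("alice", "left=1, right=2.5")  (same key, within 5 minutes)
//   right ("alice", 9.9)  at 10:09  -> not joined with the 10:00 record        (outside the window)
//   right ("bob",   4.2)  at 10:01  -> not joined with the 10:00 record        (different key)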

      Detailed behavior:

• The join is key-based, i.e. with the join predicate leftRecord.key == rightRecord.key, and window-based, i.e. two input records are joined if and only if their

@@ -1885,35 +1849,34 @@

        co-partitioned.

        Causes data re-partitioning of a stream if and only if the stream was marked for re-partitioning (if both are marked, both are re-partitioned).

Several variants of leftJoin exist; see the Javadocs for details.

        -
        import java.time.Duration;
        -KStream<String, Long> left = ...;
        -KStream<String, Double> right = ...;
        -
        -// Java 8+ example, using lambda expressions
        -KStream<String, String> joined = left.leftJoin(right,
        -    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
        -    JoinWindows.of(Duration.ofMinutes(5)),
        -    Joined.with(
        -      Serdes.String(), /* key */
        -      Serdes.Long(),   /* left value */
        -      Serdes.Double())  /* right value */
        -  );
        -
        -// Java 7 example
        -KStream<String, String> joined = left.leftJoin(right,
        -    new ValueJoiner<Long, Double, String>() {
        -      @Override
        -      public String apply(Long leftValue, Double rightValue) {
        -        return "left=" + leftValue + ", right=" + rightValue;
        -      }
        -    },
        -    JoinWindows.of(Duration.ofMinutes(5)),
        -    Joined.with(
        -      Serdes.String(), /* key */
        -      Serdes.Long(),   /* left value */
        -      Serdes.Double())  /* right value */
        -  );
        -
        +
        import java.time.Duration;
        +KStream<String, Long> left = ...;
        +KStream<String, Double> right = ...;
        +
        +// Java 8+ example, using lambda expressions
        +KStream<String, String> joined = left.leftJoin(right,
        +    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
        +    JoinWindows.of(Duration.ofMinutes(5)),
        +    Joined.with(
        +      Serdes.String(), /* key */
        +      Serdes.Long(),   /* left value */
        +      Serdes.Double())  /* right value */
        +  );
        +
        +// Java 7 example
        +KStream<String, String> joined = left.leftJoin(right,
        +    new ValueJoiner<Long, Double, String>() {
        +      @Override
        +      public String apply(Long leftValue, Double rightValue) {
        +        return "left=" + leftValue + ", right=" + rightValue;
        +      }
        +    },
        +    JoinWindows.of(Duration.ofMinutes(5)),
        +    Joined.with(
        +      Serdes.String(), /* key */
        +      Serdes.Long(),   /* left value */
        +      Serdes.Double())  /* right value */
        +  );

        Detailed behavior:

• The join is key-based, i.e. with the join predicate leftRecord.key == rightRecord.key, and window-based, i.e. two input records are joined if and only if their

@@ -1946,35 +1909,34 @@

          co-partitioned.

          Causes data re-partitioning of a stream if and only if the stream was marked for re-partitioning (if both are marked, both are re-partitioned).

Several variants of outerJoin exist; see the Javadocs for details.

          -
          import java.time.Duration;
          -KStream<String, Long> left = ...;
          -KStream<String, Double> right = ...;
          -
          -// Java 8+ example, using lambda expressions
          -KStream<String, String> joined = left.outerJoin(right,
          -    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
          -    JoinWindows.of(Duration.ofMinutes(5)),
          -    Joined.with(
          -      Serdes.String(), /* key */
          -      Serdes.Long(),   /* left value */
          -      Serdes.Double())  /* right value */
          -  );
          -
          -// Java 7 example
          -KStream<String, String> joined = left.outerJoin(right,
          -    new ValueJoiner<Long, Double, String>() {
          -      @Override
          -      public String apply(Long leftValue, Double rightValue) {
          -        return "left=" + leftValue + ", right=" + rightValue;
          -      }
          -    },
          -    JoinWindows.of(Duration.ofMinutes(5)),
          -    Joined.with(
          -      Serdes.String(), /* key */
          -      Serdes.Long(),   /* left value */
          -      Serdes.Double())  /* right value */
          -  );
          -
          +
          import java.time.Duration;
          +KStream<String, Long> left = ...;
          +KStream<String, Double> right = ...;
          +
          +// Java 8+ example, using lambda expressions
          +KStream<String, String> joined = left.outerJoin(right,
          +    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
          +    JoinWindows.of(Duration.ofMinutes(5)),
          +    Joined.with(
          +      Serdes.String(), /* key */
          +      Serdes.Long(),   /* left value */
          +      Serdes.Double())  /* right value */
          +  );
          +
          +// Java 7 example
          +KStream<String, String> joined = left.outerJoin(right,
          +    new ValueJoiner<Long, Double, String>() {
          +      @Override
          +      public String apply(Long leftValue, Double rightValue) {
          +        return "left=" + leftValue + ", right=" + rightValue;
          +      }
          +    },
          +    JoinWindows.of(Duration.ofMinutes(5)),
          +    Joined.with(
          +      Serdes.String(), /* key */
          +      Serdes.Long(),   /* left value */
          +      Serdes.Double())  /* right value */
          +  );

          Detailed behavior:

• The join is key-based, i.e. with the join predicate leftRecord.key == rightRecord.key, and window-based, i.e. two input records are joined if and only if their

@@ -2131,15 +2093,14 @@

            table duals. The join result is a new KTable that represents the changelog stream of the join operation.

            Join output records are effectively created as follows, leveraging the user-supplied ValueJoiner:

            -
            KeyValue<K, LV> leftRecord = ...;
            -KeyValue<K, RV> rightRecord = ...;
            -ValueJoiner<LV, RV, JV> joiner = ...;
            -
            -KeyValue<K, JV> joinOutputRecord = KeyValue.pair(
            -    leftRecord.key, /* by definition, leftRecord.key == rightRecord.key */
            -    joiner.apply(leftRecord.value, rightRecord.value)
            -  );
            -
            +
            KeyValue<K, LV> leftRecord = ...;
            +KeyValue<K, RV> rightRecord = ...;
            +ValueJoiner<LV, RV, JV> joiner = ...;
            +
            +KeyValue<K, JV> joinOutputRecord = KeyValue.pair(
            +    leftRecord.key, /* by definition, leftRecord.key == rightRecord.key */
            +    joiner.apply(leftRecord.value, rightRecord.value)
            +  );

      @@ -2161,23 +2122,22 @@

      (details)

      Data must be co-partitioned: The input data for both sides must be co-partitioned.

      -
      KTable<String, Long> left = ...;
      -KTable<String, Double> right = ...;
      -
      -// Java 8+ example, using lambda expressions
      -KTable<String, String> joined = left.join(right,
      -    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue /* ValueJoiner */
      -  );
      -
      -// Java 7 example
      -KTable<String, String> joined = left.join(right,
      -    new ValueJoiner<Long, Double, String>() {
      -      @Override
      -      public String apply(Long leftValue, Double rightValue) {
      -        return "left=" + leftValue + ", right=" + rightValue;
      -      }
      -    });
      -
      +
      KTable<String, Long> left = ...;
      +KTable<String, Double> right = ...;
      +
      +// Java 8+ example, using lambda expressions
      +KTable<String, String> joined = left.join(right,
      +    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue /* ValueJoiner */
      +  );
      +
      +// Java 7 example
      +KTable<String, String> joined = left.join(right,
      +    new ValueJoiner<Long, Double, String>() {
      +      @Override
      +      public String apply(Long leftValue, Double rightValue) {
      +        return "left=" + leftValue + ", right=" + rightValue;
      +      }
      +    });

      Detailed behavior:

      @@ -2770,28 +2727,27 @@
      KTable-KTable Foreign-Key

      Data must be co-partitioned: The input data for both sides must be co-partitioned.

      Causes data re-partitioning of the stream if and only if the stream was marked for re-partitioning.

Several variants of join exist; see the Javadocs for details.

      -
      KStream<String, Long> left = ...;
      -KTable<String, Double> right = ...;
      -
      -// Java 8+ example, using lambda expressions
      -KStream<String, String> joined = left.join(right,
      -    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
      -    Joined.keySerde(Serdes.String()) /* key */
      -      .withValueSerde(Serdes.Long()) /* left value */
      -  );
      -
      -// Java 7 example
      -KStream<String, String> joined = left.join(right,
      -    new ValueJoiner<Long, Double, String>() {
      -      @Override
      -      public String apply(Long leftValue, Double rightValue) {
      -        return "left=" + leftValue + ", right=" + rightValue;
      -      }
      -    },
      -    Joined.keySerde(Serdes.String()) /* key */
      -      .withValueSerde(Serdes.Long()) /* left value */
      -  );
      -
      +
      KStream<String, Long> left = ...;
      +KTable<String, Double> right = ...;
      +
      +// Java 8+ example, using lambda expressions
      +KStream<String, String> joined = left.join(right,
      +    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue, /* ValueJoiner */
      +    Joined.keySerde(Serdes.String()) /* key */
      +      .withValueSerde(Serdes.Long()) /* left value */
      +  );
      +
      +// Java 7 example
      +KStream<String, String> joined = left.join(right,
      +    new ValueJoiner<Long, Double, String>() {
      +      @Override
      +      public String apply(Long leftValue, Double rightValue) {
      +        return "left=" + leftValue + ", right=" + rightValue;
      +      }
      +    },
      +    Joined.keySerde(Serdes.String()) /* key */
      +      .withValueSerde(Serdes.Long()) /* left value */
      +  );

      Detailed behavior:

      @@ -3038,30 +2992,29 @@
      KTable-KTable Foreign-Key

      The GlobalKTable is fully bootstrapped upon (re)start of a KafkaStreams instance, which means the table is fully populated with all the data in the underlying topic that is available at the time of the startup. The actual data processing begins only once the bootstrapping has completed.

      Causes data re-partitioning of the stream if and only if the stream was marked for re-partitioning.

      -
      KStream<String, Long> left = ...;
      -GlobalKTable<Integer, Double> right = ...;
      -
      -// Java 8+ example, using lambda expressions
      -KStream<String, String> joined = left.join(right,
      -    (leftKey, leftValue) -> leftKey.length(), /* derive a (potentially) new key by which to lookup against the table */
      -    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue /* ValueJoiner */
      -  );
      -
      -// Java 7 example
      -KStream<String, String> joined = left.join(right,
      -    new KeyValueMapper<String, Long, Integer>() { /* derive a (potentially) new key by which to lookup against the table */
      -      @Override
      -      public Integer apply(String key, Long value) {
      -        return key.length();
      -      }
      -    },
      -    new ValueJoiner<Long, Double, String>() {
      -      @Override
      -      public String apply(Long leftValue, Double rightValue) {
      -        return "left=" + leftValue + ", right=" + rightValue;
      -      }
      -    });
      -
      +
      KStream<String, Long> left = ...;
      +GlobalKTable<Integer, Double> right = ...;
      +
      +// Java 8+ example, using lambda expressions
      +KStream<String, String> joined = left.join(right,
      +    (leftKey, leftValue) -> leftKey.length(), /* derive a (potentially) new key by which to lookup against the table */
      +    (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue /* ValueJoiner */
      +  );
      +
      +// Java 7 example
      +KStream<String, String> joined = left.join(right,
      +    new KeyValueMapper<String, Long, Integer>() { /* derive a (potentially) new key by which to lookup against the table */
      +      @Override
      +      public Integer apply(String key, Long value) {
      +        return key.length();
      +      }
      +    },
      +    new ValueJoiner<Long, Double, String>() {
      +      @Override
      +      public String apply(Long leftValue, Double rightValue) {
      +        return "left=" + leftValue + ", right=" + rightValue;
      +      }
      +    });

      Detailed behavior:

      The class/interface hierarchy for your custom store might look something like:

      -
      public class MyCustomStore<K,V> implements StateStore, MyWriteableCustomStore<K,V> {
      -  // implementation of the actual store
      -}
      -
      -// Read-write interface for MyCustomStore
      -public interface MyWriteableCustomStore<K,V> extends MyReadableCustomStore<K,V> {
      -  void write(K Key, V value);
      -}
      -
      -// Read-only interface for MyCustomStore
      -public interface MyReadableCustomStore<K,V> {
      -  V read(K key);
      -}
      -
      -public class MyCustomStoreBuilder implements StoreBuilder {
      -  // implementation of the supplier for MyCustomStore
      -}
      -
      +
      public class MyCustomStore<K,V> implements StateStore, MyWriteableCustomStore<K,V> {
      +  // implementation of the actual store
      +}
      +
      +// Read-write interface for MyCustomStore
      +public interface MyWriteableCustomStore<K,V> extends MyReadableCustomStore<K,V> {
      +  void write(K Key, V value);
      +}
      +
      +// Read-only interface for MyCustomStore
      +public interface MyReadableCustomStore<K,V> {
      +  V read(K key);
      +}
      +
      +public class MyCustomStoreBuilder implements StoreBuilder {
      +  // implementation of the supplier for MyCustomStore
      +}

      To make this store queryable you must:

      • Provide an implementation of QueryableStoreType.
      • Provide a wrapper class that has access to all of the underlying instances of the store and is used for querying.

      Here is how to implement QueryableStoreType:

      -
      public class MyCustomStoreType<K,V> implements QueryableStoreType<MyReadableCustomStore<K,V>> {
      +                
      public class MyCustomStoreType<K,V> implements QueryableStoreType<MyReadableCustomStore<K,V>> {
       
      -  // Only accept StateStores that are of type MyCustomStore
      -  public boolean accepts(final StateStore stateStore) {
      -    return stateStore instanceOf MyCustomStore;
      -  }
      +  // Only accept StateStores that are of type MyCustomStore
      +  public boolean accepts(final StateStore stateStore) {
+    return stateStore instanceof MyCustomStore;
      +  }
       
      -  public MyReadableCustomStore<K,V> create(final StateStoreProvider storeProvider, final String storeName) {
      -      return new MyCustomStoreTypeWrapper(storeProvider, storeName, this);
      -  }
      +  public MyReadableCustomStore<K,V> create(final StateStoreProvider storeProvider, final String storeName) {
      +      return new MyCustomStoreTypeWrapper(storeProvider, storeName, this);
      +  }
       
      -}
      -
      +}

      A wrapper class is required because each instance of a Kafka Streams application may run multiple stream tasks and manage multiple local instances of a particular state store. The wrapper class hides this complexity and lets you query a “logical” state store by name without having to know about all of the underlying local instances of that state store.

      @@ -279,56 +269,53 @@ StateStoreProvider#stores(String storeName, QueryableStoreType<T> queryableStoreType) returns a List of state stores with the given storeName and of the type as defined by queryableStoreType.

      Here is an example implementation of the wrapper follows (Java 8+):

      -
      // We strongly recommended implementing a read-only interface
      -// to restrict usage of the store to safe read operations!
      -public class MyCustomStoreTypeWrapper<K,V> implements MyReadableCustomStore<K,V> {
      -
      -  private final QueryableStoreType<MyReadableCustomStore<K, V>> customStoreType;
      -  private final String storeName;
      -  private final StateStoreProvider provider;
      -
      -  public CustomStoreTypeWrapper(final StateStoreProvider provider,
      -                              final String storeName,
      -                              final QueryableStoreType<MyReadableCustomStore<K, V>> customStoreType) {
      -
      -    // ... assign fields ...
      -  }
      -
      -  // Implement a safe read method
      -  @Override
      -  public V read(final K key) {
      -    // Get all the stores with storeName and of customStoreType
      -    final List<MyReadableCustomStore<K, V>> stores = provider.getStores(storeName, customStoreType);
      -    // Try and find the value for the given key
      -    final Optional<V> value = stores.stream().filter(store -> store.read(key) != null).findFirst();
      -    // Return the value if it exists
      -    return value.orElse(null);
      -  }
      -
      -}
      -
      +
      // We strongly recommended implementing a read-only interface
      +// to restrict usage of the store to safe read operations!
      +public class MyCustomStoreTypeWrapper<K,V> implements MyReadableCustomStore<K,V> {
      +
      +  private final QueryableStoreType<MyReadableCustomStore<K, V>> customStoreType;
      +  private final String storeName;
      +  private final StateStoreProvider provider;
      +
+  public MyCustomStoreTypeWrapper(final StateStoreProvider provider,
      +                              final String storeName,
      +                              final QueryableStoreType<MyReadableCustomStore<K, V>> customStoreType) {
      +
      +    // ... assign fields ...
      +  }
      +
      +  // Implement a safe read method
      +  @Override
      +  public V read(final K key) {
      +    // Get all the stores with storeName and of customStoreType
+    final List<MyReadableCustomStore<K, V>> stores = provider.stores(storeName, customStoreType);
      +    // Try and find the value for the given key
+    final Optional<V> value = stores.stream().map(store -> store.read(key)).filter(v -> v != null).findFirst();
      +    // Return the value if it exists
      +    return value.orElse(null);
      +  }
      +
      +}

      You can now find and query your custom store:

      -
      
      -Topology topology = ...;
      -ProcessorSupplier processorSuppler = ...;
      -
      -// Create CustomStoreSupplier for store name the-custom-store
      -MyCustomStoreBuilder customStoreBuilder = new MyCustomStoreBuilder("the-custom-store") //...;
      -// Add the source topic
      -topology.addSource("input", "inputTopic");
      -// Add a custom processor that reads from the source topic
      -topology.addProcessor("the-processor", processorSupplier, "input");
      -// Connect your custom state store to the custom processor above
      -topology.addStateStore(customStoreBuilder, "the-processor");
      -
      -KafkaStreams streams = new KafkaStreams(topology, config);
      -streams.start();
      -
      -// Get access to the custom store
      -MyReadableCustomStore<String,String> store = streams.store("the-custom-store", new MyCustomStoreType<String,String>());
      -// Query the store
      -String value = store.read("key");
      -
      +
      Topology topology = ...;
+ProcessorSupplier processorSupplier = ...;
      +
      +// Create CustomStoreSupplier for store name the-custom-store
      +MyCustomStoreBuilder customStoreBuilder = new MyCustomStoreBuilder("the-custom-store") //...;
      +// Add the source topic
      +topology.addSource("input", "inputTopic");
      +// Add a custom processor that reads from the source topic
      +topology.addProcessor("the-processor", processorSupplier, "input");
      +// Connect your custom state store to the custom processor above
      +topology.addStateStore(customStoreBuilder, "the-processor");
      +
      +KafkaStreams streams = new KafkaStreams(topology, config);
      +streams.start();
      +
      +// Get access to the custom store
      +MyReadableCustomStore<String,String> store = streams.store("the-custom-store", new MyCustomStoreType<String,String>());
      +// Query the store
      +String value = store.read("key");
      @@ -369,41 +356,39 @@ piggybacking additional inter-application communication that goes beyond interactive queries.

      This example shows how to configure and run a Kafka Streams application that supports the discovery of its state stores.

      -
      Properties props = new Properties();
      -// Set the unique RPC endpoint of this application instance through which it
      -// can be interactively queried.  In a real application, the value would most
      -// probably not be hardcoded but derived dynamically.
      -String rpcEndpoint = "host1:4460";
      -props.put(StreamsConfig.APPLICATION_SERVER_CONFIG, rpcEndpoint);
      -// ... further settings may follow here ...
      -
      -StreamsBuilder builder = new StreamsBuilder();
      -
      -KStream<String, String> textLines = builder.stream(stringSerde, stringSerde, "word-count-input");
      -
      -final KGroupedStream<String, String> groupedByWord = textLines
      -    .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
      -    .groupBy((key, word) -> word, Grouped.with(stringSerde, stringSerde));
      -
      -// This call to `count()` creates a state store named "word-count".
      -// The state store is discoverable and can be queried interactively.
      -groupedByWord.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>as("word-count"));
      -
      -// Start an instance of the topology
      -KafkaStreams streams = new KafkaStreams(builder, props);
      -streams.start();
      -
      -// Then, create and start the actual RPC service for remote access to this
      -// application instance's local state stores.
      -//
      -// This service should be started on the same host and port as defined above by
      -// the property `StreamsConfig.APPLICATION_SERVER_CONFIG`.  The example below is
      -// fictitious, but we provide end-to-end demo applications (such as KafkaMusicExample)
      -// that showcase how to implement such a service to get you started.
      -MyRPCService rpcService = ...;
      -rpcService.listenAt(rpcEndpoint);
      -
-
+
      Properties props = new Properties();
      +// Set the unique RPC endpoint of this application instance through which it
      +// can be interactively queried.  In a real application, the value would most
      +// probably not be hardcoded but derived dynamically.
      +String rpcEndpoint = "host1:4460";
      +props.put(StreamsConfig.APPLICATION_SERVER_CONFIG, rpcEndpoint);
      +// ... further settings may follow here ...
      +
      +StreamsBuilder builder = new StreamsBuilder();
      +
+KStream<String, String> textLines = builder.stream("word-count-input", Consumed.with(stringSerde, stringSerde));
      +
      +final KGroupedStream<String, String> groupedByWord = textLines
      +    .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
      +    .groupBy((key, word) -> word, Grouped.with(stringSerde, stringSerde));
      +
      +// This call to `count()` creates a state store named "word-count".
      +// The state store is discoverable and can be queried interactively.
+groupedByWord.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-count"));
      +
      +// Start an instance of the topology
+KafkaStreams streams = new KafkaStreams(builder.build(), props);
      +streams.start();
      +
      +// Then, create and start the actual RPC service for remote access to this
      +// application instance's local state stores.
      +//
      +// This service should be started on the same host and port as defined above by
      +// the property `StreamsConfig.APPLICATION_SERVER_CONFIG`.  The example below is
      +// fictitious, but we provide end-to-end demo applications (such as KafkaMusicExample)
      +// that showcase how to implement such a service to get you started.
      +MyRPCService rpcService = ...;
      +rpcService.listenAt(rpcEndpoint);

      Discovering and accessing application instances and their local state stores

      The following methods return StreamsMetadata objects, which provide meta-information about application instances such as their RPC endpoint and locally available state stores.
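For reference, a minimal sketch of these discovery calls; the store name and key below are placeholders reused from the earlier word-count example:

KafkaStreams streams = ...;

// Metadata for all running instances of this application
Collection<StreamsMetadata> allInstances = streams.allMetadata();

// Metadata for all instances that host the state store "word-count"
Collection<StreamsMetadata> storeInstances = streams.allMetadataForStore("word-count");

// Metadata for the single instance that hosts the key "alice" of that store
StreamsMetadata aliceHost = streams.metadataForKey("word-count", "alice", Serdes.String().serializer());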

      @@ -419,39 +404,38 @@

      For example, we can now find the StreamsMetadata for the state store named “word-count” that we defined in the code example shown in the previous section:

      -
      KafkaStreams streams = ...;
      -// Find all the locations of local instances of the state store named "word-count"
      -Collection<StreamsMetadata> wordCountHosts = streams.allMetadataForStore("word-count");
      -
      -// For illustrative purposes, we assume using an HTTP client to talk to remote app instances.
      -HttpClient http = ...;
      -
      -// Get the word count for word (aka key) 'alice': Approach 1
      -//
      -// We first find the one app instance that manages the count for 'alice' in its local state stores.
      -StreamsMetadata metadata = streams.metadataForKey("word-count", "alice", Serdes.String().serializer());
      -// Then, we query only that single app instance for the latest count of 'alice'.
      -// Note: The RPC URL shown below is fictitious and only serves to illustrate the idea.  Ultimately,
      -// the URL (or, in general, the method of communication) will depend on the RPC layer you opted to
      -// implement.  Again, we provide end-to-end demo applications (such as KafkaMusicExample) that showcase
      -// how to implement such an RPC layer.
      -Long result = http.getLong("http://" + metadata.host() + ":" + metadata.port() + "/word-count/alice");
      -
      -// Get the word count for word (aka key) 'alice': Approach 2
      -//
      -// Alternatively, we could also choose (say) a brute-force approach where we query every app instance
      -// until we find the one that happens to know about 'alice'.
      -Optional<Long> result = streams.allMetadataForStore("word-count")
      -    .stream()
      -    .map(streamsMetadata -> {
      -        // Construct the (fictituous) full endpoint URL to query the current remote application instance
      -        String url = "http://" + streamsMetadata.host() + ":" + streamsMetadata.port() + "/word-count/alice";
      -        // Read and return the count for 'alice', if any.
      -        return http.getLong(url);
      -    })
      -    .filter(s -> s != null)
      -    .findFirst();
      -
      +
      KafkaStreams streams = ...;
      +// Find all the locations of local instances of the state store named "word-count"
      +Collection<StreamsMetadata> wordCountHosts = streams.allMetadataForStore("word-count");
      +
      +// For illustrative purposes, we assume using an HTTP client to talk to remote app instances.
      +HttpClient http = ...;
      +
      +// Get the word count for word (aka key) 'alice': Approach 1
      +//
      +// We first find the one app instance that manages the count for 'alice' in its local state stores.
      +StreamsMetadata metadata = streams.metadataForKey("word-count", "alice", Serdes.String().serializer());
      +// Then, we query only that single app instance for the latest count of 'alice'.
      +// Note: The RPC URL shown below is fictitious and only serves to illustrate the idea.  Ultimately,
      +// the URL (or, in general, the method of communication) will depend on the RPC layer you opted to
      +// implement.  Again, we provide end-to-end demo applications (such as KafkaMusicExample) that showcase
      +// how to implement such an RPC layer.
      +Long result = http.getLong("http://" + metadata.host() + ":" + metadata.port() + "/word-count/alice");
      +
      +// Get the word count for word (aka key) 'alice': Approach 2
      +//
      +// Alternatively, we could also choose (say) a brute-force approach where we query every app instance
      +// until we find the one that happens to know about 'alice'.
      +Optional<Long> result = streams.allMetadataForStore("word-count")
      +    .stream()
      +    .map(streamsMetadata -> {
+        // Construct the (fictitious) full endpoint URL to query the current remote application instance
      +        String url = "http://" + streamsMetadata.host() + ":" + streamsMetadata.port() + "/word-count/alice";
      +        // Read and return the count for 'alice', if any.
      +        return http.getLong(url);
      +    })
      +    .filter(s -> s != null)
      +    .findFirst();

      At this point the full state of the application is interactively queryable:

      • You can discover the running instances of the application and the state stores they manage locally.
•

diff --git a/docs/streams/developer-guide/memory-mgmt.html b/docs/streams/developer-guide/memory-mgmt.html
index 9aa382e089a20..9a39ce1e78a5c 100644
--- a/docs/streams/developer-guide/memory-mgmt.html
+++ b/docs/streams/developer-guide/memory-mgmt.html
@@ -80,10 +80,9 @@

      The cache size is specified through the cache.max.bytes.buffering parameter, which is a global setting per processing topology:

      -
      // Enable record cache of size 10 MB.
      -Properties props = new Properties();
      -props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 10 * 1024 * 1024L);
      -
      +
      // Enable record cache of size 10 MB.
      +Properties props = new Properties();
      +props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 10 * 1024 * 1024L);

This parameter controls the number of bytes allocated for caching. Specifically, for a processor topology instance with T threads and C bytes allocated for caching, each thread will have an even C/T bytes to construct its own cache and use as it sees fit among its tasks. This means that there are as many caches as there are threads, but no sharing of

@@ -103,27 +102,16 @@
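As an illustrative sketch of that division (the cache size and thread count are arbitrary values chosen for this example):

Properties props = new Properties();
// C = 10 MB of cache for the whole topology instance ...
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 10 * 1024 * 1024L);
// ... shared by T = 5 processing threads ...
props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 5);
// ... so each thread gets roughly C / T = 2 MB of cache to use among its own tasks.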

      Here are example settings for both parameters based on desired scenarios.

• To turn off caching, the cache size can be set to zero:

        -
        -
        // Disable record cache
        -Properties props = new Properties();
        -props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
        -
        -

Turning off caching might result in high write traffic for the underlying RocksDB store. With default settings caching is enabled within Kafka Streams but RocksDB caching is disabled. Thus, to avoid high write traffic it is recommended to enable RocksDB caching if Kafka Streams caching is turned off.

        -

For example, the RocksDB Block Cache could be set to 100 MB and the Write Buffer size to 32 MB. For more information, see the RocksDB config.

        -
        +
        // Disable record cache
        +Properties props = new Properties();
        +props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
      • To enable caching but still have an upper bound on how long records will be cached, you can set the commit interval. In this example, it is set to 1000 milliseconds:

        -
        -
        Properties props = new Properties();
        -// Enable record cache of size 10 MB.
        -props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 10 * 1024 * 1024L);
        -// Set commit interval to 1 second.
        -props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
        -
        -
        +
        Properties props = new Properties();
        +// Enable record cache of size 10 MB.
        +props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 10 * 1024 * 1024L);
        +// Set commit interval to 1 second.
        +props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);

      The effect of these two configurations is described in the figure below. The records are shown using 4 keys: blue, red, yellow, and green. Assume the cache has space for only 3 keys.

      @@ -156,13 +144,12 @@

      Following from the example first shown in section State Stores, to disable caching, you can add the withCachingDisabled call (note that caches are enabled by default, however there is an explicit withCachingEnabled call).

      -
      StoreBuilder countStoreBuilder =
      -  Stores.keyValueStoreBuilder(
      -    Stores.persistentKeyValueStore("Counts"),
      -    Serdes.String(),
      -    Serdes.Long())
      -  .withCachingEnabled()
      -
      +
      StoreBuilder countStoreBuilder =
      +  Stores.keyValueStoreBuilder(
      +    Stores.persistentKeyValueStore("Counts"),
      +    Serdes.String(),
      +    Serdes.Long())
+  .withCachingDisabled();

      RocksDB

      @@ -171,44 +158,42 @@

RocksDB

RocksDB can be configured via the rocksdb.config.setter configuration.

Also, we recommend changing RocksDB's default memory allocator, because the default allocator may lead to increased memory consumption. To change the memory allocator to jemalloc, you need to set the environment variable LD_PRELOAD before you start your Kafka Streams application:

      -
      -# example: install jemalloc (on Debian)
      +      
      # example: install jemalloc (on Debian)
       $ apt install -y libjemalloc-dev
       # set LD_PRELOAD before you start your Kafka Streams application
 $ export LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libjemalloc.so"
      -      
      +

      As of 2.3.0 the memory usage across all instances can be bounded, limiting the total off-heap memory of your Kafka Streams application. To do so you must configure RocksDB to cache the index and filter blocks in the block cache, limit the memtable memory through a shared WriteBufferManager and count its memory against the block cache, and then pass the same Cache object to each instance. See RocksDB Memory Usage for details. An example RocksDBConfigSetter implementing this is shown below:

      +
      public static class BoundedMemoryRocksDBConfig implements RocksDBConfigSetter {
       
      -      
          public static class BoundedMemoryRocksDBConfig implements RocksDBConfigSetter {
      -
      -       private static org.rocksdb.Cache cache = new org.rocksdb.LRUCache(TOTAL_OFF_HEAP_MEMORY, -1, false, INDEX_FILTER_BLOCK_RATIO);1
      -       private static org.rocksdb.WriteBufferManager writeBufferManager = new org.rocksdb.WriteBufferManager(TOTAL_MEMTABLE_MEMORY, cache);
      +   private static org.rocksdb.Cache cache = new org.rocksdb.LRUCache(TOTAL_OFF_HEAP_MEMORY, -1, false, INDEX_FILTER_BLOCK_RATIO);1
      +   private static org.rocksdb.WriteBufferManager writeBufferManager = new org.rocksdb.WriteBufferManager(TOTAL_MEMTABLE_MEMORY, cache);
       
      -       @Override
      -       public void setConfig(final String storeName, final Options options, final Map<String, Object> configs) {
      +   @Override
      +   public void setConfig(final String storeName, final Options options, final Map<String, Object> configs) {
       
      -         BlockBasedTableConfig tableConfig = (BlockBasedTableConfig) options.tableFormatConfig();
      +     BlockBasedTableConfig tableConfig = (BlockBasedTableConfig) options.tableFormatConfig();
       
      -          // These three options in combination will limit the memory used by RocksDB to the size passed to the block cache (TOTAL_OFF_HEAP_MEMORY)
      -         tableConfig.setBlockCache(cache);
      -         tableConfig.setCacheIndexAndFilterBlocks(true);
      -         options.setWriteBufferManager(writeBufferManager);
      +      // These three options in combination will limit the memory used by RocksDB to the size passed to the block cache (TOTAL_OFF_HEAP_MEMORY)
      +     tableConfig.setBlockCache(cache);
      +     tableConfig.setCacheIndexAndFilterBlocks(true);
      +     options.setWriteBufferManager(writeBufferManager);
       
      -          // These options are recommended to be set when bounding the total memory
      -         tableConfig.setCacheIndexAndFilterBlocksWithHighPriority(true);2
      -         tableConfig.setPinTopLevelIndexAndFilter(true);
      -         tableConfig.setBlockSize(BLOCK_SIZE);3
      -         options.setMaxWriteBufferNumber(N_MEMTABLES);
      -         options.setWriteBufferSize(MEMTABLE_SIZE);
      +      // These options are recommended to be set when bounding the total memory
      +     tableConfig.setCacheIndexAndFilterBlocksWithHighPriority(true);2
      +     tableConfig.setPinTopLevelIndexAndFilter(true);
      +     tableConfig.setBlockSize(BLOCK_SIZE);3
      +     options.setMaxWriteBufferNumber(N_MEMTABLES);
      +     options.setWriteBufferSize(MEMTABLE_SIZE);
       
      -         options.setTableFormatConfig(tableConfig);
      -       }
      +     options.setTableFormatConfig(tableConfig);
      +   }
       
      -       @Override
      -       public void close(final String storeName, final Options options) {
      -         // Cache and WriteBufferManager should not be closed here, as the same objects are shared by every store instance.
      -       }
      -    }
      +   @Override
      +   public void close(final String storeName, final Options options) {
      +     // Cache and WriteBufferManager should not be closed here, as the same objects are shared by every store instance.
      +   }
      +}
1. INDEX_FILTER_BLOCK_RATIO can be used to set a fraction of the block cache to set aside for "high priority" (aka index and filter) blocks, preventing them from being evicted by data blocks. See the full signature of the LRUCache constructor. NOTE: the boolean parameter in the cache constructor lets you control whether the cache should enforce a strict memory limit by failing the read or iteration in the rare cases where it might go larger than its capacity. Due to a

diff --git a/docs/streams/developer-guide/processor-api.html b/docs/streams/developer-guide/processor-api.html
index 9cabac030b90c..589a3ff0aa40e 100644
--- a/docs/streams/developer-guide/processor-api.html
+++ b/docs/streams/developer-guide/processor-api.html
@@ -119,47 +119,46 @@

Overview

    1. In the process() method, upon each received record, split the value string into words, and update their counts into the state store (we will talk about this later in this section).
    2. In the punctuate() method, iterate the local state store and send the aggregated counts to the downstream processor (we will talk about downstream processors later in this section), and commit the current stream state.

      -
      public class WordCountProcessor implements Processor<String, String> {
      +            
      public class WordCountProcessor implements Processor<String, String> {
       
      -  private ProcessorContext context;
      -  private KeyValueStore<String, Long> kvStore;
      +  private ProcessorContext context;
      +  private KeyValueStore<String, Long> kvStore;
       
      -  @Override
      -  @SuppressWarnings("unchecked")
      -  public void init(ProcessorContext context) {
      -      // keep the processor context locally because we need it in punctuate() and commit()
      -      this.context = context;
      +  @Override
      +  @SuppressWarnings("unchecked")
      +  public void init(ProcessorContext context) {
      +      // keep the processor context locally because we need it in punctuate() and commit()
      +      this.context = context;
       
      -      // retrieve the key-value store named "Counts"
      -      kvStore = (KeyValueStore) context.getStateStore("Counts");
      +      // retrieve the key-value store named "Counts"
      +      kvStore = (KeyValueStore) context.getStateStore("Counts");
       
      -      // schedule a punctuate() method every second based on stream-time
      -      this.context.schedule(Duration.ofSeconds(1000), PunctuationType.STREAM_TIME, (timestamp) -> {
      -          KeyValueIterator<String, Long> iter = this.kvStore.all();
      -          while (iter.hasNext()) {
      -              KeyValue<String, Long> entry = iter.next();
      -              context.forward(entry.key, entry.value.toString());
      -          }
      -          iter.close();
      +      // schedule a punctuate() method every second based on stream-time
+      this.context.schedule(Duration.ofSeconds(1), PunctuationType.STREAM_TIME, (timestamp) -> {
      +          KeyValueIterator<String, Long> iter = this.kvStore.all();
      +          while (iter.hasNext()) {
      +              KeyValue<String, Long> entry = iter.next();
      +              context.forward(entry.key, entry.value.toString());
      +          }
      +          iter.close();
       
      -          // commit the current processing progress
      -          context.commit();
      -      });
      -  }
      +          // commit the current processing progress
      +          context.commit();
      +      });
      +  }
       
      -  @Override
      -  public void punctuate(long timestamp) {
      -      // this method is deprecated and should not be used anymore
      -  }
      +  @Override
      +  public void punctuate(long timestamp) {
      +      // this method is deprecated and should not be used anymore
      +  }
       
      -  @Override
      -  public void close() {
      -      // close any resources managed by this processor
      -      // Note: Do not close any StateStores as these are managed by the library
      -  }
      +  @Override
      +  public void close() {
      +      // close any resources managed by this processor
      +      // Note: Do not close any StateStores as these are managed by the library
      +  }
       
      -}
      -
      +}

      Note

Stateful processing with state stores:

@@ -234,19 +233,18 @@

    3. Use persistentTimestampedWindowStore when you need a persistent windowed key-(value/timestamp) store (a sketch of such a builder follows the key-value example below).
    4.

      -
      // Creating a persistent key-value store:
      -// here, we create a `KeyValueStore<String, Long>` named "persistent-counts".
      -import org.apache.kafka.streams.state.StoreBuilder;
      -import org.apache.kafka.streams.state.Stores;
      +                            
      // Creating a persistent key-value store:
      +// here, we create a `KeyValueStore<String, Long>` named "persistent-counts".
      +import org.apache.kafka.streams.state.StoreBuilder;
      +import org.apache.kafka.streams.state.Stores;
       
      -// Using a `KeyValueStoreBuilder` to build a `KeyValueStore`.
      -StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier =
      -  Stores.keyValueStoreBuilder(
      -    Stores.persistentKeyValueStore("persistent-counts"),
      -    Serdes.String(),
      -    Serdes.Long());
      -KeyValueStore<String, Long> countStore = countStoreSupplier.build();
      -
+// Using a `KeyValueStoreBuilder` to build a `KeyValueStore`.
+StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier =
+  Stores.keyValueStoreBuilder(
+    Stores.persistentKeyValueStore("persistent-counts"),
+    Serdes.String(),
+    Serdes.Long());
+KeyValueStore<String, Long> countStore = countStoreSupplier.build();
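As referenced in the list item above, a minimal sketch of the corresponding builder for a persistent timestamped window store; the store name, retention period, and window size are illustrative only:

import java.time.Duration;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.Stores;
import org.apache.kafka.streams.state.TimestampedWindowStore;

StoreBuilder<TimestampedWindowStore<String, Long>> windowStoreBuilder =
  Stores.timestampedWindowStoreBuilder(
    Stores.persistentTimestampedWindowStore(
      "persistent-windowed-counts",
      Duration.ofDays(1),     // retention period
      Duration.ofMinutes(5),  // window size
      false),                 // do not retain duplicates
    Serdes.String(),
    Serdes.Long());
TimestampedWindowStore<String, Long> windowStore = windowStoreBuilder.build();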

      @@ -317,15 +314,14 @@

      of the store through enableLogging() and disableLogging(). You can also fine-tune the associated topic’s configuration if needed.

      Example for disabling fault-tolerance:

      -
      import org.apache.kafka.streams.state.StoreBuilder;
      -import org.apache.kafka.streams.state.Stores;
      +                
      import org.apache.kafka.streams.state.StoreBuilder;
      +import org.apache.kafka.streams.state.Stores;
       
      -StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier = Stores.keyValueStoreBuilder(
      -  Stores.persistentKeyValueStore("Counts"),
      -    Serdes.String(),
      -    Serdes.Long())
      -  .withLoggingDisabled(); // disable backing up the store to a changelog topic
      -
+StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier = Stores.keyValueStoreBuilder(
+  Stores.persistentKeyValueStore("Counts"),
+    Serdes.String(),
+    Serdes.Long())
+  .withLoggingDisabled(); // disable backing up the store to a changelog topic

      Attention

      If the changelog is disabled then the attached state store is no longer fault tolerant and it can’t have any standby replicas.

      @@ -333,19 +329,18 @@

      Here is an example for enabling fault tolerance, with additional changelog-topic configuration: You can add any log config from kafka.log.LogConfig. Unrecognized configs will be ignored.

      -
      import org.apache.kafka.streams.state.StoreBuilder;
      -import org.apache.kafka.streams.state.Stores;
      +                
      import org.apache.kafka.streams.state.StoreBuilder;
      +import org.apache.kafka.streams.state.Stores;
       
      -Map<String, String> changelogConfig = new HashMap();
      -// override min.insync.replicas
      -changelogConfig.put(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG, "1")
      +Map<String, String> changelogConfig = new HashMap();
      +// override min.insync.replicas
+changelogConfig.put(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG, "1");
       
      -StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier = Stores.keyValueStoreBuilder(
      -  Stores.persistentKeyValueStore("Counts"),
      -    Serdes.String(),
      -    Serdes.Long())
      -  .withLoggingEnabled(changlogConfig); // enable changelogging, with custom changelog settings
      -
+StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier = Stores.keyValueStoreBuilder(
+  Stores.persistentKeyValueStore("Counts"),
+    Serdes.String(),
+    Serdes.Long())
+  .withLoggingEnabled(changelogConfig); // enable changelogging, with custom changelog settings

      Timestamped State Stores

      @@ -389,12 +384,11 @@

Accessing Processor Context

The ProcessorContext also provides access to the metadata of the record currently being processed, such as its topic, partition, offset, timestamp and headers.

      Here is an example implementation of how to add a new header to the record:

      -
      public void process(String key, String value) {
      +            
      public void process(String key, String value) {
       
      -    // add a header to the elements
      -    context().headers().add.("key", "key"
      -}
      -
+    // add a header to the elements
+    context().headers().add("key", "value".getBytes());
+}

      Connecting Processors and State Stores

      Now that a processor (WordCountProcessor) and the @@ -403,16 +397,16 @@

      Connecting Processors and State Stores

      Here is an example implementation:

      -
                      Topology builder = new Topology();
      -                // add the source processor node that takes Kafka topic "source-topic" as input
      -                builder.addSource("Source", "source-topic")
      -                    // add the WordCountProcessor node which takes the source processor as its upstream processor
      -                    .addProcessor("Process", () -> new WordCountProcessor(), "Source")
      -                    // add the count store associated with the WordCountProcessor processor
      -                    .addStateStore(countStoreBuilder, "Process")
      -                    // add the sink processor node that takes Kafka topic "sink-topic" as output
      -                    // and the WordCountProcessor node as its upstream processor
      -                    .addSink("Sink", "sink-topic", "Process");
      +
      Topology builder = new Topology();
      +// add the source processor node that takes Kafka topic "source-topic" as input
      +builder.addSource("Source", "source-topic")
      +    // add the WordCountProcessor node which takes the source processor as its upstream processor
      +    .addProcessor("Process", () -> new WordCountProcessor(), "Source")
      +    // add the count store associated with the WordCountProcessor processor
      +    .addStateStore(countStoreBuilder, "Process")
      +    // add the sink processor node that takes Kafka topic "sink-topic" as output
      +    // and the WordCountProcessor node as its upstream processor
      +    .addSink("Sink", "sink-topic", "Process");

      Here is a quick explanation of this example:

      • A source processor node named "Source" is added to the topology using the addSource method, with one Kafka topic @@ -429,22 +423,22 @@

        Connecting Processors and State StoresConnectedStoreProvider#stores() on the ProcessorSupplier instead of calling Topology#addStateStore(), like this:

        -
                        Topology builder = new Topology();
        -                // add the source processor node that takes Kafka "source-topic" as input
        -                builder.addSource("Source", "source-topic")
        -                    // add the WordCountProcessor node which takes the source processor as its upstream processor.
        -                    // the ProcessorSupplier provides the count store associated with the WordCountProcessor
        -                    .addProcessor("Process", new ProcessorSupplier<String, String>() {
        -                        public Processor<String, String> get() {
        -                            return new WordCountProcessor();
        -                        }
        -                        public Set<StoreBuilder<?>> stores() {
        -                            return countStoreBuilder;
        -                        }
        -                    }, "Source")
        -                    // add the sink processor node that takes Kafka topic "sink-topic" as output
        -                    // and the WordCountProcessor node as its upstream processor
        -                    .addSink("Sink", "sink-topic", "Process");
        +
        Topology builder = new Topology();
        +// add the source processor node that takes Kafka "source-topic" as input
        +builder.addSource("Source", "source-topic")
        +    // add the WordCountProcessor node which takes the source processor as its upstream processor.
        +    // the ProcessorSupplier provides the count store associated with the WordCountProcessor
        +    .addProcessor("Process", new ProcessorSupplier<String, String>() {
        +        public Processor<String, String> get() {
        +            return new WordCountProcessor();
        +        }
        +        public Set<StoreBuilder<?>> stores() {
+            return Collections.singleton(countStoreBuilder);
        +        }
        +    }, "Source")
        +    // add the sink processor node that takes Kafka topic "sink-topic" as output
        +    // and the WordCountProcessor node as its upstream processor
        +    .addSink("Sink", "sink-topic", "Process");

        This allows for a processor to "own" state stores, effectively encapsulating their usage from the user wiring the topology. Multiple processors that share a state store may provide the same store with this technique, as long as the StoreBuilder is the same instance.
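  As an illustration, here is a minimal sketch of two processors sharing one store this way. It assumes the same countStoreBuilder instance as above; OtherProcessor is a hypothetical second processor class used only for this example:

Topology builder = new Topology();
builder.addSource("Source", "source-topic")
    // both suppliers return the same StoreBuilder instance, so "Process" and
    // "OtherProcess" end up connected to the same state store
    .addProcessor("Process", new ProcessorSupplier<String, String>() {
        public Processor<String, String> get() {
            return new WordCountProcessor();
        }
        public Set<StoreBuilder<?>> stores() {
            return Collections.singleton(countStoreBuilder);
        }
    }, "Source")
    .addProcessor("OtherProcess", new ProcessorSupplier<String, String>() {
        public Processor<String, String> get() {
            return new OtherProcessor(); // hypothetical processor class
        }
        public Set<StoreBuilder<?>> stores() {
            return Collections.singleton(countStoreBuilder);
        }
    }, "Source")
    .addSink("Sink", "sink-topic", "Process", "OtherProcess");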

        In these topologies, the "Process" stream processor node is considered a downstream processor of the "Source" node, and an diff --git a/docs/streams/developer-guide/running-app.html b/docs/streams/developer-guide/running-app.html index 87ee8f0a9da30..ff3ed75d29010 100644 --- a/docs/streams/developer-guide/running-app.html +++ b/docs/streams/developer-guide/running-app.html @@ -51,10 +51,9 @@

        Starting a Kafka Streams application

        You can package your Java application as a fat JAR file and then start the application like this:

        -
        # Start the application in class `com.example.MyStreamsApp`
        -# from the fat JAR named `path-to-app-fatjar.jar`.
        -$ java -cp path-to-app-fatjar.jar com.example.MyStreamsApp
        -
        +
        # Start the application in class `com.example.MyStreamsApp`
        +# from the fat JAR named `path-to-app-fatjar.jar`.
        +$ java -cp path-to-app-fatjar.jar com.example.MyStreamsApp

        When you start your application you are launching a Kafka Streams instance of your application. You can run multiple instances of your application. A common scenario is that there are multiple instances of your application running in parallel. For more information, see Parallelism Model.

        diff --git a/docs/streams/developer-guide/security.html b/docs/streams/developer-guide/security.html index 05de0794d63a9..63bc942f7f5a0 100644 --- a/docs/streams/developer-guide/security.html +++ b/docs/streams/developer-guide/security.html @@ -98,47 +98,44 @@ then you must also include these SSL certificates in the correct locations within the Docker image.

        The snippet below shows the settings to enable client authentication and SSL encryption for data-in-transit between your Kafka Streams application and the Kafka cluster it is reading and writing from:

        -
        # Essential security settings to enable client authentication and SSL encryption
        -bootstrap.servers=kafka.example.com:9093
        -security.protocol=SSL
        -ssl.truststore.location=/etc/security/tls/kafka.client.truststore.jks
        -ssl.truststore.password=test1234
        -ssl.keystore.location=/etc/security/tls/kafka.client.keystore.jks
        -ssl.keystore.password=test1234
        -ssl.key.password=test1234
        -
        +
        # Essential security settings to enable client authentication and SSL encryption
        +bootstrap.servers=kafka.example.com:9093
        +security.protocol=SSL
        +ssl.truststore.location=/etc/security/tls/kafka.client.truststore.jks
        +ssl.truststore.password=test1234
        +ssl.keystore.location=/etc/security/tls/kafka.client.keystore.jks
        +ssl.keystore.password=test1234
        +ssl.key.password=test1234

Configure these settings in your application's Properties instance. These settings will encrypt any data-in-transit that is being read from or written to Kafka, and your application will authenticate itself against the Kafka brokers that it is communicating with. Note that this example does not cover client authorization.

        -
        // Code of your Java application that uses the Kafka Streams library
        -Properties settings = new Properties();
        -settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "secure-kafka-streams-app");
        -// Where to find secure Kafka brokers.  Here, it's on port 9093.
        -settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka.example.com:9093");
        -//
        -// ...further non-security related settings may follow here...
        -//
        -// Security settings.
        -// 1. These settings must match the security settings of the secure Kafka cluster.
        -// 2. The SSL trust store and key store files must be locally accessible to the application.
        -settings.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
        -settings.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, "/etc/security/tls/kafka.client.truststore.jks");
        -settings.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, "test1234");
        -settings.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, "/etc/security/tls/kafka.client.keystore.jks");
        -settings.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, "test1234");
        -settings.put(SslConfigs.SSL_KEY_PASSWORD_CONFIG, "test1234");
        -
        +
        // Code of your Java application that uses the Kafka Streams library
        +Properties settings = new Properties();
        +settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "secure-kafka-streams-app");
        +// Where to find secure Kafka brokers.  Here, it's on port 9093.
        +settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka.example.com:9093");
        +//
        +// ...further non-security related settings may follow here...
        +//
        +// Security settings.
        +// 1. These settings must match the security settings of the secure Kafka cluster.
        +// 2. The SSL trust store and key store files must be locally accessible to the application.
        +settings.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
        +settings.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, "/etc/security/tls/kafka.client.truststore.jks");
        +settings.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, "test1234");
        +settings.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, "/etc/security/tls/kafka.client.keystore.jks");
        +settings.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, "test1234");
        +settings.put(SslConfigs.SSL_KEY_PASSWORD_CONFIG, "test1234");

        If you incorrectly configure a security setting in your application, it will fail at runtime, typically right after you start it. For example, if you enter an incorrect password for the ssl.keystore.password setting, an error message similar to this would be logged and then the application would terminate:

        -
        # Misconfigured ssl.keystore.password
        -Exception in thread "main" org.apache.kafka.common.KafkaException: Failed to construct kafka producer
        -[...snip...]
        +            
        # Misconfigured ssl.keystore.password
        +Exception in thread "main" org.apache.kafka.common.KafkaException: Failed to construct kafka producer
        +[...snip...]
         Caused by: org.apache.kafka.common.KafkaException: org.apache.kafka.common.KafkaException:
            java.io.IOException: Keystore was tampered with, or password was incorrect
        -[...snip...]
        -Caused by: java.security.UnrecoverableKeyException: Password verification failed
        -
+[...snip...]
+Caused by: java.security.UnrecoverableKeyException: Password verification failed

        Monitor your Kafka Streams application log files for such error messages to spot any misconfigured applications quickly.

      diff --git a/docs/streams/developer-guide/testing.html b/docs/streams/developer-guide/testing.html index ceef648d9c4d8..b5fadb12b3d3e 100644 --- a/docs/streams/developer-guide/testing.html +++ b/docs/streams/developer-guide/testing.html @@ -71,15 +71,15 @@

      // Processor API +
      // Processor API
       Topology topology = new Topology();
      -topology.addSource("sourceProcessor", "input-topic");
      -topology.addProcessor("processor", ..., "sourceProcessor");
      -topology.addSink("sinkProcessor", "output-topic", "processor");
      +topology.addSource("sourceProcessor", "input-topic");
      +topology.addProcessor("processor", ..., "sourceProcessor");
      +topology.addSink("sinkProcessor", "output-topic", "processor");
       // or
       // using DSL
       StreamsBuilder builder = new StreamsBuilder();
      -builder.stream("input-topic").filter(...).to("output-topic");
      +builder.stream("input-topic").filter(...).to("output-topic");
       Topology topology = builder.build();
       
       // create test driver
      @@ -88,7 +88,7 @@ 

      TestInputTopic<String, Long> inputTopic = testDriver.createInputTopic("input-topic", stringSerde.serializer(), longSerde.serializer()); +
      TestInputTopic<String, Long> inputTopic = testDriver.createInputTopic("input-topic", stringSerde.serializer(), longSerde.serializer());
       inputTopic.pipeInput("key", 42L);

      To verify the output, you can use TestOutputTopic @@ -97,7 +97,7 @@

      TestOutputTopic<String, Long> outputTopic = testDriver.createOutputTopic("output-topic", stringSerde.deserializer(), longSerde.deserializer()); +
      TestOutputTopic<String, Long> outputTopic = testDriver.createOutputTopic("output-topic", stringSerde.deserializer(), longSerde.deserializer());
       assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("key", 42L)));

      TopologyTestDriver supports punctuations, too. @@ -105,18 +105,18 @@

      testDriver.advanceWallClockTime(Duration.ofSeconds(20));

      +
      testDriver.advanceWallClockTime(Duration.ofSeconds(20));

      Additionally, you can access state stores via the test driver before or after a test. Accessing stores before a test is useful to pre-populate a store with some initial values. After data was processed, expected updates to the store can be verified.

      -
      KeyValueStore store = testDriver.getKeyValueStore("store-name");
      +
      KeyValueStore store = testDriver.getKeyValueStore("store-name");

Note that you should always close the test driver at the end to make sure all resources are released properly.

      -
      testDriver.close();
      +
      testDriver.close();
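One common way to guarantee this is to close the driver in a test tear-down method, for instance with JUnit 4. This is a sketch only, assuming a testDriver field like the one declared in the example below:

@After
public void tearDown() {
    testDriver.close();
}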

      Example

      @@ -125,7 +125,7 @@

      Example

      While processing, no output is generated, but only the store is updated. Output is only sent downstream based on event-time and wall-clock punctuations.

      -
      private TopologyTestDriver testDriver;
      +            
      private TopologyTestDriver testDriver;
       private TestInputTopic<String, Long> inputTopic;
       private TestOutputTopic<String, Long> outputTopic;
       private KeyValueStore<String, Long> store;
      @@ -275,21 +275,21 @@ 

      Construction

      To begin with, instantiate your processor and initialize it with the mock context: -

      final Processor processorUnderTest = ...;
      +            
      final Processor processorUnderTest = ...;
       final MockProcessorContext context = new MockProcessorContext();
       processorUnderTest.init(context);
      If you need to pass configuration to your processor or set the default serdes, you can create the mock with config: -
      final Properties props = new Properties();
      +            
      final Properties props = new Properties();
       props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
       props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass());
      -props.put("some.other.config", "some config value");
      +props.put("some.other.config", "some config value");
       final MockProcessorContext context = new MockProcessorContext(props);

      Captured data

      The mock will capture any values that your processor forwards. You can make assertions on them: -

      processorUnderTest.process("key", "value");
      +            
      processorUnderTest.process("key", "value");
       
       final Iterator<CapturedForward> forwarded = context.forwarded().iterator();
       assertEquals(forwarded.next().keyValue(), new KeyValue<>(..., ...));
      @@ -301,9 +301,9 @@ 

      assertEquals(context.forwarded().size(), 0);

      If your processor forwards to specific child processors, you can query the context for captured data by child name: -
      final List<CapturedForward> captures = context.forwarded("childProcessorName");
      +
      final List<CapturedForward> captures = context.forwarded("childProcessorName");
      The mock also captures whether your processor has called commit() on the context: -
      assertTrue(context.committed());
      +            
      assertTrue(context.committed());
       
       // commit captures can also be reset.
       context.resetCommit();
      @@ -314,8 +314,8 @@ 

      In case your processor logic depends on the record metadata (topic, partition, offset, or timestamp), you can set them on the context, either all together or individually: -

      context.setRecordMetadata("topicName", /*partition*/ 0, /*offset*/ 0L, /*timestamp*/ 0L);
      -context.setTopic("topicName");
      +            
      context.setRecordMetadata("topicName", /*partition*/ 0, /*offset*/ 0L, /*timestamp*/ 0L);
      +context.setTopic("topicName");
       context.setPartition(0);
       context.setOffset(0L);
       context.setTimestamp(0L);
      @@ -327,7 +327,7 @@

      You're encouraged to use a simple in-memory store of the appropriate type (KeyValue, Windowed, or Session), since the mock context does not manage changelogs, state directories, etc.

      -
      final KeyValueStore<String, Integer> store =
      +            
      final KeyValueStore<String, Integer> store =
           Stores.keyValueStoreBuilder(
                   Stores.inMemoryKeyValueStore("myStore"),
                   Serdes.String(),
      @@ -342,7 +342,7 @@ 

      Processors can schedule punctuators to handle periodic tasks. The mock context does not automatically execute punctuators, but it does capture them to allow you to unit test them as well: -
      final MockProcessorContext.CapturedPunctuator capturedPunctuator = context.scheduledPunctuators().get(0);
      +            
      final MockProcessorContext.CapturedPunctuator capturedPunctuator = context.scheduledPunctuators().get(0);
       final long interval = capturedPunctuator.getIntervalMs();
       final PunctuationType type = capturedPunctuator.getType();
       final boolean cancelled = capturedPunctuator.cancelled();
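// To exercise the punctuation logic itself, you can retrieve the captured
// Punctuator and invoke it by hand with a timestamp of your choice (a sketch):
final Punctuator punctuator = capturedPunctuator.getPunctuator();
punctuator.punctuate(/*timestamp*/ 0L);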
      diff --git a/docs/streams/developer-guide/write-streams.html b/docs/streams/developer-guide/write-streams.html
      index 720b0c376756c..03bd16328efbe 100644
      --- a/docs/streams/developer-guide/write-streams.html
      +++ b/docs/streams/developer-guide/write-streams.html
      @@ -90,22 +90,22 @@
                     

      See the section Data Types and Serialization for more information about Serializers/Deserializers.

      Example pom.xml snippet when using Maven:

      -
      
      -    org.apache.kafka
      -    kafka-streams
      -    {{fullDotVersion}}
      -
      -
      -    org.apache.kafka
      -    kafka-clients
      -    {{fullDotVersion}}
      -
      -
      -
      -    org.apache.kafka
      -    kafka-streams-scala_{{scalaVersion}}
      -    {{fullDotVersion}}
      -
      +
      <dependency>
      +    <groupId>org.apache.kafka</groupId>
      +    <artifactId>kafka-streams</artifactId>
      +    <version>{{fullDotVersion}}</version>
      +</dependency>
      +<dependency>
      +    <groupId>org.apache.kafka</groupId>
      +    <artifactId>kafka-clients</artifactId>
      +    <version>{{fullDotVersion}}</version>
      +</dependency>
      +<!-- Optionally include Kafka Streams DSL for Scala for Scala {{scalaVersion}} -->
      +<dependency>
      +    <groupId>org.apache.kafka</groupId>
      +    <artifactId>kafka-streams-scala_{{scalaVersion}}</artifactId>
      +    <version>{{fullDotVersion}}</version>
      +</dependency>

      Using Kafka Streams within your application code

      @@ -120,79 +120,69 @@

      Using Kafka Streams within your application codejava.util.Properties, which defines the configuration for this specific topology.

      Code example:

      -
      import org.apache.kafka.streams.KafkaStreams;
      -import org.apache.kafka.streams.kstream.StreamsBuilder;
      -import org.apache.kafka.streams.processor.Topology;
      -
      -// Use the builders to define the actual processing topology, e.g. to specify
      -// from which input topics to read, which stream operations (filter, map, etc.)
      -// should be called, and so on.  We will cover this in detail in the subsequent
      -// sections of this Developer Guide.
      -
      -StreamsBuilder builder = ...;  // when using the DSL
      -Topology topology = builder.build();
      -//
      -// OR
      -//
      -Topology topology = ...; // when using the Processor API
      -
      -// Use the configuration to tell your application where the Kafka cluster is,
      -// which Serializers/Deserializers to use by default, to specify security settings,
      -// and so on.
      -Properties props = ...;
      -
      -KafkaStreams streams = new KafkaStreams(topology, props);
      -
      +
      import org.apache.kafka.streams.KafkaStreams;
      +import org.apache.kafka.streams.kstream.StreamsBuilder;
      +import org.apache.kafka.streams.processor.Topology;
      +
      +// Use the builders to define the actual processing topology, e.g. to specify
      +// from which input topics to read, which stream operations (filter, map, etc.)
      +// should be called, and so on.  We will cover this in detail in the subsequent
      +// sections of this Developer Guide.
      +
      +StreamsBuilder builder = ...;  // when using the DSL
      +Topology topology = builder.build();
      +//
      +// OR
      +//
      +Topology topology = ...; // when using the Processor API
      +
      +// Use the configuration to tell your application where the Kafka cluster is,
      +// which Serializers/Deserializers to use by default, to specify security settings,
      +// and so on.
      +Properties props = ...;
      +
      +KafkaStreams streams = new KafkaStreams(topology, props);

      At this point, internal structures are initialized, but the processing is not started yet. You have to explicitly start the Kafka Streams thread by calling the KafkaStreams#start() method:

      -
      // Start the Kafka Streams threads
      -streams.start();
      -
      +
      // Start the Kafka Streams threads
      +streams.start();

      If there are other instances of this stream processing application running elsewhere (e.g., on another machine), Kafka Streams transparently re-assigns tasks from the existing instances to the new instance that you just started. For more information, see Stream Partitions and Tasks and Threading Model.

To catch any unexpected exceptions, you can set a java.lang.Thread.UncaughtExceptionHandler before you start the application. This handler is called whenever a stream thread is terminated by an unexpected exception:

      -
      // Java 8+, using lambda expressions
      -streams.setUncaughtExceptionHandler((Thread thread, Throwable throwable) -> {
      -  // here you should examine the throwable/exception and perform an appropriate action!
      -});
      -
      -
      -// Java 7
      -streams.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
      -  public void uncaughtException(Thread thread, Throwable throwable) {
      -    // here you should examine the throwable/exception and perform an appropriate action!
      -  }
      -});
      -
      +
      // Java 8+, using lambda expressions
      +streams.setUncaughtExceptionHandler((Thread thread, Throwable throwable) -> {
      +  // here you should examine the throwable/exception and perform an appropriate action!
      +});
      +
      +
      +// Java 7
      +streams.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
      +  public void uncaughtException(Thread thread, Throwable throwable) {
      +    // here you should examine the throwable/exception and perform an appropriate action!
      +  }
      +});

      To stop the application instance, call the KafkaStreams#close() method:

      -
      // Stop the Kafka Streams threads
      -streams.close();
      -
      +
      // Stop the Kafka Streams threads
      +streams.close();

To allow your application to shut down gracefully in response to SIGTERM, it is recommended that you add a shutdown hook and call KafkaStreams#close.

      • Here is a shutdown hook example in Java 8+:

        -
        -
        // Add shutdown hook to stop the Kafka Streams threads.
        -// You can optionally provide a timeout to `close`.
        -Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
        -
        -
        +
        // Add shutdown hook to stop the Kafka Streams threads.
        +// You can optionally provide a timeout to `close`.
        +Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
      • Here is a shutdown hook example in Java 7:

        -
        -
        // Add shutdown hook to stop the Kafka Streams threads.
        -// You can optionally provide a timeout to `close`.
        -Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
        -  @Override
        -  public void run() {
        -      streams.close();
        -  }
        -}));
        -
        -
        +
        // Add shutdown hook to stop the Kafka Streams threads.
        +// You can optionally provide a timeout to `close`.
        +Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
        +  @Override
        +  public void run() {
        +      streams.close();
        +  }
        +}));

      After an application is stopped, Kafka Streams will migrate any tasks that had been running in this instance to available remaining diff --git a/docs/streams/index.html b/docs/streams/index.html index 3d84bbfee7802..e38b3890af9ce 100644 --- a/docs/streams/index.html +++ b/docs/streams/index.html @@ -154,95 +154,95 @@

      Hello Kafka Streams

      -
                         import org.apache.kafka.common.serialization.Serdes;
      -                   import org.apache.kafka.common.utils.Bytes;
      -                   import org.apache.kafka.streams.KafkaStreams;
      -                   import org.apache.kafka.streams.StreamsBuilder;
      -                   import org.apache.kafka.streams.StreamsConfig;
      -                   import org.apache.kafka.streams.kstream.KStream;
      -                   import org.apache.kafka.streams.kstream.KTable;
      -                   import org.apache.kafka.streams.kstream.Materialized;
      -                   import org.apache.kafka.streams.kstream.Produced;
      -                   import org.apache.kafka.streams.state.KeyValueStore;
      -
      -                   import java.util.Arrays;
      -                   import java.util.Properties;
      -
      -                   public class WordCountApplication {
      -
      -                       public static void main(final String[] args) throws Exception {
      -                           Properties props = new Properties();
      -                           props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application");
      -                           props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker1:9092");
      -                           props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -                           props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -
      -                           StreamsBuilder builder = new StreamsBuilder();
      -                           KStream<String, String> textLines = builder.stream("TextLinesTopic");
      -                           KTable<String, Long> wordCounts = textLines
      -                               .flatMapValues(textLine -> Arrays.asList(textLine.toLowerCase().split("\\W+")))
      -                               .groupBy((key, word) -> word)
      -                               .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));
      -                           wordCounts.toStream().to("WordsWithCountsTopic", Produced.with(Serdes.String(), Serdes.Long()));
      -
      -                           KafkaStreams streams = new KafkaStreams(builder.build(), props);
      -                           streams.start();
      -                       }
      -
      -                   }
      +
      import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.common.utils.Bytes;
      +import org.apache.kafka.streams.KafkaStreams;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.StreamsConfig;
      +import org.apache.kafka.streams.kstream.KStream;
      +import org.apache.kafka.streams.kstream.KTable;
      +import org.apache.kafka.streams.kstream.Materialized;
      +import org.apache.kafka.streams.kstream.Produced;
      +import org.apache.kafka.streams.state.KeyValueStore;
      +
      +import java.util.Arrays;
      +import java.util.Properties;
      +
      +public class WordCountApplication {
      +
      +   public static void main(final String[] args) throws Exception {
      +       Properties props = new Properties();
      +       props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application");
      +       props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker1:9092");
      +       props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +       props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +
      +       StreamsBuilder builder = new StreamsBuilder();
      +       KStream<String, String> textLines = builder.stream("TextLinesTopic");
      +       KTable<String, Long> wordCounts = textLines
      +           .flatMapValues(textLine -> Arrays.asList(textLine.toLowerCase().split("\\W+")))
      +           .groupBy((key, word) -> word)
      +           .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));
      +       wordCounts.toStream().to("WordsWithCountsTopic", Produced.with(Serdes.String(), Serdes.Long()));
      +
      +       KafkaStreams streams = new KafkaStreams(builder.build(), props);
      +       streams.start();
      +   }
      +
      +}
      -
                         import org.apache.kafka.common.serialization.Serdes;
      -                   import org.apache.kafka.common.utils.Bytes;
      -                   import org.apache.kafka.streams.KafkaStreams;
      -                   import org.apache.kafka.streams.StreamsBuilder;
      -                   import org.apache.kafka.streams.StreamsConfig;
      -                   import org.apache.kafka.streams.kstream.KStream;
      -                   import org.apache.kafka.streams.kstream.KTable;
      -                   import org.apache.kafka.streams.kstream.ValueMapper;
      -                   import org.apache.kafka.streams.kstream.KeyValueMapper;
      -                   import org.apache.kafka.streams.kstream.Materialized;
      -                   import org.apache.kafka.streams.kstream.Produced;
      -                   import org.apache.kafka.streams.state.KeyValueStore;
      -
      -                   import java.util.Arrays;
      -                   import java.util.Properties;
      -
      -                   public class WordCountApplication {
      -
      -                       public static void main(final String[] args) throws Exception {
      -                           Properties props = new Properties();
      -                           props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application");
      -                           props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker1:9092");
      -                           props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -                           props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -
      -                           StreamsBuilder builder = new StreamsBuilder();
      -                           KStream<String, String> textLines = builder.stream("TextLinesTopic");
      -                           KTable<String, Long> wordCounts = textLines
      -                               .flatMapValues(new ValueMapper<String, Iterable<String>>() {
      -                                   @Override
      -                                   public Iterable<String> apply(String textLine) {
      -                                       return Arrays.asList(textLine.toLowerCase().split("\\W+"));
      -                                   }
      -                               })
      -                               .groupBy(new KeyValueMapper<String, String, String>() {
      -                                   @Override
      -                                   public String apply(String key, String word) {
      -                                       return word;
      -                                   }
      -                               })
      -                               .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));
      -
      -
      -                           wordCounts.toStream().to("WordsWithCountsTopic", Produced.with(Serdes.String(), Serdes.Long()));
      -
      -                           KafkaStreams streams = new KafkaStreams(builder.build(), props);
      -                           streams.start();
      -                       }
      -
      -                   }
      +
      import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.common.utils.Bytes;
      +import org.apache.kafka.streams.KafkaStreams;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.StreamsConfig;
      +import org.apache.kafka.streams.kstream.KStream;
      +import org.apache.kafka.streams.kstream.KTable;
      +import org.apache.kafka.streams.kstream.ValueMapper;
      +import org.apache.kafka.streams.kstream.KeyValueMapper;
      +import org.apache.kafka.streams.kstream.Materialized;
      +import org.apache.kafka.streams.kstream.Produced;
      +import org.apache.kafka.streams.state.KeyValueStore;
      +
      +import java.util.Arrays;
      +import java.util.Properties;
      +
      +public class WordCountApplication {
      +
      +   public static void main(final String[] args) throws Exception {
      +       Properties props = new Properties();
      +       props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application");
      +       props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-broker1:9092");
      +       props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +       props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +
      +       StreamsBuilder builder = new StreamsBuilder();
      +       KStream<String, String> textLines = builder.stream("TextLinesTopic");
      +       KTable<String, Long> wordCounts = textLines
      +           .flatMapValues(new ValueMapper<String, Iterable<String>>() {
      +               @Override
      +               public Iterable<String> apply(String textLine) {
      +                   return Arrays.asList(textLine.toLowerCase().split("\\W+"));
      +               }
      +           })
      +           .groupBy(new KeyValueMapper<String, String, String>() {
      +               @Override
      +               public String apply(String key, String word) {
      +                   return word;
      +               }
      +           })
      +           .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));
      +
      +
      +       wordCounts.toStream().to("WordsWithCountsTopic", Produced.with(Serdes.String(), Serdes.Long()));
      +
      +       KafkaStreams streams = new KafkaStreams(builder.build(), props);
      +       streams.start();
      +   }
      +
      +}
      diff --git a/docs/streams/tutorial.html b/docs/streams/tutorial.html index 2fa4dc3ec5a0c..a526de568abb1 100644 --- a/docs/streams/tutorial.html +++ b/docs/streams/tutorial.html @@ -42,32 +42,31 @@

      We are going to use a Kafka Streams Maven Archetype for creating a Streams project structure with the following commands:

      -
              mvn archetype:generate \
      -            -DarchetypeGroupId=org.apache.kafka \
      -            -DarchetypeArtifactId=streams-quickstart-java \
      -            -DarchetypeVersion={{fullDotVersion}} \
      -            -DgroupId=streams.examples \
      -            -DartifactId=streams.examples \
      -            -Dversion=0.1 \
      -            -Dpackage=myapps
      - +
      mvn archetype:generate \
      +    -DarchetypeGroupId=org.apache.kafka \
      +    -DarchetypeArtifactId=streams-quickstart-java \
      +    -DarchetypeVersion={{fullDotVersion}} \
      +    -DgroupId=streams.examples \
      +    -DartifactId=streams.examples \
      +    -Dversion=0.1 \
      +    -Dpackage=myapps

      You can use a different value for groupId, artifactId and package parameters if you like. Assuming the above parameter values are used, this command will create a project structure that looks like this:

      -
              > tree streams.examples
      -        streams-quickstart
      -        |-- pom.xml
      -        |-- src
      -            |-- main
      -                |-- java
      -                |   |-- myapps
      -                |       |-- LineSplit.java
      -                |       |-- Pipe.java
      -                |       |-- WordCount.java
      -                |-- resources
      -                    |-- log4j.properties
      +
      > tree streams.examples
      +    streams-quickstart
      +    |-- pom.xml
      +    |-- src
      +        |-- main
      +            |-- java
      +            |   |-- myapps
      +            |       |-- LineSplit.java
      +            |       |-- Pipe.java
      +            |       |-- WordCount.java
      +            |-- resources
      +                |-- log4j.properties

      The pom.xml file included in the project already has the Streams dependency defined. @@ -79,22 +78,22 @@

      Since we are going to start writing such programs from scratch, we can now delete these examples:

      -
              > cd streams-quickstart
      -        > rm src/main/java/myapps/*.java
      +
      > cd streams-quickstart
      +> rm src/main/java/myapps/*.java

      Writing a first Streams application: Pipe

      It's coding time now! Feel free to open your favorite IDE and import this Maven project, or simply open a text editor and create a java file under src/main/java/myapps. Let's name it Pipe.java: -
              package myapps;
      +    
      package myapps;
       
      -        public class Pipe {
      +public class Pipe {
       
      -            public static void main(String[] args) throws Exception {
      +    public static void main(String[] args) throws Exception {
       
      -            }
      -        }
+    }
+}

      We are going to fill in the main function to write this pipe program. Note that we will not list the import statements as we go since IDEs can usually add them automatically. @@ -107,16 +106,16 @@

Writing a first Stream and StreamsConfig.APPLICATION_ID_CONFIG, which gives the unique identifier of your Streams application to distinguish itself from other applications talking to the same Kafka cluster:

      -
              Properties props = new Properties();
      -        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
      -        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");    // assuming that the Kafka broker this application is talking to runs on local machine with port 9092
      +
      Properties props = new Properties();
      +props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
      +props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");    // assuming that the Kafka broker this application is talking to runs on local machine with port 9092

      In addition, you can customize other configurations in the same map, for example, default serialization and deserialization libraries for the record key-value pairs:

      -
              props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +
      props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

      For a full list of configurations of Kafka Streams please refer to this table. @@ -128,13 +127,13 @@

      Writing a first Stream We can use a topology builder to construct such a topology,

      -
              final StreamsBuilder builder = new StreamsBuilder();
      +
      final StreamsBuilder builder = new StreamsBuilder();

      And then create a source stream from a Kafka topic named streams-plaintext-input using this topology builder:

      -
              KStream<String, String> source = builder.stream("streams-plaintext-input");
      +
      KStream<String, String> source = builder.stream("streams-plaintext-input");

      Now we get a KStream that is continuously generating records from its source Kafka topic streams-plaintext-input. @@ -142,38 +141,38 @@

      Writing a first Stream The simplest thing we can do with this stream is to write it into another Kafka topic, say it's named streams-pipe-output:

      -
              source.to("streams-pipe-output");
      +
      source.to("streams-pipe-output");

      Note that we can also concatenate the above two lines into a single line as:

      -
              builder.stream("streams-plaintext-input").to("streams-pipe-output");
      +
      builder.stream("streams-plaintext-input").to("streams-pipe-output");

      We can inspect what kind of topology is created from this builder by doing the following:

      -
              final Topology topology = builder.build();
      +
      final Topology topology = builder.build();

      And print its description to standard output as:

      -
              System.out.println(topology.describe());
      +
      System.out.println(topology.describe());

      If we just stop here, compile and run the program, it will output the following information:

      -
              > mvn clean package
      -        > mvn exec:java -Dexec.mainClass=myapps.Pipe
      -        Sub-topologies:
      -          Sub-topology: 0
      -            Source: KSTREAM-SOURCE-0000000000(topics: streams-plaintext-input) --> KSTREAM-SINK-0000000001
      -            Sink: KSTREAM-SINK-0000000001(topic: streams-pipe-output) <-- KSTREAM-SOURCE-0000000000
      -        Global Stores:
      -          none
      +
      > mvn clean package
      +> mvn exec:java -Dexec.mainClass=myapps.Pipe
      +Sub-topologies:
      +  Sub-topology: 0
      +    Source: KSTREAM-SOURCE-0000000000(topics: streams-plaintext-input) --> KSTREAM-SINK-0000000001
      +    Sink: KSTREAM-SINK-0000000001(topic: streams-pipe-output) <-- KSTREAM-SOURCE-0000000000
      +Global Stores:
      +  none

      As shown above, it illustrates that the constructed topology has two processor nodes, a source node KSTREAM-SOURCE-0000000000 and a sink node KSTREAM-SINK-0000000001. @@ -189,7 +188,7 @@

      Writing a first Stream we can now construct the Streams client with the two components we have just constructed above: the configuration map specified in a java.util.Properties instance and the Topology object.

      -
              final KafkaStreams streams = new KafkaStreams(topology, props);
      +
      final KafkaStreams streams = new KafkaStreams(topology, props);

      By calling its start() function we can trigger the execution of this client. @@ -197,76 +196,76 @@

      Writing a first Stream We can, for example, add a shutdown hook with a countdown latch to capture a user interrupt and close the client upon terminating this program:

      -
              final CountDownLatch latch = new CountDownLatch(1);
      +    
      final CountDownLatch latch = new CountDownLatch(1);
       
      -        // attach shutdown handler to catch control-c
      -        Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
      -            @Override
      -            public void run() {
      -                streams.close();
      -                latch.countDown();
      -            }
      -        });
      +// attach shutdown handler to catch control-c
      +Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
      +    @Override
      +    public void run() {
      +        streams.close();
      +        latch.countDown();
      +    }
      +});
       
      -        try {
      -            streams.start();
      -            latch.await();
      -        } catch (Throwable e) {
      -            System.exit(1);
      -        }
      -        System.exit(0);
+try {
+    streams.start();
+    latch.await();
+} catch (Throwable e) {
+    System.exit(1);
+}
+System.exit(0);

      The complete code so far looks like this:

      -
              package myapps;
      -
      -        import org.apache.kafka.common.serialization.Serdes;
      -        import org.apache.kafka.streams.KafkaStreams;
      -        import org.apache.kafka.streams.StreamsBuilder;
      -        import org.apache.kafka.streams.StreamsConfig;
      -        import org.apache.kafka.streams.Topology;
      +    
      package myapps;
       
      -        import java.util.Properties;
      -        import java.util.concurrent.CountDownLatch;
      +import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.streams.KafkaStreams;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.StreamsConfig;
      +import org.apache.kafka.streams.Topology;
       
      -        public class Pipe {
      +import java.util.Properties;
      +import java.util.concurrent.CountDownLatch;
       
      -            public static void main(String[] args) throws Exception {
      -                Properties props = new Properties();
      -                props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
      -                props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      -                props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -                props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +public class Pipe {
       
      -                final StreamsBuilder builder = new StreamsBuilder();
      +    public static void main(String[] args) throws Exception {
      +        Properties props = new Properties();
      +        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
      +        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      +        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
       
      -                builder.stream("streams-plaintext-input").to("streams-pipe-output");
      +        final StreamsBuilder builder = new StreamsBuilder();
       
      -                final Topology topology = builder.build();
      +        builder.stream("streams-plaintext-input").to("streams-pipe-output");
       
      -                final KafkaStreams streams = new KafkaStreams(topology, props);
      -                final CountDownLatch latch = new CountDownLatch(1);
      +        final Topology topology = builder.build();
       
      -                // attach shutdown handler to catch control-c
      -                Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
      -                    @Override
      -                    public void run() {
      -                        streams.close();
      -                        latch.countDown();
      -                    }
      -                });
      +        final KafkaStreams streams = new KafkaStreams(topology, props);
      +        final CountDownLatch latch = new CountDownLatch(1);
       
      -                try {
      -                    streams.start();
      -                    latch.await();
      -                } catch (Throwable e) {
      -                    System.exit(1);
      -                }
      -                System.exit(0);
      +        // attach shutdown handler to catch control-c
      +        Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
      +            @Override
      +            public void run() {
      +                streams.close();
      +                latch.countDown();
                   }
      -        }
+        });
+
+        try {
+            streams.start();
+            latch.await();
+        } catch (Throwable e) {
+            System.exit(1);
+        }
+        System.exit(0);
+    }
+}

      If you already have the Kafka broker up and running at localhost:9092, @@ -274,8 +273,8 @@

      Writing a first Stream you can run this code in your IDE or on the command line, using Maven:

      -
              > mvn clean package
      -        > mvn exec:java -Dexec.mainClass=myapps.Pipe
      +
      > mvn clean package
      +> mvn exec:java -Dexec.mainClass=myapps.Pipe

      For detailed instructions on how to run a Streams application and observe its computing results, @@ -291,33 +290,33 @@

Writing a se We can create another program by first copying the existing Pipe.java class:

      -
              > cp src/main/java/myapps/Pipe.java src/main/java/myapps/LineSplit.java
      +
      > cp src/main/java/myapps/Pipe.java src/main/java/myapps/LineSplit.java

And change its class name as well as the application id config to distinguish it from the original program:

      -
              public class LineSplit {
      +    
      public class LineSplit {
       
      -            public static void main(String[] args) throws Exception {
      -                Properties props = new Properties();
      -                props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
      -                // ...
      -            }
      -        }
+    public static void main(String[] args) throws Exception {
+        Properties props = new Properties();
+        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
+        // ...
+    }
+}

Since each record of the source stream is a String-typed key-value pair, let's treat the value string as a text line and split it into words with a FlatMapValues operator:

      -
              KStream<String, String> source = builder.stream("streams-plaintext-input");
      -        KStream<String, String> words = source.flatMapValues(new ValueMapper<String, Iterable<String>>() {
      -                    @Override
      -                    public Iterable<String> apply(String value) {
      -                        return Arrays.asList(value.split("\\W+"));
      -                    }
      -                });
      +
      KStream<String, String> source = builder.stream("streams-plaintext-input");
      +KStream<String, String> words = source.flatMapValues(new ValueMapper<String, Iterable<String>>() {
      +            @Override
      +            public Iterable<String> apply(String value) {
      +                return Arrays.asList(value.split("\\W+"));
      +            }
      +        });

      The operator will take the source stream as its input, and generate a new stream named words @@ -327,31 +326,31 @@

Writing a se Note that if you are using JDK 8 you can use lambda expressions and simplify the above code as:

      -
              KStream<String, String> source = builder.stream("streams-plaintext-input");
      -        KStream<String, String> words = source.flatMapValues(value -> Arrays.asList(value.split("\\W+")));
      +
      KStream<String, String> source = builder.stream("streams-plaintext-input");
      +KStream<String, String> words = source.flatMapValues(value -> Arrays.asList(value.split("\\W+")));

And finally we can write the word stream back into another Kafka topic, say streams-linesplit-output. Again, these two steps can be concatenated as follows (assuming a lambda expression is used):

      -
              KStream<String, String> source = builder.stream("streams-plaintext-input");
      -        source.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
      -              .to("streams-linesplit-output");
      +
      KStream<String, String> source = builder.stream("streams-plaintext-input");
      +source.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
      +      .to("streams-linesplit-output");

      If we now describe this augmented topology as System.out.println(topology.describe()), we will get the following:

      -
              > mvn clean package
      -        > mvn exec:java -Dexec.mainClass=myapps.LineSplit
      -        Sub-topologies:
      -          Sub-topology: 0
      -            Source: KSTREAM-SOURCE-0000000000(topics: streams-plaintext-input) --> KSTREAM-FLATMAPVALUES-0000000001
      -            Processor: KSTREAM-FLATMAPVALUES-0000000001(stores: []) --> KSTREAM-SINK-0000000002 <-- KSTREAM-SOURCE-0000000000
      -            Sink: KSTREAM-SINK-0000000002(topic: streams-linesplit-output) <-- KSTREAM-FLATMAPVALUES-0000000001
      -          Global Stores:
      -            none
      +
      > mvn clean package
      +> mvn exec:java -Dexec.mainClass=myapps.LineSplit
      +Sub-topologies:
      +  Sub-topology: 0
      +    Source: KSTREAM-SOURCE-0000000000(topics: streams-plaintext-input) --> KSTREAM-FLATMAPVALUES-0000000001
      +    Processor: KSTREAM-FLATMAPVALUES-0000000001(stores: []) --> KSTREAM-SINK-0000000002 <-- KSTREAM-SOURCE-0000000000
      +    Sink: KSTREAM-SINK-0000000002(topic: streams-linesplit-output) <-- KSTREAM-FLATMAPVALUES-0000000001
      +  Global Stores:
      +    none

      As we can see above, a new processor node KSTREAM-FLATMAPVALUES-0000000001 is injected into the topology between the original source and sink nodes. @@ -365,41 +364,41 @@

      Writing a se The complete code looks like this (assuming lambda expression is used):

      -
              package myapps;
      +    
      package myapps;
       
      -        import org.apache.kafka.common.serialization.Serdes;
      -        import org.apache.kafka.streams.KafkaStreams;
      -        import org.apache.kafka.streams.StreamsBuilder;
      -        import org.apache.kafka.streams.StreamsConfig;
      -        import org.apache.kafka.streams.Topology;
      -        import org.apache.kafka.streams.kstream.KStream;
      +import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.streams.KafkaStreams;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.StreamsConfig;
      +import org.apache.kafka.streams.Topology;
      +import org.apache.kafka.streams.kstream.KStream;
       
      -        import java.util.Arrays;
      -        import java.util.Properties;
      -        import java.util.concurrent.CountDownLatch;
      +import java.util.Arrays;
      +import java.util.Properties;
      +import java.util.concurrent.CountDownLatch;
       
      -        public class LineSplit {
      +public class LineSplit {
       
      -            public static void main(String[] args) throws Exception {
      -                Properties props = new Properties();
      -                props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
      -                props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      -                props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -                props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +    public static void main(String[] args) throws Exception {
      +        Properties props = new Properties();
      +        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
      +        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      +        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
       
      -                final StreamsBuilder builder = new StreamsBuilder();
      +        final StreamsBuilder builder = new StreamsBuilder();
       
      -                KStream<String, String> source = builder.stream("streams-plaintext-input");
      -                source.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
      -                      .to("streams-linesplit-output");
      +        KStream<String, String> source = builder.stream("streams-plaintext-input");
      +        source.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
      +              .to("streams-linesplit-output");
       
      -                final Topology topology = builder.build();
      -                final KafkaStreams streams = new KafkaStreams(topology, props);
      -                final CountDownLatch latch = new CountDownLatch(1);
      +        final Topology topology = builder.build();
      +        final KafkaStreams streams = new KafkaStreams(topology, props);
      +        final CountDownLatch latch = new CountDownLatch(1);
       
      -                // ... same as Pipe.java above
      -            }
      -        }
+        // ... same as Pipe.java above
+    }
+}
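      For readers following only this listing: the "// ... same as Pipe.java above" lines elide the driver boilerplate shown earlier for Pipe.java. A minimal sketch of that block, assuming the streams and latch variables defined above (the thread name and exit codes are illustrative):

        // Attach a shutdown handler so Ctrl-C closes the Streams client and releases the latch.
        Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();   // start processing
            latch.await();     // block the main thread until shutdown
        } catch (final Throwable e) {
            System.exit(1);
        }
        System.exit(0);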

      Writing a third Streams application: Wordcount

@@ -408,47 +407,47 @@ Writing a th

      Following similar steps let's create another program based on the LineSplit.java class:

      -
              public class WordCount {
      +    
      public class WordCount {
       
      -            public static void main(String[] args) throws Exception {
      -                Properties props = new Properties();
      -                props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
      -                // ...
      -            }
      -        }
+    public static void main(String[] args) throws Exception {
+        Properties props = new Properties();
+        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
+        // ...
+    }
+}

      In order to count the words we can first modify the flatMapValues operator to treat all of them as lower case (assuming lambda expression is used):

      -
              source.flatMapValues(new ValueMapper<String, Iterable<String>>() {
      -                    @Override
      -                    public Iterable<String> apply(String value) {
      -                        return Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+"));
      -                    }
      -                });
      +
      source.flatMapValues(new ValueMapper<String, Iterable<String>>() {
      +    @Override
      +    public Iterable<String> apply(String value) {
      +        return Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+"));
      +    }
      +});

      In order to do the counting aggregation, we have to first specify that we want to key the stream on the value string, i.e. the lower cased word, with a groupBy operator. This operator generates a new grouped stream, which can then be aggregated by a count operator, which generates a running count for each of the grouped keys:

      -
              KTable<String, Long> counts =
      -        source.flatMapValues(new ValueMapper<String, Iterable<String>>() {
      -                    @Override
      -                    public Iterable<String> apply(String value) {
      -                        return Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+"));
      -                    }
      -                })
      -              .groupBy(new KeyValueMapper<String, String, String>() {
      -                   @Override
      -                   public String apply(String key, String value) {
      -                       return value;
      -                   }
      -                })
      -              // Materialize the result into a KeyValueStore named "counts-store".
      -              // The Materialized store is always of type <Bytes, byte[]> as this is the format of the inner most store.
      -              .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>> as("counts-store"));
      +
      KTable<String, Long> counts =
      +source.flatMapValues(new ValueMapper<String, Iterable<String>>() {
      +            @Override
      +            public Iterable<String> apply(String value) {
      +                return Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+"));
      +            }
      +        })
      +      .groupBy(new KeyValueMapper<String, String, String>() {
      +           @Override
      +           public String apply(String key, String value) {
      +               return value;
      +           }
      +        })
      +      // Materialize the result into a KeyValueStore named "counts-store".
+      // The Materialized store is always of type <Bytes, byte[]> as this is the format of the innermost store.
      +      .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>> as("counts-store"));
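
      Because Materialized.as("counts-store") gives the state store an explicit name, that store can later be queried through Kafka Streams interactive queries. A minimal sketch, not part of the tutorial, assuming the KafkaStreams instance (streams) is already running:

import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

// Look up the store by the name passed to Materialized.as(...) and read one key.
ReadOnlyKeyValueStore<String, Long> countsStore =
    streams.store(StoreQueryParameters.fromNameAndType("counts-store", QueryableStoreTypes.<String, Long>keyValueStore()));
System.out.println("current count for \"hello\": " + countsStore.get("hello"));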

      Note that the count operator has a Materialized parameter that specifies that the

@@ -463,7 +462,7 @@ Writing a th
      We need to provide overridden serialization methods for Long types, otherwise a runtime exception will be thrown:

      -
              counts.toStream().to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
      +
      counts.toStream().to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
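
      Since the values written to streams-wordcount-output are Longs, any client reading that topic needs a matching value deserializer. A minimal plain-consumer sketch (the group id and the single poll below are illustrative, not part of the tutorial):

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;

Properties consumerProps = new Properties();
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-output-reader"); // illustrative group id
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);

try (KafkaConsumer<String, Long> consumer = new KafkaConsumer<>(consumerProps)) {
    consumer.subscribe(Collections.singletonList("streams-wordcount-output"));
    // Print whatever is available after a short poll; a real reader would loop.
    for (ConsumerRecord<String, Long> record : consumer.poll(Duration.ofSeconds(1))) {
        System.out.println(record.key() + " : " + record.value());
    }
}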

      Note that in order to read the changelog stream from topic streams-wordcount-output,

@@ -472,33 +471,33 @@ Writing a th
      Assuming lambda expression from JDK 8 can be used, the above code can be simplified as:

      -
              KStream<String, String> source = builder.stream("streams-plaintext-input");
      -        source.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")))
      -              .groupBy((key, value) -> value)
      -              .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"))
      -              .toStream()
      -              .to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
      +
      KStream<String, String> source = builder.stream("streams-plaintext-input");
      +source.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")))
      +      .groupBy((key, value) -> value)
      +      .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"))
      +      .toStream()
      +      .to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));

      If we again describe this augmented topology as System.out.println(topology.describe()), we will get the following:

      -
              > mvn clean package
      -        > mvn exec:java -Dexec.mainClass=myapps.WordCount
      -        Sub-topologies:
      -          Sub-topology: 0
      -            Source: KSTREAM-SOURCE-0000000000(topics: streams-plaintext-input) --> KSTREAM-FLATMAPVALUES-0000000001
      -            Processor: KSTREAM-FLATMAPVALUES-0000000001(stores: []) --> KSTREAM-KEY-SELECT-0000000002 <-- KSTREAM-SOURCE-0000000000
      -            Processor: KSTREAM-KEY-SELECT-0000000002(stores: []) --> KSTREAM-FILTER-0000000005 <-- KSTREAM-FLATMAPVALUES-0000000001
      -            Processor: KSTREAM-FILTER-0000000005(stores: []) --> KSTREAM-SINK-0000000004 <-- KSTREAM-KEY-SELECT-0000000002
      -            Sink: KSTREAM-SINK-0000000004(topic: Counts-repartition) <-- KSTREAM-FILTER-0000000005
      -          Sub-topology: 1
      -            Source: KSTREAM-SOURCE-0000000006(topics: Counts-repartition) --> KSTREAM-AGGREGATE-0000000003
      -            Processor: KSTREAM-AGGREGATE-0000000003(stores: [Counts]) --> KTABLE-TOSTREAM-0000000007 <-- KSTREAM-SOURCE-0000000006
      -            Processor: KTABLE-TOSTREAM-0000000007(stores: []) --> KSTREAM-SINK-0000000008 <-- KSTREAM-AGGREGATE-0000000003
      -            Sink: KSTREAM-SINK-0000000008(topic: streams-wordcount-output) <-- KTABLE-TOSTREAM-0000000007
      -        Global Stores:
      -          none
      +
      > mvn clean package
      +> mvn exec:java -Dexec.mainClass=myapps.WordCount
      +Sub-topologies:
      +  Sub-topology: 0
      +    Source: KSTREAM-SOURCE-0000000000(topics: streams-plaintext-input) --> KSTREAM-FLATMAPVALUES-0000000001
      +    Processor: KSTREAM-FLATMAPVALUES-0000000001(stores: []) --> KSTREAM-KEY-SELECT-0000000002 <-- KSTREAM-SOURCE-0000000000
      +    Processor: KSTREAM-KEY-SELECT-0000000002(stores: []) --> KSTREAM-FILTER-0000000005 <-- KSTREAM-FLATMAPVALUES-0000000001
      +    Processor: KSTREAM-FILTER-0000000005(stores: []) --> KSTREAM-SINK-0000000004 <-- KSTREAM-KEY-SELECT-0000000002
      +    Sink: KSTREAM-SINK-0000000004(topic: Counts-repartition) <-- KSTREAM-FILTER-0000000005
      +  Sub-topology: 1
      +    Source: KSTREAM-SOURCE-0000000006(topics: Counts-repartition) --> KSTREAM-AGGREGATE-0000000003
      +    Processor: KSTREAM-AGGREGATE-0000000003(stores: [Counts]) --> KTABLE-TOSTREAM-0000000007 <-- KSTREAM-SOURCE-0000000006
      +    Processor: KTABLE-TOSTREAM-0000000007(stores: []) --> KSTREAM-SINK-0000000008 <-- KSTREAM-AGGREGATE-0000000003
      +    Sink: KSTREAM-SINK-0000000008(topic: streams-wordcount-output) <-- KTABLE-TOSTREAM-0000000007
      +Global Stores:
      +  none

      As we can see above, the topology now contains two disconnected sub-topologies.

@@ -517,49 +516,49 @@ Writing a th
      The complete code looks like this (assuming lambda expression is used):

      -
              package myapps;
      +    
      package myapps;
       
      -        import org.apache.kafka.common.serialization.Serdes;
      -        import org.apache.kafka.common.utils.Bytes;
      -        import org.apache.kafka.streams.KafkaStreams;
      -        import org.apache.kafka.streams.StreamsBuilder;
      -        import org.apache.kafka.streams.StreamsConfig;
      -        import org.apache.kafka.streams.Topology;
      -        import org.apache.kafka.streams.kstream.KStream;
      -        import org.apache.kafka.streams.kstream.Materialized;
      -        import org.apache.kafka.streams.kstream.Produced;
      -        import org.apache.kafka.streams.state.KeyValueStore;
      +import org.apache.kafka.common.serialization.Serdes;
      +import org.apache.kafka.common.utils.Bytes;
      +import org.apache.kafka.streams.KafkaStreams;
      +import org.apache.kafka.streams.StreamsBuilder;
      +import org.apache.kafka.streams.StreamsConfig;
      +import org.apache.kafka.streams.Topology;
      +import org.apache.kafka.streams.kstream.KStream;
      +import org.apache.kafka.streams.kstream.Materialized;
      +import org.apache.kafka.streams.kstream.Produced;
      +import org.apache.kafka.streams.state.KeyValueStore;
       
      -        import java.util.Arrays;
      -        import java.util.Locale;
      -        import java.util.Properties;
      -        import java.util.concurrent.CountDownLatch;
      +import java.util.Arrays;
      +import java.util.Locale;
      +import java.util.Properties;
      +import java.util.concurrent.CountDownLatch;
       
      -        public class WordCount {
      +public class WordCount {
       
      -            public static void main(String[] args) throws Exception {
      -                Properties props = new Properties();
      -                props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
      -                props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      -                props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      -                props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +    public static void main(String[] args) throws Exception {
      +        Properties props = new Properties();
      +        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
      +        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      +        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
      +        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
       
      -                final StreamsBuilder builder = new StreamsBuilder();
      +        final StreamsBuilder builder = new StreamsBuilder();
       
      -                KStream<String, String> source = builder.stream("streams-plaintext-input");
      -                source.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")))
      -                      .groupBy((key, value) -> value)
      -                      .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"))
      -                      .toStream()
      -                      .to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
      +        KStream<String, String> source = builder.stream("streams-plaintext-input");
      +        source.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")))
      +              .groupBy((key, value) -> value)
      +              .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"))
      +              .toStream()
      +              .to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
       
      -                final Topology topology = builder.build();
      -                final KafkaStreams streams = new KafkaStreams(topology, props);
      -                final CountDownLatch latch = new CountDownLatch(1);
      +        final Topology topology = builder.build();
      +        final KafkaStreams streams = new KafkaStreams(topology, props);
      +        final CountDownLatch latch = new CountDownLatch(1);
       
      -                // ... same as Pipe.java above
      -            }
      -        }
+        // ... same as Pipe.java above
+    }
+}

      In-memory

@@ -268,19 +266,18 @@

    5. Use TimestampedWindowStore when you need to store windowedKey-(value/timestamp) pairs (a builder sketch for this store type follows the key-value example below).
    6. -
      // Creating an in-memory key-value store:
      -// here, we create a `KeyValueStore<String, Long>` named "inmemory-counts".
      -import org.apache.kafka.streams.state.StoreBuilder;
      -import org.apache.kafka.streams.state.Stores;
      +                            
      // Creating an in-memory key-value store:
      +// here, we create a `KeyValueStore<String, Long>` named "inmemory-counts".
      +import org.apache.kafka.streams.state.StoreBuilder;
      +import org.apache.kafka.streams.state.Stores;
       
      -// Using a `KeyValueStoreBuilder` to build a `KeyValueStore`.
      -StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier =
      -  Stores.keyValueStoreBuilder(
      -    Stores.inMemoryKeyValueStore("inmemory-counts"),
      -    Serdes.String(),
      -    Serdes.Long());
      -KeyValueStore<String, Long> countStore = countStoreSupplier.build();
      -
+// Using a `KeyValueStoreBuilder` to build a `KeyValueStore`.
+StoreBuilder<KeyValueStore<String, Long>> countStoreSupplier =
+  Stores.keyValueStoreBuilder(
+    Stores.inMemoryKeyValueStore("inmemory-counts"),
+    Serdes.String(),
+    Serdes.Long());
+KeyValueStore<String, Long> countStore = countStoreSupplier.build();
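
      The list above also mentions TimestampedWindowStore for windowedKey-(value/timestamp) pairs. The builder pattern is analogous; a sketch assuming a persistent supplier, with an illustrative store name, retention period, and window size:

import java.time.Duration;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.Stores;
import org.apache.kafka.streams.state.TimestampedWindowStore;

// Build a persistent timestamped window store named "windowed-counts" (name is illustrative).
StoreBuilder<TimestampedWindowStore<String, Long>> windowedCountStoreBuilder =
  Stores.timestampedWindowStoreBuilder(
    Stores.persistentTimestampedWindowStore(
      "windowed-counts",
      Duration.ofDays(1),      // retention period
      Duration.ofMinutes(5),   // window size
      false),                  // do not retain duplicates
    Serdes.String(),
    Serdes.Long());
TimestampedWindowStore<String, Long> windowedCountStore = windowedCountStoreBuilder.build();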