Update redpanda version and enable embedded schema registry. #1593

Merged 2 commits on Nov 19, 2021
11 changes: 1 addition & 10 deletions Integrations/docker-compose.yml
@@ -9,13 +9,4 @@ services:
       service: redpanda
     expose:
       - 29092
-
-  registry:
-    image: apicurio/apicurio-registry-mem:1.2.2.Final
-    expose:
-      - 8080
-    environment:
-      QUARKUS_PROFILE: prod
-      KAFKA_BOOTSTRAP_SERVERS: redpanda:9092
-      APPLICATION_ID: registry_id
-      APPLICATION_SERVER: localhost:9000
+      - 8081
Member:
Personal opinion: stop using docker-compose for the tests if we can; the plugin is not great, and I think we'll be happier just doing plain docker commands.

This isn't a requirement for merge, or even a follow-up; just a preference.

Member (author):

I hear you, and it makes sense, but that is a non-trivial amount of work, so I'm not doing it on this pass.
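For reference, a plain-docker equivalent might look roughly like the sketch below; it mirrors the flags this PR lands on, and assumes a user-defined network so other containers can reach the broker as 'redpanda' (the network name is made up here):

    docker network create redpanda-net
    docker run -d --name redpanda --network redpanda-net \
      -p 8081:8081 -p 8082:8082 -p 9092:9092 -p 29092:29092 \
      docker.vectorized.io/vectorized/redpanda:v21.9.5 \
      redpanda start --smp 1 --reserve-memory 0M --overprovisioned \
      --node-id 0 --check=false \
      --kafka-addr PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092 \
      --advertise-kafka-addr PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092 \
      --pandaproxy-addr 0.0.0.0:8082 --advertise-pandaproxy-addr redpanda:8082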

17 changes: 12 additions & 5 deletions Integrations/python/test/testConsumeKafka.py
@@ -114,7 +114,7 @@ def testAvro(self):
         Check an Avro Kafka subscription creates the right table.
         """

-        avro_as_json = \
+        schema = \
         """
         { "type" : "record",
           "namespace" : "io.deephaven.examples",
@@ -128,16 +128,23 @@
         }
         """

-        sys_str = "curl -X POST -H 'Content-type: application/json; artifactType=AVRO' " + \
-            "-H 'X-Registry-ArtifactId: share_price_record' -H 'X-Registry-Version: 1' " + \
-            "--data-binary '%s' http://registry:8080/api/artifacts" % (avro_as_json)
+        schema_str = '{ "schema" : "%s" }' % \
+            schema.replace('\n', ' ').replace('"', '\\"')
+
+        sys_str = \
+        """
+        curl -X POST \
+            -H 'Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO' \
+            --data-binary '%s' \
+            http://redpanda:8081/subjects/share_price_record/versions
+        """ % schema_str

         r = os.system(sys_str)
         self.assertEquals(0, r)

         t = ck.consumeToTable(
             { 'bootstrap.servers' : 'redpanda:29092',
-              'schema.registry.url' : 'http://registry:8080/api/ccompat' },
+              'schema.registry.url' : 'http://redpanda:8081' },
             'share_price',
             key = ck.IGNORE,
             value = ck.avro('share_price_record', schema_version='1'),
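(An aside on the escaping above: json.dumps can build the same payload without the manual replace() calls, assuming the registry accepts escaped newlines inside the schema string; a sketch, not what this PR does:)

    import json

    # json.dumps turns a python str into a quoted JSON string literal,
    # escaping embedded quotes and newlines in one step.
    schema_str = '{ "schema" : %s }' % json.dumps(schema)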
17 changes: 12 additions & 5 deletions Integrations/python/test/testProduceKafka.py
@@ -93,7 +93,7 @@ def testJsonAllArguments(self):
         cleanup()

     def testAvro(self):
-        avro_as_json = \
+        schema = \
         """
         { "type" : "record",
           "namespace" : "io.deephaven.examples",
@@ -113,9 +113,16 @@
         }
         """

-        sys_str = "curl -X POST -H 'Content-type: application/json; artifactType=AVRO' " + \
-            "-H 'X-Registry-ArtifactId: share_price_timestamped_record' -H 'X-Registry-Version: 1' " + \
-            "--data-binary '%s' http://registry:8080/api/artifacts" % (avro_as_json)
+        schema_str = '{ "schema" : "%s" }' % \
+            schema.replace('\n', ' ').replace('"', '\\"')
+
+        sys_str = \
+        """
+        curl -X POST \
+            -H 'Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO' \
+            --data-binary '%s' \
+            http://redpanda:8081/subjects/share_price_timestamped_record/versions
+        """ % schema_str

         r = os.system(sys_str)
         self.assertEquals(0, r)
@@ -124,7 +131,7 @@
         cleanup = pk.produceFromTable(
             t,
             { 'bootstrap.servers' : 'redpanda:29092',
-              'schema.registry.url' : 'http://registry:8080/api/ccompat' },
+              'schema.registry.url' : 'http://redpanda:8081' },
             'share_price_timestamped',
             key = pk.IGNORE,
             value = pk.avro(
16 changes: 9 additions & 7 deletions redpanda-standalone/docker-compose.yml
@@ -4,18 +4,20 @@ services:
     command:
     - redpanda
     - start
-    - --smp
-    - '1'
-    - --reserve-memory
-    - 0M
+    - --smp 1
+    - --reserve-memory 0M
     - --overprovisioned
-    - --node-id
-    - '0'
+    - --node-id 0
     - --check=false
     - --kafka-addr
     - PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092
     - --advertise-kafka-addr
     - PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092
-    image: docker.vectorized.io/vectorized/redpanda:v21.8.2
+    - --pandaproxy-addr 0.0.0.0:8082
+    - --advertise-pandaproxy-addr redpanda:8082
+    image: docker.vectorized.io/vectorized/redpanda:v21.9.5
     ports:
+    - 8081:8081
+    - 8082:8082
     - 9092:9092
     - 29092:29092
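With the embedded schema registry and pandaproxy ports now published, a quick smoke test from the host might look like this (a sketch; /subjects is the standard Confluent-compatible listing, and /topics is the pandaproxy equivalent):

    curl -s http://localhost:8081/subjects   # embedded schema registry
    curl -s http://localhost:8082/topics     # pandaproxy HTTP interface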
1 change: 1 addition & 0 deletions redpanda/examples/avro/README.md
@@ -0,0 +1 @@
# Avro schema for testing integrations with schema registry + Kafka
10 changes: 10 additions & 0 deletions redpanda/examples/avro/share_price.json
@@ -0,0 +1,10 @@
{ "type" : "record",
"namespace" : "io.deephaven.examples",
"name" : "share_price",
"fields" : [
{ "name" : "Symbol", "type" : "string" },
{ "name" : "Side", "type" : "string" },
{ "name" : "Qty", "type" : "int" },
{ "name" : "Price", "type" : "double" }
]
}
13 changes: 13 additions & 0 deletions redpanda/examples/post-share-price-schema.sh
@@ -0,0 +1,13 @@
#
# Simple sh command line to load an example avro schema into the redpanda instance of our test
# compose, which serves an embedded schema registry on port 8081.
#

# The redpanda schema service does not support newlines, and needs the schema embedded in a json
# object as the value for a "schema" key, which means the keys and values in the schema itself
# need to have their quotes escaped... what a royal pain.
PAYLOAD=$(echo -n '{ "schema" : "'; cat avro/share_price.json | sed 's/"/\\"/g' | tr '\n' ' '; echo -n '" }')

curl -X POST \
-H "Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO" \
--data-binary "$PAYLOAD" \
http://localhost:8081/subjects/share_price_record/versions
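
If the POST succeeds, the subject should be visible through the registry's Confluent-compatible API; a quick check from the host (assuming the compose above is up):

    curl -s http://localhost:8081/subjects
    # expect the returned list to include "share_price_record"
    curl -s http://localhost:8081/subjects/share_price_record/versions
    # expect: [1]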
117 changes: 117 additions & 0 deletions redpanda/examples/python/kafka-produce-avro.py
@@ -0,0 +1,117 @@
#
# Test driver to produce kafka messages using an avro schema.
#
# To run this script, you need confluent-kafka libraries installed.
# To create a dedicated venv for it, you can do:
#
# $ mkdir confluent-kafka; cd confluent-kafka
# $ python3 -m venv confluent-kafka
# $ cd confluent-kafka
# $ source bin/activate
# $ pip3 install confluent-kafka
#
# Note: On a Mac you may need to install the librdkafka package.
# You can use "brew install librdkafka" if the pip3 command fails
# with an error like "librdkafka/rdkafka.h' file not found"
# as found at confluentinc/confluent-kafka-python#166.
#
# Examples of use for DH testing together with the web UI.
#
# == Common to all:
#
# * Start the redpanda compose: (cd redpanda && docker-compose up --build)
# * From web UI do:
#
# > from deephaven import ConsumeKafka as ck
#
# == Example (1)
#
# Load a schema into schema registry for share_price_record.
# From the command line on the host (not inside a docker container), run:
#
# $ (cd .. && sh post-share-price-schema.sh)
#
# The last command above should have loaded the avro schema in the file avro/share_price.json
# into redpanda's embedded schema registry. You can check it was loaded by fetching, on the host:
# http://localhost:8081/subjects
# The response should now list 'share_price_record' as an available subject.
#
# From the web IDE, run:
#
# > t = ck.consumeToTable({'bootstrap.servers' : 'redpanda:29092', 'schema.registry.url' : 'http://redpanda:8081'}, 'share_price', value=ck.avro('share_price_record'), table_type='append')
#
# The last command above should create a table with columns: [ KafkaPartition, KafkaOffset, KafkaTimestamp, Symbol, Side, Qty, Price ]
# Run this script on the host (not inside a docker container) to generate one row:
#
# $ python3 ./kafka-produce-avro.py share_price 0 ../avro/share_price.json str:Symbol=MSFT str:Side=BUY double:Price=274.82 int:Qty=200
#
# You should see a new row show up in the web IDE with data matching the data sent above.
#
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

import sys

value_arg_form = "python_type:field=value"

if len(sys.argv) < 5:
    print("Usage: " + sys.argv[0] + " topic-name partition avro-schema-file-path " +
          value_arg_form + " [ " + value_arg_form + " ...]", file=sys.stderr)
    sys.exit(1)

topic_name = sys.argv[1]
partition = int(sys.argv[2])

with open(sys.argv[3], 'r') as file:
    value_schema_str = file.read()

value_schema = avro.loads(value_schema_str)

def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


avroProducer = AvroProducer({
        'bootstrap.servers': 'localhost:9092',
        'on_delivery': delivery_report,
        'schema.registry.url': 'http://localhost:8081'
    }, default_value_schema=value_schema)

def wrong_form(value_arg):
    print(sys.argv[0] + ": Error, argument " + value_arg +
          " is not of the form " + value_arg_form + ".", file=sys.stderr)
    sys.exit(1)

value = {}
for value_arg in sys.argv[4:]:
    s = value_arg.split(':', 1)
    if len(s) != 2:
        wrong_form(value_arg)
    ptype = s[0]
    field_eq_value = s[1]
    s = field_eq_value.split('=', 1)
    if len(s) != 2:
        wrong_form(value_arg)
    # Strictly speaking we are asking for a python type here (eg, "str", "int", "float", "bool").
    # We allow other type names for ease of use for us, people accustomed to Java.
    if (ptype == "str" or ptype == "string"):
        value[s[0]] = s[1]
    elif (ptype == "bool" or ptype == "boolean"):
        value[s[0]] = (s[1] == "true" or s[1] == "True")
    else:
        if ptype == "double":
            ptype = "float"
        elif ptype == "long" or ptype == "short":
            ptype = "int"
        # Look up the right python constructor and cast directly; this avoids
        # exec'ing a string built from command line input, which breaks on quotes.
        caster = {"int": int, "float": float}.get(ptype)
        if caster is None:
            wrong_form(value_arg)
        value[s[0]] = caster(s[1])

avroProducer.produce(topic=topic_name, partition=partition, key=None, value=value)
avroProducer.flush()
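
To sanity-check the produced message end to end, a read-back sketch (hypothetical; not part of this PR; it assumes the same host ports and uses the matching confluent_kafka.avro consumer helper, with a made-up group id):

    from confluent_kafka.avro import AvroConsumer

    # Read back one message and print the decoded Avro value.
    consumer = AvroConsumer({
        'bootstrap.servers': 'localhost:9092',
        'schema.registry.url': 'http://localhost:8081',
        'group.id': 'kafka-produce-avro-check',
        'auto.offset.reset': 'earliest',
    })
    consumer.subscribe(['share_price'])
    msg = consumer.poll(10)
    if msg is None or msg.error():
        print('no message received:', None if msg is None else msg.error())
    else:
        print(msg.value())  # e.g. {'Symbol': 'MSFT', 'Side': 'BUY', 'Qty': 200, 'Price': 274.82}
    consumer.close()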