Update redpanda version and enable embedded schema registry. #1593

Merged 2 commits on Nov 19, 2021
11 changes: 1 addition & 10 deletions Integrations/docker-compose.yml
@@ -9,13 +9,4 @@ services:
       service: redpanda
     expose:
       - 29092
-
-  registry:
-    image: apicurio/apicurio-registry-mem:1.2.2.Final
-    expose:
-      - 8080
-    environment:
-      QUARKUS_PROFILE: prod
-      KAFKA_BOOTSTRAP_SERVERS: redpanda:9092
-      APPLICATION_ID: registry_id
-      APPLICATION_SERVER: localhost:9000
+      - 8081
Member:
Personal opinion: stop using docker-compose for the tests if we can; the plugin is not great, and I think we'll be happier just doing plain docker commands.

This isn't a requirement for merge, or even a follow-up; just a preference.

Member (author):

I hear you, and it makes sense, but that is a non-trivial amount of work, so I'm not doing it on this pass.
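For reference, a plain-docker equivalent might look roughly like the sketch below; it mirrors the flags this PR lands on, and assumes a user-defined network so other containers can reach the broker as 'redpanda' (the network name is made up here):

    docker network create redpanda-net
    docker run -d --name redpanda --network redpanda-net \
      -p 8081:8081 -p 8082:8082 -p 9092:9092 -p 29092:29092 \
      docker.vectorized.io/vectorized/redpanda:v21.9.5 \
      redpanda start --smp 1 --reserve-memory 0M --overprovisioned \
      --node-id 0 --check=false \
      --kafka-addr PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092 \
      --advertise-kafka-addr PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092 \
      --pandaproxy-addr 0.0.0.0:8082 --advertise-pandaproxy-addr redpanda:8082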

17 changes: 12 additions & 5 deletions Integrations/python/test/testConsumeKafka.py
@@ -114,7 +114,7 @@ def testAvro(self):
         Check an Avro Kafka subscription creates the right table.
         """

-        avro_as_json = \
+        schema = \
         """
         { "type" : "record",
           "namespace" : "io.deephaven.examples",
@@ -128,16 +128,23 @@
         }
         """

-        sys_str = "curl -X POST -H 'Content-type: application/json; artifactType=AVRO' " + \
-            "-H 'X-Registry-ArtifactId: share_price_record' -H 'X-Registry-Version: 1' " + \
-            "--data-binary '%s' http://registry:8080/api/artifacts" % (avro_as_json)
+        schema_str = '{ "schema" : "%s" }' % \
+            schema.replace('\n', ' ').replace('"', '\\"')
+
+        sys_str = \
+        """
+        curl -X POST \
+            -H 'Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO' \
+            --data-binary '%s' \
+            http://redpanda:8081/subjects/share_price_record/versions
+        """ % schema_str

         r = os.system(sys_str)
         self.assertEquals(0, r)

         t = ck.consumeToTable(
             { 'bootstrap.servers' : 'redpanda:29092',
-              'schema.registry.url' : 'http://registry:8080/api/ccompat' },
+              'schema.registry.url' : 'http://redpanda:8081' },
             'share_price',
             key = ck.IGNORE,
             value = ck.avro('share_price_record', schema_version='1'),
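(An aside on the escaping above: json.dumps can build the same payload without the manual replace() calls, assuming the registry accepts escaped newlines inside the schema string; a sketch, not what this PR does:)

    import json

    # json.dumps turns a python str into a quoted JSON string literal,
    # escaping embedded quotes and newlines in one step.
    schema_str = '{ "schema" : %s }' % json.dumps(schema)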
17 changes: 12 additions & 5 deletions Integrations/python/test/testProduceKafka.py
@@ -93,7 +93,7 @@ def testJsonAllArguments(self):
         cleanup()

     def testAvro(self):
-        avro_as_json = \
+        schema = \
         """
         { "type" : "record",
           "namespace" : "io.deephaven.examples",
@@ -113,9 +113,16 @@
         }
         """

-        sys_str = "curl -X POST -H 'Content-type: application/json; artifactType=AVRO' " + \
-            "-H 'X-Registry-ArtifactId: share_price_timestamped_record' -H 'X-Registry-Version: 1' " + \
-            "--data-binary '%s' http://registry:8080/api/artifacts" % (avro_as_json)
+        schema_str = '{ "schema" : "%s" }' % \
+            schema.replace('\n', ' ').replace('"', '\\"')
+
+        sys_str = \
+        """
+        curl -X POST \
+            -H 'Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO' \
+            --data-binary '%s' \
+            http://redpanda:8081/subjects/share_price_timestamped_record/versions
+        """ % schema_str

         r = os.system(sys_str)
         self.assertEquals(0, r)
@@ -124,7 +131,7 @@
         cleanup = pk.produceFromTable(
             t,
             { 'bootstrap.servers' : 'redpanda:29092',
-              'schema.registry.url' : 'http://registry:8080/api/ccompat' },
+              'schema.registry.url' : 'http://redpanda:8081' },
             'share_price_timestamped',
             key = pk.IGNORE,
             value = pk.avro(
16 changes: 9 additions & 7 deletions redpanda-standalone/docker-compose.yml
@@ -4,18 +4,20 @@ services:
     command:
     - redpanda
     - start
-    - --smp
-    - '1'
-    - --reserve-memory
-    - 0M
+    - --smp 1
+    - --reserve-memory 0M
     - --overprovisioned
-    - --node-id
-    - '0'
+    - --node-id 0
     - --check=false
     - --kafka-addr
     - PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092
     - --advertise-kafka-addr
     - PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092
-    image: docker.vectorized.io/vectorized/redpanda:v21.8.2
+    - --pandaproxy-addr 0.0.0.0:8082
+    - --advertise-pandaproxy-addr redpanda:8082
+    image: docker.vectorized.io/vectorized/redpanda:v21.9.5
     ports:
+    - 8081:8081
+    - 8082:8082
     - 9092:9092
     - 29092:29092
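With the embedded schema registry and pandaproxy ports now published, a quick smoke test from the host might look like this (a sketch; /subjects is the standard Confluent-compatible listing, and /topics is the pandaproxy equivalent):

    curl -s http://localhost:8081/subjects   # embedded schema registry
    curl -s http://localhost:8082/topics     # pandaproxy HTTP interface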
1 change: 1 addition & 0 deletions redpanda/examples/avro/README.md
@@ -0,0 +1 @@
# Avro schema for testing integrations with schema registry + Kafka
10 changes: 10 additions & 0 deletions redpanda/examples/avro/share_price.json
@@ -0,0 +1,10 @@
{ "type" : "record",
"namespace" : "io.deephaven.examples",
"name" : "share_price",
"fields" : [
{ "name" : "Symbol", "type" : "string" },
{ "name" : "Side", "type" : "string" },
{ "name" : "Qty", "type" : "int" },
{ "name" : "Price", "type" : "double" }
]
}
13 changes: 13 additions & 0 deletions redpanda/examples/post-share-price-schema.sh
@@ -0,0 +1,13 @@
#
# Simple sh command line to load an example avro schema into the redpanda instance of our test
# compose, which serves an embedded schema registry on port 8081.
#

# The redpanda schema service does not support newlines, and needs the schema embedded in a json
# object as the value for a "schema" key, which means the keys and values in the schema itself
# need to have their quotes escaped... what a royal pain.
PAYLOAD=$(echo -n '{ "schema" : "'; cat avro/share_price.json | sed 's/"/\\"/g' | tr '\n' ' '; echo -n '" }')

curl -X POST \
-H "Content-type: application/vnd.schemaregistry.v1+json; artifactType=AVRO" \
--data-binary "$PAYLOAD" \
http://localhost:8081/subjects/share_price_record/versions
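
If the POST succeeds, the subject should be visible through the registry's Confluent-compatible API; a quick check from the host (assuming the compose above is up):

    curl -s http://localhost:8081/subjects
    # expect the returned list to include "share_price_record"
    curl -s http://localhost:8081/subjects/share_price_record/versions
    # expect: [1]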
117 changes: 117 additions & 0 deletions redpanda/examples/python/kafka-produce-avro.py
@@ -0,0 +1,117 @@
#
# Test driver to produce kafka messages using an avro schema.
#
# To run this script, you need confluent-kafka libraries installed.
# To create a dedicated venv for it, you can do:
#
# $ mkdir confluent-kafka; cd confluent-kafka
# $ python3 -m venv confluent-kafka
# $ cd confluent-kafka
# $ source bin/activate
# $ pip3 install confluent-kafka
#
# Note: On a Mac you may need to install the librdkafka package.
# You can use "brew install librdkafka" if the pip3 command fails
# with an error like "librdkafka/rdkafka.h' file not found"
# as found at confluentinc/confluent-kafka-python#166.
#
# Examples of use for DH testing together with the web UI.
#
# == Common to all:
#
# * Start the redpanda compose: (cd redpanda && docker-compose up --build)
# * From web UI do:
#
# > from deephaven import ConsumeKafka as ck
#
# == Example (1)
#
# Load a schema into schema registry for share_price_record.
# From the command line on the host (not inside a docker container), run:
#
# $ (cd .. && sh post-share-price-schema.sh)
#
# The last command above should have loaded the avro schema in the file avro/share_price.json
# into redpanda's embedded schema registry. You can check it was loaded by fetching, on the host:
# http://localhost:8081/subjects
# The response should now list 'share_price_record' as an available subject.
#
# From the web IDE, run:
#
# > t = ck.consumeToTable({'bootstrap.servers' : 'redpanda:29092', 'schema.registry.url' : 'http://redpanda:8081'}, 'share_price', value=ck.avro('share_price_record'), table_type='append')
#
# The last command above should create a table with columns: [ KafkaPartition, KafkaOffset, KafkaTimestamp, Symbol, Side, Qty, Price ]
# Run this script on the host (not inside a docker container) to generate one row:
#
# $ python3 ./kafka-produce-avro.py share_price 0 ../avro/share_price.json str:Symbol=MSFT str:Side=BUY double:Price=274.82 int:Qty=200
#
# You should see a new row show up in the web IDE with data matching the data sent above.
#
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

import sys

value_arg_form = "python_type:field=value"

if len(sys.argv) < 5:
    print("Usage: " + sys.argv[0] + " topic-name partition avro-schema-file-path " +
          value_arg_form + " [ " + value_arg_form + " ...]", file=sys.stderr)
    sys.exit(1)

topic_name = sys.argv[1]
partition = int(sys.argv[2])

with open(sys.argv[3], 'r') as file:
    value_schema_str = file.read()

value_schema = avro.loads(value_schema_str)

def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))


avroProducer = AvroProducer({
        'bootstrap.servers': 'localhost:9092',
        'on_delivery': delivery_report,
        'schema.registry.url': 'http://localhost:8081'
    }, default_value_schema=value_schema)

def wrong_form(value_arg):
    print(sys.argv[0] + ": Error, argument " + value_arg +
          " is not of the form " + value_arg_form + ".", file=sys.stderr)
    sys.exit(1)

value = {}
for value_arg in sys.argv[4:]:
    s = value_arg.split(':', 1)
    if len(s) != 2:
        wrong_form(value_arg)
    ptype = s[0]
    field_eq_value = s[1]
    s = field_eq_value.split('=', 1)
    if len(s) != 2:
        wrong_form(value_arg)
    # Strictly speaking we are asking for a python type here (eg, "str", "int", "float", "bool").
    # We allow other type names for ease of use for us, people accustomed to Java.
    if (ptype == "str" or ptype == "string"):
        value[s[0]] = s[1]
    elif (ptype == "bool" or ptype == "boolean"):
        value[s[0]] = (s[1] == "true" or s[1] == "True")
    else:
        if ptype == "double":
            ptype = "float"
        elif ptype == "long" or ptype == "short":
            ptype = "int"
        # Look up the right python constructor and cast directly; this avoids
        # exec'ing a string built from command line input, which breaks on quotes.
        caster = {"int": int, "float": float}.get(ptype)
        if caster is None:
            wrong_form(value_arg)
        value[s[0]] = caster(s[1])

avroProducer.produce(topic=topic_name, partition=partition, key=None, value=value)
avroProducer.flush()
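
To sanity-check the produced message end to end, a read-back sketch (hypothetical; not part of this PR; it assumes the same host ports and uses the matching confluent_kafka.avro consumer helper, with a made-up group id):

    from confluent_kafka.avro import AvroConsumer

    # Read back one message and print the decoded Avro value.
    consumer = AvroConsumer({
        'bootstrap.servers': 'localhost:9092',
        'schema.registry.url': 'http://localhost:8081',
        'group.id': 'kafka-produce-avro-check',
        'auto.offset.reset': 'earliest',
    })
    consumer.subscribe(['share_price'])
    msg = consumer.poll(10)
    if msg is None or msg.error():
        print('no message received:', None if msg is None else msg.error())
    else:
        print(msg.value())  # e.g. {'Symbol': 'MSFT', 'Side': 'BUY', 'Qty': 200, 'Price': 274.82}
    consumer.close()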