Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create C library wrapper over existing Java library using GraalVM #196

Open
wants to merge 2 commits into
base: native-schema-registry-release
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions native-schema-registry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Native Schema Registry

This module provides a native shared library (.so, .dll) version of the AWS Glue Schema Registry SerDes.
It uses GraalVM to generate the shared library.

## Build

Requires GraalVM (21.0+) with native-image support.

The C data types module needs to be built before building the Java module.

```sh
cd c && cmake -S. -Bbuild
cd build && cmake --build . --target native_schema_registry_c_data_types
cd ../../ && mvn package -P native-image
```
57 changes: 57 additions & 0 deletions native-schema-registry/c/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
# clang-format configuration for the C sources and headers of this module.
# Highlights of the settings below: Cpp language mode, 4-space indentation
# without tabs, a 120-column limit, attached braces, right-aligned pointers,
# and one argument/parameter per line when a call or declaration is wrapped.
Language: Cpp
# BasedOnStyle: Mozilla
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
BinPackArguments: false
BinPackParameters: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakStringLiterals: true
ColumnLimit: 120
ContinuationIndentWidth: 4
DerivePointerAlignment: false
IncludeBlocks: Preserve
IndentCaseLabels: true
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
# Penalties steer line-breaking decisions; larger values make a break less likely.
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 100000
PointerAlignment: Right
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
...
12 changes: 12 additions & 0 deletions native-schema-registry/c/.clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# clang-tidy configuration for the C module.
# Checks: comma-separated globs. The clang-diagnostic, clang-analyzer,
# readability, modernize, bugprone and misc groups are enabled together with a
# few individual checks; entries prefixed with '-' are switched off again.
Checks: 'clang-diagnostic-*,clang-analyzer-*,readability-*,modernize-*,bugprone-*,misc-*,google-runtime-int,llvm-header-guard,fuchsia-restrict-system-includes,-clang-analyzer-valist.Uninitialized,-clang-analyzer-security.insecureAPI.rand,-clang-analyzer-alpha.*,-readability-magic-numbers,-readability-non-const-parameter,-readability-avoid-const-params-in-decls,-readability-else-after-return,-readability-isolate-declaration,-readability-uppercase-literal-suffix,-bugprone-sizeof-expression'
# Every enabled warning is reported as an error.
WarningsAsErrors: '*'
# Regular expression selecting the files whose diagnostics are reported.
HeaderFilterRegex: '(./c/src/.*\.c$)|(./c/include/.*\.h$)'
# Format suggested fixes according to the .clang-format file.
FormatStyle: 'file'
# Per-check option overrides, given as key/value pairs.
CheckOptions:
- key: readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-runtime-int.TypeSuffix
value: '_t'
- key: fuchsia-restrict-system-includes.Includes
value: '*,-stdint.h,-stdbool.h,-assert.h'
36 changes: 36 additions & 0 deletions native-schema-registry/c/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
cmake_minimum_required(VERSION 3.19)

# Top-level build script for the C API of the native schema registry.

# macOS defaults to the Apple compiler; this project expects a separately
# installed gcc. The default is applied only when no compiler has been chosen
# explicitly (e.g. -DCMAKE_C_COMPILER=...), so a user-supplied choice is not
# silently shadowed. It has to happen before project(), which detects the
# compiler.
if(APPLE AND NOT DEFINED CMAKE_C_COMPILER)
    set(CMAKE_C_COMPILER gcc)
endif()

project(native_schema_registry_c C)
set(CMAKE_C_STANDARD 99)

## Global variables
# Directory containing the GraalVM generated shared library.
set(LIB_NATIVE_SCHEMA_REGISTRY_PATH "${PROJECT_SOURCE_DIR}/../target")
set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX libnativeschemaregistry)

## OS specific variables
# File name of the shared library: the common prefix plus the platform suffix.
if(WIN32)
    set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.dll)
    # Only defined on Windows (presumably the import library used when linking
    # against the DLL - confirm in src/CMakeLists.txt).
    set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_OBJ_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.lib)
elseif(APPLE)
    set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.dylib)
else()
    set(LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME ${LIB_NATIVE_SCHEMA_REGISTRY_LIBRARY_NAME_PREFIX}.so)
endif()

# Module names, defined here so they are visible to the subdirectories added
# further down.
set(DATA_TYPES_MODULE_NAME native_schema_registry_c_data_types)
set(SERDE_MODULE_NAME native_schema_registry_c)
set(AWS_COMMON_MEMALLOC aws_common_memalloc)
set(NATIVE_SCHEMA_REGISTRY_MODULE_NAME libnativeschemaregistry)

# Default for the WINDOWS_EXPORT_ALL_SYMBOLS property of targets created after
# this point.
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# Header search paths: this module's public headers, then (after aws-c-common
# has been fetched) the directory holding the GraalVM generated library.
include_directories("include")
include(cmake/FetchAwsCommon.cmake)
include_directories("${LIB_NATIVE_SCHEMA_REGISTRY_PATH}")

add_subdirectory("src")

# Testing is enabled explicitly in addition to including CTest, so the tests
# are registered regardless of the BUILD_TESTING setting.
include(CTest)
enable_testing()
add_subdirectory("test")
blacktooth marked this conversation as resolved.
Show resolved Hide resolved
46 changes: 46 additions & 0 deletions native-schema-registry/c/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Native Schema Registry in C

This module provides a C language based API for the schema registry serializer / de-serializers.

## Build
We use CMake to build the targets in this module.

### Compile
```sh
#Run in c directory

cmake -S. -Bbuild
cd build
cmake --build .
```

#### Clean
```sh
cmake --build . --target clean
```
### Testing
```sh
ctest .
#Re-run failed tests with verbose output
ctest --rerun-failed --output-on-failure
```

### Code Analysis
Code is statically analyzed using clang-tidy.

### Coverage
Code coverage is checked using gcov and lcov; the build fails if the coverage is below the threshold.

#### Installation
You might have to install these modules using your OS package manager.

### Sanitizers
We use the address and leak sanitizers to detect memory leaks and other potential issues during the build. As of now, they only work on Linux.

### Platform Support

TBD

## License

**Project License** [Apache License Version 2.0](https://github.com/awslabs/aws-glue-schema-registry/blob/master/LICENSE.txt)

N.B.: Although this repository is released under the Apache-2.0 license, its build dependencies include the third party Swig project. The Swig project's licensing includes the GPL-3.0 license.
12 changes: 12 additions & 0 deletions native-schema-registry/c/cmake/FetchAwsCommon.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Downloads aws-c-common at configure time and adds it to the build.
include(FetchContent)

# Set for use by the including project; not referenced again in this file.
# Note that the FetchContent name below is the literal AWS_C_COMMON, not the
# value of this variable.
set(AWS_C_COMMON aws-c-common)

# Pinned to a release tag and cloned without history.
FetchContent_Declare(
    AWS_C_COMMON
    GIT_REPOSITORY https://github.com/awslabs/aws-c-common.git
    GIT_TAG        v0.7.4
    GIT_SHALLOW    TRUE
)

FetchContent_MakeAvailable(AWS_C_COMMON)
24 changes: 24 additions & 0 deletions native-schema-registry/c/format-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
#
# Verifies that the C sources and headers under src/ and include/ are formatted
# according to clang-format.
#
# Usage: run from the directory that contains src/ and include/.
#   CLANG_FORMAT  optional; clang-format executable to use (default: clang-format)
#
# Exit status: 0 when every file is correctly formatted, 1 when clang-format is
# not available or at least one file needs reformatting.

if [[ -z $CLANG_FORMAT ]] ; then
    CLANG_FORMAT=clang-format
fi

# Bail out early when the formatter cannot be found. `!` negates the exit
# status of `type`.
if ! type "$CLANG_FORMAT" > /dev/null 2>&1 ; then
    echo "No appropriate clang-format found."
    exit 1
fi

FAIL=0
# File names are read NUL-delimited so that paths containing whitespace are
# handled as a single file.
while IFS= read -r -d '' source_file ; do
    # clang-format emits one <replacement ...> element per required change.
    # grep exits with 1 only when there is none, i.e. the file is already
    # formatted; any other status is treated as a failure.
    "$CLANG_FORMAT" -output-replacements-xml "$source_file" | grep -q "<replacement "
    if [ $? -ne 1 ] ; then
        echo "$source_file failed clang-format check."
        FAIL=1
    fi
done < <(find src include -type f \( -name '*.h' -o -name '*.c' \) -print0)

exit $FAIL
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef GLUE_SCHEMA_REGISTRY_DESERIALIZER_H
#define GLUE_SCHEMA_REGISTRY_DESERIALIZER_H

#include "glue_schema_registry_error.h"
#include "glue_schema_registry_schema.h"
#include "mutable_byte_array.h"
#include "read_only_byte_array.h"
#include <stdbool.h>

/*
 * De-serializer instance. Created with new_glue_schema_registry_deserializer
 * and passed to the glue_schema_registry_deserializer_* functions.
 */
typedef struct glue_schema_registry_deserializer {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not blocking anything here, because it's 100% your call, but the typedef struct idiom goes off the rails when used too frequently. I tend to just use 'struct type' declarations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will check how it plays with Swig and take a call.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It plays well with Swig as Swig generates the class structures in target languages from typefdef structs, https://www.swig.org/Doc4.0/SWIGDocumentation.html#SWIG_nn32
Leaving it as is.

/* Stores the instance context. Currently used for managing the GraalVM
 * instance. */
void *instance_context;
} glue_schema_registry_deserializer;

/*
 * Creates a new glue_schema_registry_deserializer instance.
 * NOTE(review): failures are presumably reported through p_err and the result
 * released with delete_glue_schema_registry_deserializer - confirm against the
 * implementation, which is not part of this header.
 */
glue_schema_registry_deserializer *new_glue_schema_registry_deserializer(glue_schema_registry_error **p_err);

/*
 * Deletes the given glue_schema_registry_deserializer instance.
 */
void delete_glue_schema_registry_deserializer(glue_schema_registry_deserializer *deserializer);

/*
 * Decodes the given read-only byte array with the de-serializer and returns
 * the result as a mutable_byte_array.
 * NOTE(review): ownership of the returned array and the value returned on
 * failure are not stated in this header; errors are presumably reported
 * through p_err - confirm against the implementation.
 */
mutable_byte_array *glue_schema_registry_deserializer_decode(
const glue_schema_registry_deserializer *deserializer,
const read_only_byte_array *array,
glue_schema_registry_error **p_err);

/*
 * Returns a glue_schema_registry_schema obtained from the given read-only byte
 * array.
 * NOTE(review): presumably the schema the encoded data was written with;
 * ownership of the result is not stated here - confirm.
 */
glue_schema_registry_schema *glue_schema_registry_deserializer_decode_schema(
const glue_schema_registry_deserializer *deserializer,
const read_only_byte_array *array,
glue_schema_registry_error **p_err);

/*
 * Returns a boolean for the given read-only byte array.
 * NOTE(review): presumably true when the array can be decoded by this
 * de-serializer; the exact criteria are defined by the implementation -
 * confirm.
 */
bool glue_schema_registry_deserializer_can_decode(
const glue_schema_registry_deserializer *deserializer,
const read_only_byte_array *array,
glue_schema_registry_error **p_err);

#endif /* GLUE_SCHEMA_REGISTRY_DESERIALIZER_H */
56 changes: 56 additions & 0 deletions native-schema-registry/c/include/glue_schema_registry_error.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef GLUE_SCHEMA_REGISTRY_ERROR_H
#define GLUE_SCHEMA_REGISTRY_ERROR_H

#include <stdio.h>

/*
 * Error codes used by this library. The values are arbitrarily listed from
 * 5000 (5000 through 5005); there is no specific reason for the starting
 * value.
 */
enum aws_gsr_error_code {
AWS_GSR_ERR_CODE_INVALID_STATE = 5000,
AWS_GSR_ERR_CODE_NULL_PARAMETERS = 5001,
AWS_GSR_ERR_CODE_GRAALVM_INIT_EXCEPTION = 5002,
AWS_GSR_ERR_CODE_GRAALVM_TEARDOWN_EXCEPTION = 5003,
AWS_GSR_ERR_CODE_INVALID_PARAMETERS = 5004,
AWS_GSR_ERR_CODE_RUNTIME_ERROR = 5005
};

/*
 * Writes a warning of the form "WARN: <msg>, Code: <code>" followed by a
 * newline to stderr. msg is formatted as a string and code as a decimal int.
 * TODO: Improve error reporting to respect logging levels.
 */
#define log_warn(msg, code) fprintf(stderr, "WARN: %s, Code: %d\n", msg, code)
blacktooth marked this conversation as resolved.
Show resolved Hide resolved

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's a whole logger in aws-c-common

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing it out. I will evaluate and take the logging part separately.


/* NOTE(review): presumably the maximum length of an error message; where the
 * limit is applied is not visible in this header - confirm. */
#define AWS_GSR_MAX_ERROR_MSG_LEN 10000

/**
 * Defines the glue_schema_registry_error structure for holding the error
 * message and error code resulting from function executions.
 */
typedef struct glue_schema_registry_error {
/* Error message. */
char *msg;
/* Error code. */
int code;
} glue_schema_registry_error;

/*
 * Creates a glue_schema_registry_error from the given message and code.
 * NOTE(review): whether err_msg is copied or referenced is not visible in this
 * header - confirm against the implementation.
 */
glue_schema_registry_error *new_glue_schema_registry_error(const char *err_msg, int err_code);

/* Deletes the given glue_schema_registry_error. */
void delete_glue_schema_registry_error(glue_schema_registry_error *error);

/*
 * Copies the given error's msg into the dst array, trimming it as necessary.
 * NOTE(review): len is presumably the capacity of dst - confirm.
 */
void glue_schema_registry_error_get_msg(const glue_schema_registry_error *error, char *dst, size_t len);

/**
 * Creates an instance of glue_schema_registry_error and writes it to the given
 * glue_schema_registry_error pointer holder (*p_err). It is expected that
 * *p_err is initialized by the caller.
 * @param p_err Initialized glue_schema_registry_error pointer holder.
 * @param msg Error message to write.
 * @param code Non-zero error code.
 */
void throw_error(glue_schema_registry_error **p_err, const char *msg, int code);

/**
 * Creates a pointer holder for an instance of glue_schema_registry_error.
 */
glue_schema_registry_error **new_glue_schema_registry_error_holder(void);

/**
 * Deletes the pointer holder of glue_schema_registry_error and its content.
 */
void delete_glue_schema_registry_error_holder(glue_schema_registry_error **p_err);

#endif /* GLUE_SCHEMA_REGISTRY_ERROR_H */
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef GLUE_SCHEMA_REGISTRY_MEMORY_ALLOCATOR_H
#define GLUE_SCHEMA_REGISTRY_MEMORY_ALLOCATOR_H

#include <stdlib.h>

/*
 * Wrappers over the AWS SDK Common memory allocator.
 * NOTE(review): the signatures mirror malloc / calloc / free from the C
 * standard library; whether failure handling matches them as well is defined
 * by the implementation - confirm.
 */

/* Allocation wrapper taking the requested size. */
void *aws_common_malloc(size_t size);

/* Allocation wrapper taking an element count and an element size. */
void *aws_common_calloc(size_t count, size_t size);

/* Release wrapper taking the pointer to free. */
void aws_common_free(void *ptr);

#endif /* GLUE_SCHEMA_REGISTRY_MEMORY_ALLOCATOR_H */
47 changes: 47 additions & 0 deletions native-schema-registry/c/include/glue_schema_registry_schema.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#ifndef GLUE_SCHEMA_REGISTRY_SCHEMA_H
#define GLUE_SCHEMA_REGISTRY_SCHEMA_H

#include "glue_schema_registry_error.h"

/*
 * Glue Schema Registry Schema structure that represents the schema object
 * required by the Glue Schema Registry Serializers / De-serializers.
 * All attributes are held as strings.
 */
typedef struct glue_schema_registry_schema {
/* Name of the schema, as a string. */
char *schema_name;

/* Complete definition of the schema, as a string. */
char *schema_def;

/* Name of the data format (JSON, AVRO, PROTOBUF), as a string. */
char *data_format;

} glue_schema_registry_schema;

/*
 * Creates a new instance of glue_schema_registry_schema from the given schema
 * name, schema definition and data format name.
 * NOTE(review): whether the strings are copied, and how invalid arguments are
 * reported through p_err, is not visible in this header - confirm against the
 * implementation.
 */
glue_schema_registry_schema *new_glue_schema_registry_schema(
const char *schema_name,
const char *schema_def,
const char *data_format,
glue_schema_registry_error **p_err);

/*
 * Deletes the given glue_schema_registry_schema.
 */
void delete_glue_schema_registry_schema(glue_schema_registry_schema *schema);

/*
 * Getters for the individual attributes of a glue_schema_registry_schema
 * instance. These getter methods are translated into "Getter" methods in the
 * target languages.
 */

/* Gets the schema name. */
const char *glue_schema_registry_schema_get_schema_name(const glue_schema_registry_schema *schema);

/* Gets the schema definition. */
const char *glue_schema_registry_schema_get_schema_def(const glue_schema_registry_schema *schema);

/* Gets the data format name. */
const char *glue_schema_registry_schema_get_data_format(const glue_schema_registry_schema *schema);

#endif /* GLUE_SCHEMA_REGISTRY_SCHEMA_H */
27 changes: 27 additions & 0 deletions native-schema-registry/c/include/glue_schema_registry_serializer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef GLUE_SCHEMA_REGISTRY_SERIALIZER_H
#define GLUE_SCHEMA_REGISTRY_SERIALIZER_H

#include "glue_schema_registry_error.h"
#include "glue_schema_registry_schema.h"
#include "mutable_byte_array.h"
#include "read_only_byte_array.h"

/*
 * Serializer instance. Created with new_glue_schema_registry_serializer and
 * passed to glue_schema_registry_serializer_encode.
 */
typedef struct glue_schema_registry_serializer {
/* Stores the instance context. Currently used for managing the GraalVM
 * instance. */
void *instance_context;
} glue_schema_registry_serializer;

/*
 * Creates a new glue_schema_registry_serializer instance.
 * NOTE(review): failures are presumably reported through p_err and the result
 * released with delete_glue_schema_registry_serializer - confirm against the
 * implementation, which is not part of this header.
 */
glue_schema_registry_serializer *new_glue_schema_registry_serializer(glue_schema_registry_error **p_err);

/*
 * Deletes the given glue_schema_registry_serializer instance.
 */
void delete_glue_schema_registry_serializer(glue_schema_registry_serializer *serializer);

/*
 * Encodes the GSR Schema with a byte array and returns the result as a
 * mutable_byte_array.
 * NOTE(review): the meaning of transport_name, ownership of the returned
 * array and the value returned on failure are not documented in this header -
 * confirm against the implementation.
 */
mutable_byte_array *glue_schema_registry_serializer_encode(
const glue_schema_registry_serializer *serializer,
const read_only_byte_array *array,
const char *transport_name,
const glue_schema_registry_schema *gsr_schema,
glue_schema_registry_error **p_err);

#endif /* GLUE_SCHEMA_REGISTRY_SERIALIZER_H */
Loading