-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add remaining data types to schema visiting in ffi #187
Changes from 6 commits
1d5a9dd
32428a5
7f59e54
bcc5a4f
9593038
6fd15cf
4baa68a
8747fe3
6c4a173
87ef58c
21a3402
78378b8
0fc9bd7
a3ff414
03c34db
184e2c3
0805f38
76b942b
245da8a
d479631
872b0e7
47abc39
84af4ba
cd86766
93fc17c
711b16b
953c3e8
2498d10
8cdf7f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
#include <stdint.h> | ||
#include "delta_kernel_ffi.h" | ||
|
||
/** | ||
|
@@ -8,21 +9,21 @@ | |
* Each list is a "SchemaItemList", which tracks its length and an array of "SchemaItem"s. | ||
* | ||
* Each "SchemaItem" has a name and a type, which are just strings. It can also have a list which is | ||
* its children. This is initially always NULL, but when visiting a struct, map, or array, we point | ||
* this at the list specified in the callback, which allows us to traverse the schema when printing | ||
* it. Note that this points to one of the lists in the builder's set of lists and is not a copy. | ||
* its children. This is initially always UINTPTR_MAX, but when visiting a struct, map, or array, we | ||
* point this at the list id specified in the callback, which allows us to traverse the schema when | ||
* printing it. | ||
*/ | ||
|
||
// If you want the visitor to print out what it's being asked to do at each step, uncomment the | ||
// following line | ||
// #define PRINT_VISITS | ||
//#define PRINT_VISITS | ||
|
||
typedef struct SchemaItemList SchemaItemList; | ||
|
||
typedef struct { | ||
char* name; | ||
char* type; | ||
SchemaItemList* children; | ||
uintptr_t children; | ||
} SchemaItem; | ||
|
||
typedef struct SchemaItemList { | ||
|
@@ -36,9 +37,7 @@ typedef struct { | |
} SchemaBuilder; | ||
|
||
char* allocate_name(const KernelStringSlice slice) { | ||
char* buf = malloc(sizeof(char) * (slice.len + 1)); // +1 for null | ||
snprintf(buf, slice.len + 1, "%s", slice.ptr); | ||
return buf; | ||
return strndup(slice.ptr, slice.len); | ||
} | ||
|
||
// lists are preallocated to have exactly enough space, so we just fill in the next open slot and | ||
|
@@ -52,42 +51,40 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { | |
} | ||
|
||
// print out all items in a list, recursing into any children they may have | ||
void print_list(SchemaItemList *list, int indent, bool parent_on_last) { | ||
void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool parent_on_last) { | ||
SchemaItemList *list = builder->lists+list_id; | ||
nicklan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for (int i = 0; i < list->len; i++) { | ||
bool is_last = i == list->len - 1; | ||
for (int j = 0; j <= indent; j++) { | ||
if (j == indent) { | ||
if (is_last) { | ||
printf("└"); | ||
} else { | ||
printf("├"); | ||
} | ||
for (int j = 0; j < indent; j++) { | ||
if (parent_on_last && j == indent - 1) { | ||
// don't print a dangling | on my parent's last item | ||
printf(" "); | ||
} else { | ||
if (parent_on_last && j == indent - 1) { | ||
// don't print a dangling | on my parent's last item | ||
printf(" "); | ||
} else { | ||
printf("│ "); | ||
} | ||
printf("│ "); | ||
} | ||
} | ||
printf("─ %s: %s\n", list->list[i].name, list->list[i].type); | ||
if (list->list[i].children) { | ||
print_list(list->list[i].children, indent+1, is_last); | ||
SchemaItem* item = &list->list[i]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should probably decide whether to use (I personally prefer the former because it's obvious at a glance that it's not normal arithmetic... but I recognize it's also a bit more typing) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, makes sense. I tend to use |
||
char* prefix = is_last? "└" : "├"; | ||
printf("%s─ %s: %s\n", prefix, item->name, item->type); | ||
if (list->list[i].children != UINTPTR_MAX) { | ||
print_list(builder, list->list[i].children, indent+1, is_last); | ||
} | ||
} | ||
} | ||
|
||
// declare all our visitor methods | ||
uintptr_t make_field_list(void *data, uintptr_t reserve) { | ||
SchemaBuilder *builder = (SchemaBuilder*)data; | ||
SchemaBuilder *builder = data; | ||
int id = builder->list_count; | ||
#ifdef PRINT_VISITS | ||
printf("Making a list of lenth %i with id %i\n", reserve, id); | ||
#endif | ||
builder->list_count++; | ||
builder->lists = realloc(builder->lists, sizeof(SchemaItemList) * builder->list_count); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why (if there was some valid previous usage, wouldn't we need to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yikes, this is being used to grow the array... but There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, real bad on the pointers 🤦🏽. I've updated things to use indexes.
We don't know the total number of lists ahead of time. SchemaBuilder builder = {
.list_count = 0,
.lists = calloc(0, sizeof(SchemaItem*)),
}; as well as each other call to this method.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I was referring to the lists of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One gotcha with using realloc this way -- it has There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep. I considered increasing more each time, but: |
||
SchemaItem* list = calloc(reserve, sizeof(SchemaItem)); | ||
for (int i = 0; i < reserve; i++) { | ||
list[i].children = UINTPTR_MAX; | ||
} | ||
builder->lists[id].len = 0; | ||
builder->lists[id].list = list; | ||
return id; | ||
|
@@ -97,38 +94,38 @@ void visit_struct(void *data, | |
uintptr_t sibling_list_id, | ||
struct KernelStringSlice name, | ||
uintptr_t child_list_id) { | ||
SchemaBuilder *builder = data; | ||
char* name_ptr = allocate_name(name); | ||
#ifdef PRINT_VISITS | ||
printf("Asked to visit a struct, belonging to list %i. Children are in %i\n", sibling_list_id, child_list_id); | ||
printf("Asked to visit a struct, belonging to list %i for %s. Children are in %i\n", sibling_list_id, name_ptr, child_list_id); | ||
#endif | ||
SchemaBuilder *builder = (SchemaBuilder*)data; | ||
char* name_ptr = allocate_name(name); | ||
SchemaItem* struct_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "struct"); | ||
struct_item->children = builder->lists+child_list_id; | ||
struct_item->children = child_list_id; | ||
} | ||
void visit_array(void *data, | ||
uintptr_t sibling_list_id, | ||
struct KernelStringSlice name, | ||
bool contains_null, | ||
uintptr_t child_list_id) { | ||
SchemaBuilder *builder = data; | ||
char* name_ptr = allocate_name(name); | ||
#ifdef PRINT_VISITS | ||
printf("Asked to visit array, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); | ||
printf("Asked to visit array, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); | ||
#endif | ||
SchemaBuilder *builder = (SchemaBuilder*)data; | ||
char* name_ptr = allocate_name(name); | ||
SchemaItem* array_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "array"); | ||
array_item->children = builder->lists+child_list_id; | ||
array_item->children = child_list_id; | ||
} | ||
void visit_map(void *data, | ||
uintptr_t sibling_list_id, | ||
struct KernelStringSlice name, | ||
uintptr_t child_list_id) { | ||
SchemaBuilder *builder = data; | ||
char* name_ptr = allocate_name(name); | ||
#ifdef PRINT_VISITS | ||
printf("Asked to visit map, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); | ||
printf("Asked to visit map, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); | ||
#endif | ||
SchemaBuilder *builder = (SchemaBuilder*)data; | ||
char* name_ptr = allocate_name(name); | ||
SchemaItem* map_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "map"); | ||
map_item->children = builder->lists+child_list_id; | ||
map_item->children = child_list_id; | ||
} | ||
|
||
void visit_decimal(void *data, | ||
|
@@ -139,70 +136,43 @@ void visit_decimal(void *data, | |
#ifdef PRINT_VISITS | ||
printf("Asked to visit decimal with precision %i and scale %i, belonging to list %i\n", sibling_list_id); | ||
#endif | ||
SchemaBuilder *builder = (SchemaBuilder*)data; | ||
SchemaBuilder *builder = data; | ||
char* name_ptr = allocate_name(name); | ||
char* type = malloc(16 * sizeof(char)); | ||
sprintf(type, "decimal(%i)(%i)", precision, scale); | ||
snprintf(type, 16, "decimal(%i)(%i)", precision, scale); | ||
add_to_list(builder->lists+sibling_list_id, name_ptr, type); | ||
} | ||
|
||
|
||
|
||
void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { | ||
SchemaBuilder *builder = data; | ||
char* name_ptr = allocate_name(name); | ||
#ifdef PRINT_VISITS | ||
printf("Asked to visit a(n) %s belonging to list %i\n", type, sibling_list_id); | ||
printf("Asked to visit a(n) %s belonging to list %i for %s\n", type, sibling_list_id, name_ptr); | ||
#endif | ||
SchemaBuilder *builder = (SchemaBuilder*)data; | ||
char* name_ptr = allocate_name(name); | ||
add_to_list(builder->lists+sibling_list_id, name_ptr, type); | ||
} | ||
|
||
void visit_string(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "string"); | ||
} | ||
|
||
void visit_long(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "long"); | ||
} | ||
|
||
void visit_integer(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "integer"); | ||
} | ||
|
||
void visit_short(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "short"); | ||
} | ||
|
||
void visit_byte(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "byte"); | ||
} | ||
void visit_float(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "float"); | ||
} | ||
|
||
void visit_double(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "double"); | ||
} | ||
|
||
void visit_boolean(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "boolean"); | ||
} | ||
|
||
void visit_binary(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "binary"); | ||
} | ||
|
||
void visit_date(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "date"); | ||
} | ||
|
||
void visit_timestamp(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "timestamp"); | ||
} | ||
|
||
void visit_timestamp_ntz(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { | ||
visit_simple_type(data, sibling_list_id, name, "timestamp_ntz"); | ||
} | ||
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \ | ||
void visit_##typename(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { \ | ||
visit_simple_type(data, sibling_list_id, name, #typename); \ | ||
} | ||
|
||
// free all the data in the builder (but not the builder itself, since that might be stack allocated) | ||
DEFINE_VISIT_SIMPLE_TYPE(string); | ||
DEFINE_VISIT_SIMPLE_TYPE(integer); | ||
DEFINE_VISIT_SIMPLE_TYPE(short); | ||
DEFINE_VISIT_SIMPLE_TYPE(byte); | ||
DEFINE_VISIT_SIMPLE_TYPE(long); | ||
DEFINE_VISIT_SIMPLE_TYPE(float); | ||
DEFINE_VISIT_SIMPLE_TYPE(double); | ||
DEFINE_VISIT_SIMPLE_TYPE(boolean); | ||
DEFINE_VISIT_SIMPLE_TYPE(binary); | ||
DEFINE_VISIT_SIMPLE_TYPE(date); | ||
DEFINE_VISIT_SIMPLE_TYPE(timestamp); | ||
DEFINE_VISIT_SIMPLE_TYPE(timestamp_ntz); | ||
|
||
// free all the data in the builder (but not the builder itself, it's stack allocated) | ||
void free_builder(SchemaBuilder builder) { | ||
scovich marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for (int i = 0; i < builder.list_count; i++) { | ||
SchemaItemList *list = (builder.lists)+i; | ||
|
@@ -214,8 +184,6 @@ void free_builder(SchemaBuilder builder) { | |
// except decimal types, we malloc'd those :) | ||
free(item->type); | ||
} | ||
// don't free item->children, it's just a list in the builder so will be freed by the outer | ||
// loop. | ||
} | ||
free(list->list); // free all the items in this list (we alloc'd them together) | ||
} | ||
|
@@ -253,7 +221,7 @@ void print_schema(const SnapshotHandle *snapshot) { | |
printf("Schema returned in list %i\n", schema_list_id); | ||
#endif | ||
printf("Schema:\n"); | ||
print_list(builder.lists+schema_list_id, 0, false); | ||
print_list(&builder, schema_list_id, 0, false); | ||
printf("\n"); | ||
free_builder(builder); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Could also potentially leverage variadic macro args for this:
and then just use it:
(a bit more work up front, but less #ifdef magic at use sites)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also more fun :) switched to that