From 1d5a9dda177c4a8c9269280b6cefeebc8770c050 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Wed, 1 May 2024 14:40:17 -0700 Subject: [PATCH 01/25] Add most types other than a few less common primitive ones --- ffi/src/lib.rs | 108 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 22 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 2956fda1..5d9f7ca7 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -10,7 +10,7 @@ use tracing::debug; use url::Url; use delta_kernel::expressions::{BinaryOperator, Expression, Scalar}; -use delta_kernel::schema::{DataType, PrimitiveType, StructField, StructType}; +use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Engine, Error}; @@ -569,7 +569,7 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { } // WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods -// TODO: other types, nullability +// TODO: nullability #[repr(C)] pub struct EngineSchemaVisitor { // opaque state pointer @@ -583,10 +583,59 @@ pub struct EngineSchemaVisitor { name: KernelStringSlice, child_list_id: usize, ), + // Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list with + // the schema type for each item of the array + visit_array: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + contains_null: bool, // if this array can contain null values + child_list_id: usize, + ), + // Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list + // where the first element is the schema type the keys of the map, and the second element is the + // schema type of the values of the map + visit_map: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + child_list_id: usize, + ), visit_string: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), visit_integer: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_short: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_byte: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_float: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_double: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_boolean: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), +} + +macro_rules! gen_visit_match { + ( $match_item: expr, $visitor: ident, $sibling_list_id: ident, $name: ident, $(($prim_type: ident, $visit_fun: ident)), * ) => { + match $match_item { + $( + DataType::Primitive(PrimitiveType::$prim_type) => { + ($visitor.$visit_fun)($visitor.data, $sibling_list_id, $name.into()) + } + )* + DataType::Struct(s) => { + let child_list_id = visit_struct_fields($visitor, s); + ($visitor.visit_struct)($visitor.data, $sibling_list_id, $name.into(), child_list_id); + } + DataType::Array(at) => { + let child_list_id = visit_array_item($visitor, at); + ($visitor.visit_array)($visitor.data, $sibling_list_id, $name.into(), at.contains_null, child_list_id); + } + DataType::Map(mt) => { + let child_list_id = visit_map_types($visitor, mt); + ($visitor.visit_map)($visitor.data, $sibling_list_id, $name.into(), child_list_id); + } + other => println!("Unsupported data type: {}", other), + } + }; } /// # Safety @@ -602,30 +651,45 @@ pub unsafe extern "C" fn visit_schema( fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len()); for field in s.fields() { - visit_field(visitor, child_list_id, field); + visit_schema_item(&field.data_type(), field.name(), visitor, child_list_id); } child_list_id } + fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, 1); + visit_schema_item(&at.element_type, "list_data_type", visitor, child_list_id); + child_list_id + } + + fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, 2); + visit_schema_item(&mt.key_type, "map_key_type", visitor, child_list_id); + visit_schema_item(&mt.value_type, "map_value_type", visitor, child_list_id); + child_list_id + } + // Visit a struct field (recursively) and add the result to the list of siblings. - fn visit_field(visitor: &EngineSchemaVisitor, sibling_list_id: usize, field: &StructField) { - let name: &str = field.name.as_ref(); - match &field.data_type { - DataType::Primitive(PrimitiveType::Integer) => { - (visitor.visit_integer)(visitor.data, sibling_list_id, name.into()) - } - DataType::Primitive(PrimitiveType::Long) => { - (visitor.visit_long)(visitor.data, sibling_list_id, name.into()) - } - DataType::Primitive(PrimitiveType::String) => { - (visitor.visit_string)(visitor.data, sibling_list_id, name.into()) - } - DataType::Struct(s) => { - let child_list_id = visit_struct_fields(visitor, s); - (visitor.visit_struct)(visitor.data, sibling_list_id, name.into(), child_list_id); - } - other => println!("Unsupported data type: {}", other), - } + fn visit_schema_item( + data_type: &DataType, + name: &str, + visitor: &EngineSchemaVisitor, + sibling_list_id: usize, + ) { + gen_visit_match!( + data_type, + visitor, + sibling_list_id, + name, + (String, visit_string), + (Long, visit_long), + (Integer, visit_integer), + (Short, visit_short), + (Byte, visit_byte), + (Float, visit_float), + (Double, visit_double), + (Boolean, visit_boolean) // See macro def for how Struct/Map/Array are handled + ); } visit_struct_fields(visitor, snapshot.schema()) From 32428a5e6c4745b6e9fde8380b8cb49190c11f0f Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Wed, 1 May 2024 14:53:00 -0700 Subject: [PATCH 02/25] add remaining types --- ffi/src/lib.rs | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 5d9f7ca7..fcb4f30c 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -601,6 +601,14 @@ pub struct EngineSchemaVisitor { name: KernelStringSlice, child_list_id: usize, ), + // visit a decimal type with the specified precision and scale + visit_decimal: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + precision: u8, + scale: i8, + ), visit_string: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), visit_integer: @@ -611,6 +619,13 @@ pub struct EngineSchemaVisitor { visit_double: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), visit_boolean: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + visit_binary: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_date: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_timestamp: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + visit_timestamp_ntz: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), } macro_rules! gen_visit_match { @@ -621,19 +636,21 @@ macro_rules! gen_visit_match { ($visitor.$visit_fun)($visitor.data, $sibling_list_id, $name.into()) } )* - DataType::Struct(s) => { - let child_list_id = visit_struct_fields($visitor, s); - ($visitor.visit_struct)($visitor.data, $sibling_list_id, $name.into(), child_list_id); + DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { + ($visitor.visit_decimal)($visitor.data, $sibling_list_id, $name.into(), *precision, *scale); } DataType::Array(at) => { let child_list_id = visit_array_item($visitor, at); ($visitor.visit_array)($visitor.data, $sibling_list_id, $name.into(), at.contains_null, child_list_id); } + DataType::Struct(s) => { + let child_list_id = visit_struct_fields($visitor, s); + ($visitor.visit_struct)($visitor.data, $sibling_list_id, $name.into(), child_list_id); + } DataType::Map(mt) => { let child_list_id = visit_map_types($visitor, mt); ($visitor.visit_map)($visitor.data, $sibling_list_id, $name.into(), child_list_id); } - other => println!("Unsupported data type: {}", other), } }; } @@ -651,7 +668,7 @@ pub unsafe extern "C" fn visit_schema( fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len()); for field in s.fields() { - visit_schema_item(&field.data_type(), field.name(), visitor, child_list_id); + visit_schema_item(field.data_type(), field.name(), visitor, child_list_id); } child_list_id } @@ -676,6 +693,7 @@ pub unsafe extern "C" fn visit_schema( visitor: &EngineSchemaVisitor, sibling_list_id: usize, ) { + // See macro def for how Struct/Map/Array/Decimal are handled gen_visit_match!( data_type, visitor, @@ -688,7 +706,11 @@ pub unsafe extern "C" fn visit_schema( (Byte, visit_byte), (Float, visit_float), (Double, visit_double), - (Boolean, visit_boolean) // See macro def for how Struct/Map/Array are handled + (Boolean, visit_boolean), + (Binary, visit_binary), + (Date, visit_date), + (Timestamp, visit_timestamp), + (TimestampNtz, visit_timestamp_ntz) ); } From 7f59e542fc762598e616467ecd1c70601ea2fbff Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 2 May 2024 12:22:01 -0700 Subject: [PATCH 03/25] include pragma once --- ffi/cbindgen.toml | 2 ++ ffi/examples/read-table/read_table.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index b1060a32..2a37e7e8 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -4,6 +4,8 @@ # default to generating c bindings language = "C" +pragma_once = true + # only applies to Cxx namespace = "ffi" diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index fe686bee..28e12424 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -1,6 +1,8 @@ #include #include + #include "delta_kernel_ffi.h" +#include "schema.h" #ifdef PRINT_ARROW_DATA #include "arrow.h" @@ -145,7 +147,8 @@ int main(int argc, char* argv[]) { const SnapshotHandle *snapshot_handle = snapshot_handle_res.ok; uint64_t v = version(snapshot_handle); - printf("version: %llu\n", v); + printf("version: %llu\n\n", v); + print_schema(snapshot_handle); ExternResultScan scan_res = scan(snapshot_handle, engine, NULL); if (scan_res.tag != OkScan) { From bcc5a4f3c339505c3aee3182b5ea7f8d510d4472 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 2 May 2024 12:22:22 -0700 Subject: [PATCH 04/25] include schema printing --- ffi/examples/read-table/schema.h | 202 +++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 ffi/examples/read-table/schema.h diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h new file mode 100644 index 00000000..4647f4d1 --- /dev/null +++ b/ffi/examples/read-table/schema.h @@ -0,0 +1,202 @@ +#include "delta_kernel_ffi.h" + +typedef struct SchemaItemList SchemaItemList; + +typedef struct { + char* name; + char* type; + SchemaItemList* children; +} SchemaItem; + +typedef struct SchemaItemList { + uint32_t len; + SchemaItem* list; +} SchemaItemList; + +typedef struct { + int list_count; + SchemaItemList* lists; +} SchemaBuilder; + +char* allocate_name(const KernelStringSlice slice) { + char* buf = malloc(sizeof(char) * (slice.len + 1)); // +1 for null + snprintf(buf, slice.len + 1, "%s", slice.ptr); + return buf; +} + +SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { + int idx = list->len; + list->list[idx].name = name; + list->list[idx].type = type; + list->len++; + return list->list+idx; +} + +void print_list(SchemaItemList *list, int indent, bool last) { + for (int i = 0; i < list->len; i++) { + for (int j = 0; j <= indent; j++) { + if (j == indent) { + if (i == list->len - 1) { + printf("└"); + } else { + printf("├"); + } + } else { + if (last && j == indent - 1) { + // don't print a dangling | for my last item + printf(" "); + } else { + printf("│ "); + } + } + } + printf("─ %s: %s\n", list->list[i].name, list->list[i].type); + if (list->list[i].children) { + bool last = i == list->len - 1; + print_list(list->list[i].children, indent+1, last); + } + } +} + +// declare all our visitor methods +uintptr_t make_field_list(void *data, uintptr_t reserve) { + SchemaBuilder *builder = (SchemaBuilder*)data; + int id = builder->list_count; + //printf("Asked to make list of len %i, id %i\n", reserve, id); + builder->list_count++; + builder->lists = realloc(builder->lists, sizeof(SchemaItemList) * builder->list_count); + SchemaItem* list = calloc(reserve, sizeof(SchemaItem)); + //list->children = NULL; + builder->lists[id].len = 0; + builder->lists[id].list = list; + return id; +} + +void visit_struct(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uintptr_t child_list_id) { + //printf("Asked to visit struct children are in %i, i am in %i\n", child_list_id, sibling_list_id); + SchemaBuilder *builder = (SchemaBuilder*)data; + char* name_ptr = allocate_name(name); + SchemaItem* struct_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "struct"); + struct_item->children = builder->lists+child_list_id; +} +void visit_array(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + bool contains_null, + uintptr_t child_list_id) { + //printf("Asked to visit array type is in %i, i am in %i\n", child_list_id, sibling_list_id); + SchemaBuilder *builder = (SchemaBuilder*)data; + char* name_ptr = allocate_name(name); + SchemaItem* array_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "array"); + array_item->children = builder->lists+child_list_id; +} +void visit_map(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uintptr_t child_list_id) { + //printf("Asked to visit map, types are in %i, i am in %i\n", child_list_id, sibling_list_id); + SchemaBuilder *builder = (SchemaBuilder*)data; + char* name_ptr = allocate_name(name); + SchemaItem* map_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "map"); + map_item->children = builder->lists+child_list_id; +} + +void visit_decimal(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uint8_t precision, + int8_t scale) { + SchemaBuilder *builder = (SchemaBuilder*)data; + char* name_ptr = allocate_name(name); + char* type = malloc(16 * sizeof(char)); + sprintf(type, "decimal(%i)(%i)", precision, scale); + add_to_list(builder->lists+sibling_list_id, name_ptr, type); +} + +void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { + SchemaBuilder *builder = (SchemaBuilder*)data; + char* name_ptr = allocate_name(name); + add_to_list(builder->lists+sibling_list_id, name_ptr, type); +} + +void visit_string(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "string"); +} + +void visit_long(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "long"); +} + +void visit_integer(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "integer"); +} + +void visit_short(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "short"); +} + +void visit_byte(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "byte"); +} +void visit_float(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "float"); +} + +void visit_double(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "double"); +} + +void visit_boolean(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "boolean"); +} + +void visit_binary(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "binary"); +} + +void visit_date(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "date"); +} + +void visit_timestamp(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "timestamp"); +} + +void visit_timestamp_ntz(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { + visit_simple_type(data, sibling_list_id, name, "timestamp_ntz"); +} + +// Print the schema of the snapshot +void print_schema(const SnapshotHandle *snapshot) { + SchemaBuilder builder = { + .list_count = 0, + .lists = calloc(0, sizeof(SchemaItem*)), + }; + EngineSchemaVisitor visitor = { + .data = &builder, + .make_field_list = make_field_list, + .visit_struct = visit_struct, + .visit_array = visit_array, + .visit_map = visit_map, + .visit_decimal = visit_decimal, + .visit_string = visit_string, + .visit_long = visit_long, + .visit_integer = visit_integer, + .visit_short = visit_short, + .visit_byte = visit_byte, + .visit_float = visit_float, + .visit_double = visit_double, + .visit_boolean = visit_boolean, + .visit_binary = visit_binary, + .visit_date = visit_date, + .visit_timestamp = visit_timestamp, + .visit_timestamp_ntz = visit_timestamp_ntz + }; + visit_schema(snapshot, &visitor); + printf("Schema:\n"); + print_list(builder.lists, 0, false); + printf("\n"); +} From 9593038a2f9dff42be7c6e88af3f17ff6d57d90f Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 2 May 2024 13:39:39 -0700 Subject: [PATCH 05/25] docs mostly --- ffi/examples/read-table/schema.h | 49 ++++++++++-- ffi/src/lib.rs | 128 ++++++++++++++++++++++--------- ffi/src/scan.rs | 8 +- 3 files changed, 139 insertions(+), 46 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 4647f4d1..eb6c32e7 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -1,5 +1,21 @@ #include "delta_kernel_ffi.h" +/** + * This module defines a very simple model of a schema, just used to be able to print the schema of + * a table. It consists of a "SchemaBuilder" which is our user data that gets passed into each visit_x + * call. This simply keeps track of all the lists we are asked to allocate. + * + * Each list is a "SchemaItemList", which just tracks its length an an array of "SchemaItem"s. + * + * Each "SchemaItem" just has a name and a type, which are just strings. It can also have a list + * which is its children. This is initially always NULL, but when visiting a struct, map, or array, + * we point this at the list specified in the callback, which allows us to traverse the schema. + */ + +// If you want the visitor to print out what it's being asked to do at each step, uncomment the +// following line +// #define PRINT_VISITS + typedef struct SchemaItemList SchemaItemList; typedef struct { @@ -24,6 +40,8 @@ char* allocate_name(const KernelStringSlice slice) { return buf; } +// lists are preallocated to have exactly enough space, so we just fill in the next open slot and +// increment our length SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { int idx = list->len; list->list[idx].name = name; @@ -32,6 +50,7 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { return list->list+idx; } +// print out all items in a list, recursing into any children they may have void print_list(SchemaItemList *list, int indent, bool last) { for (int i = 0; i < list->len; i++) { for (int j = 0; j <= indent; j++) { @@ -62,11 +81,12 @@ void print_list(SchemaItemList *list, int indent, bool last) { uintptr_t make_field_list(void *data, uintptr_t reserve) { SchemaBuilder *builder = (SchemaBuilder*)data; int id = builder->list_count; - //printf("Asked to make list of len %i, id %i\n", reserve, id); +#ifdef PRINT_VISITS + printf("Making a list of lenth %i with id %i\n", reserve, id); +#endif builder->list_count++; builder->lists = realloc(builder->lists, sizeof(SchemaItemList) * builder->list_count); SchemaItem* list = calloc(reserve, sizeof(SchemaItem)); - //list->children = NULL; builder->lists[id].len = 0; builder->lists[id].list = list; return id; @@ -76,7 +96,9 @@ void visit_struct(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, uintptr_t child_list_id) { - //printf("Asked to visit struct children are in %i, i am in %i\n", child_list_id, sibling_list_id); +#ifdef PRINT_VISITS + printf("Asked to visit a struct, belonging to list %i. Children are in %i\n", sibling_list_id, child_list_id); +#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); SchemaItem* struct_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "struct"); @@ -87,7 +109,9 @@ void visit_array(void *data, struct KernelStringSlice name, bool contains_null, uintptr_t child_list_id) { - //printf("Asked to visit array type is in %i, i am in %i\n", child_list_id, sibling_list_id); +#ifdef PRINT_VISITS + printf("Asked to visit array, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); +#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); SchemaItem* array_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "array"); @@ -97,7 +121,9 @@ void visit_map(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, uintptr_t child_list_id) { - //printf("Asked to visit map, types are in %i, i am in %i\n", child_list_id, sibling_list_id); +#ifdef PRINT_VISITS + printf("Asked to visit map, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); +#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); SchemaItem* map_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "map"); @@ -109,6 +135,9 @@ void visit_decimal(void *data, struct KernelStringSlice name, uint8_t precision, int8_t scale) { +#ifdef PRINT_VISITS + printf("Asked to visit decimal with precision %i and scale %i, belonging to list %i\n", sibling_list_id); +#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); char* type = malloc(16 * sizeof(char)); @@ -117,6 +146,9 @@ void visit_decimal(void *data, } void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { +#ifdef PRINT_VISITS + printf("Asked to visit a(n) %s belonging to list %i\n", type, sibling_list_id); +#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); add_to_list(builder->lists+sibling_list_id, name_ptr, type); @@ -195,8 +227,11 @@ void print_schema(const SnapshotHandle *snapshot) { .visit_timestamp = visit_timestamp, .visit_timestamp_ntz = visit_timestamp_ntz }; - visit_schema(snapshot, &visitor); + uintptr_t schema_list_id = visit_schema(snapshot, &visitor); +#ifdef PRINT_VISITS + printf("Schema returned in list %i\n", schema_list_id); +#endif printf("Schema:\n"); - print_list(builder.lists, 0, false); + print_list(builder.lists+schema_list_id, 0, false); printf("\n"); } diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index fcb4f30c..82966939 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -388,7 +388,7 @@ unsafe fn unwrap_and_parse_path_as_url(path: KernelStringSlice) -> DeltaResult u64 { snapshot.version() } + +/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own +/// representation of a schema from a particular schema within kernel. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. Once allocated the engine returns an `id`, which can be anything the engine +/// wants, and will be passed back to the engine to identify the list in the future. +/// +/// The top level of a schema is always a struct. A schema can be visited by calling +/// [`visit_schema`] and passing a [`Snapshot`] and an `EngineSchemaVisitor`. As kernel traverses +/// its schema it will do the following at each node: +/// +/// - For a struct, map, or array type: +/// 1. Ask engine to allocate a list, specifying enough space to hold associated schema +/// information. +/// 2. For a struct, visit each child, with `sibling_list_id` set to the just allocated list. +/// 3. For a map, visit the key type and the value type with `sibling_list_id` set to the just +/// allocated list. +/// 4. For an array, visit the item type with `sibling_list_id` set to the just allocated list. +/// 5. Finally call `visit_[struct/array/map]` with `child_list_id` set to the list allocated +/// in `1`. This has enough information to build the entire type on the engine side. +/// +/// - For a simple type simply call `visit_[type]`, with `sibling_list_id` set to the `id` of the +/// list the type should be added to. +/// +/// [`visit_schema`] will return the id of the list associated with the top level struct // WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods // TODO: nullability #[repr(C)] pub struct EngineSchemaVisitor { - // opaque state pointer - data: *mut c_void, - // Creates a new field list, optionally reserving capacity up front - make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + /// opaque state pointer + pub data: *mut c_void, + /// Creates a new field list, optionally reserving capacity up front + pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + // visitor methods that should instantiate and append the appropriate type to the field list - visit_struct: extern "C" fn( + + /// Indidate that the schema contains a `Struct` type. The top level of a Schema is always a + /// `Struct`. The children of the `Struct` are in the list identified by `child_list_id`. + pub visit_struct: extern "C" fn( data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice, child_list_id: usize, ), - // Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list with - // the schema type for each item of the array - visit_array: extern "C" fn( + + /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list + /// with the schema type for each item of the array + pub visit_array: extern "C" fn( data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice, contains_null: bool, // if this array can contain null values child_list_id: usize, ), - // Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list - // where the first element is the schema type the keys of the map, and the second element is the - // schema type of the values of the map - visit_map: extern "C" fn( + + /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list + /// where the first element is the schema type the keys of the map, and the second element is the + /// schema type of the values of the map + pub visit_map: extern "C" fn( data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice, child_list_id: usize, ), - // visit a decimal type with the specified precision and scale - visit_decimal: extern "C" fn( + + /// visit a `decimal` with the specified `precision` and `scale` + pub visit_decimal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice, precision: u8, scale: i8, ), - visit_string: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_integer: + + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + pub visit_string: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `long` belonging to the list identified by `sibling_list_id`. + pub visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + pub visit_integer: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_short: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_byte: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_float: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_double: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_boolean: + + /// Visit a `short` belonging to the list identified by `sibling_list_id`. + pub visit_short: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `byte` belonging to the list identified by `sibling_list_id`. + pub visit_byte: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `float` belonging to the list identified by `sibling_list_id`. + pub visit_float: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `double` belonging to the list identified by `sibling_list_id`. + pub visit_double: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + pub visit_boolean: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_binary: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_date: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_timestamp: + /// Visit `binary` belonging to the list identified by `sibling_list_id`. + pub visit_binary: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `date` belonging to the list identified by `sibling_list_id`. + pub visit_date: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. + pub visit_timestamp: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_timestamp_ntz: + + /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. + pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), } @@ -782,14 +838,16 @@ impl KernelExpressionVisitorState { } } -// When invoking [[get_scan_files]], The engine provides a pointer to the (engine's native) -// predicate, along with a visitor function that can be invoked to recursively visit the -// predicate. This engine state is valid until the call to [[get_scan_files]] returns. Inside that -// method, the kernel allocates visitor state, which becomes the second argument to the predicate -// visitor invocation along with the engine-provided predicate pointer. The visitor state is valid -// for the lifetime of the predicate visitor invocation. Thanks to this double indirection, engine -// and kernel each retain ownership of their respective objects, with no need to coordinate memory -// lifetimes with the other. +/// A predicate that can be used to skip data when scanning. +/// +/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, +/// along with a visitor function that can be invoked to recursively visit the predicate. This +/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// kernel allocates visitor state, which becomes the second argument to the predicate visitor +/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the +/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and +/// kernel each retain ownership of their respective objects, with no need to coordinate memory +/// lifetimes with the other. #[repr(C)] pub struct EnginePredicate { predicate: *mut c_void, diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 678af62f..3613251b 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -22,7 +22,7 @@ use super::handle::{ArcHandle, BoxHandle}; // that are the engine data /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the -/// [`Engine`] by calling [`get_raw_engine_data`] +/// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] pub struct EngineDataHandle { data: Box, } @@ -53,8 +53,8 @@ pub struct ArrowFFIData { /// the schema. /// /// # Safety -/// data_handle must be a valid EngineDataHandle as read by the [`DefaultEngine`] obtained -/// from `get_default_client`. +/// data_handle must be a valid EngineDataHandle as read by the +/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_client`. #[cfg(feature = "default-engine")] pub unsafe extern "C" fn get_raw_arrow_data( data_handle: *mut EngineDataHandle, @@ -226,7 +226,7 @@ fn kernel_scan_data_next_impl( /// # Safety /// /// Caller is responsible for (at most once) passing a valid pointer returned by a call to -/// [kernel_scan_files_init]. +/// [`kernel_scan_data_init`]. // we should probably be consistent with drop vs. free on engine side (probably the latter is more // intuitive to non-rust code) #[no_mangle] From 6fd15cf93c5a344c794cb68a8c66682cdf4f0b22 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 2 May 2024 15:39:51 -0700 Subject: [PATCH 06/25] fmt --- ffi/src/lib.rs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 82966939..2782b2f3 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -568,7 +568,6 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { snapshot.version() } - /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own /// representation of a schema from a particular schema within kernel. /// @@ -604,7 +603,6 @@ pub struct EngineSchemaVisitor { pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, // visitor methods that should instantiate and append the appropriate type to the field list - /// Indidate that the schema contains a `Struct` type. The top level of a Schema is always a /// `Struct`. The children of the `Struct` are in the list identified by `child_list_id`. pub visit_struct: extern "C" fn( @@ -644,36 +642,44 @@ pub struct EngineSchemaVisitor { ), /// Visit a `string` belonging to the list identified by `sibling_list_id`. - pub visit_string: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_string: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `long` belonging to the list identified by `sibling_list_id`. - pub visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_long: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit an `integer` belonging to the list identified by `sibling_list_id`. pub visit_integer: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `short` belonging to the list identified by `sibling_list_id`. - pub visit_short: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_short: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `byte` belonging to the list identified by `sibling_list_id`. - pub visit_byte: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_byte: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `float` belonging to the list identified by `sibling_list_id`. - pub visit_float: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_float: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `double` belonging to the list identified by `sibling_list_id`. - pub visit_double: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_double: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. pub visit_boolean: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit `binary` belonging to the list identified by `sibling_list_id`. - pub visit_binary: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_binary: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `date` belonging to the list identified by `sibling_list_id`. - pub visit_date: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + pub visit_date: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. pub visit_timestamp: From 4baa68a9bf401c877d7044befd1dbe508d3a5645 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 2 May 2024 15:55:09 -0700 Subject: [PATCH 07/25] free schema! --- ffi/examples/read-table/schema.h | 21 +++++++++++++++++++++ ffi/src/lib.rs | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index eb6c32e7..6c074f55 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -201,6 +201,26 @@ void visit_timestamp_ntz(void *data, uintptr_t sibling_list_id, struct KernelStr visit_simple_type(data, sibling_list_id, name, "timestamp_ntz"); } +// free all the data in the builder (but not the builder itself, since that might be stack allocated) +void free_builder(SchemaBuilder builder) { + for (int i = 0; i < builder.list_count; i++) { + SchemaItemList *list = (builder.lists)+i; + for (int j = 0; j < list->len; j++) { + SchemaItem *item = list->list+j; + free(item->name); + // don't free item->type, those are static strings + if (!strncmp(item->type, "decimal", 7)) { + // except decimal types, we malloc'd those :) + free(item->type); + } + // don't free item->children, it's just a list in the builder so will be freed by the outer + // loop. + } + free(list->list); // free all the items in this list (we alloc'd them together) + } + free(builder.lists); +} + // Print the schema of the snapshot void print_schema(const SnapshotHandle *snapshot) { SchemaBuilder builder = { @@ -234,4 +254,5 @@ void print_schema(const SnapshotHandle *snapshot) { printf("Schema:\n"); print_list(builder.lists+schema_list_id, 0, false); printf("\n"); + free_builder(builder); } diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 2782b2f3..6ebe989c 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -737,7 +737,7 @@ pub unsafe extern "C" fn visit_schema( fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, 1); - visit_schema_item(&at.element_type, "list_data_type", visitor, child_list_id); + visit_schema_item(&at.element_type, "array_data_type", visitor, child_list_id); child_list_id } From 8747fe34586ae12bc14d0a90bbdff98b6774119e Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 2 May 2024 16:16:11 -0700 Subject: [PATCH 08/25] cleanup --- ffi/examples/read-table/schema.h | 67 ++++++++++++++++---------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 6c074f55..a55c7db1 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -1,15 +1,16 @@ #include "delta_kernel_ffi.h" /** - * This module defines a very simple model of a schema, just used to be able to print the schema of + * This module defines a very simple model of a schema, used only to be able to print the schema of * a table. It consists of a "SchemaBuilder" which is our user data that gets passed into each visit_x * call. This simply keeps track of all the lists we are asked to allocate. * - * Each list is a "SchemaItemList", which just tracks its length an an array of "SchemaItem"s. + * Each list is a "SchemaItemList", which tracks its length an an array of "SchemaItem"s. * - * Each "SchemaItem" just has a name and a type, which are just strings. It can also have a list - * which is its children. This is initially always NULL, but when visiting a struct, map, or array, - * we point this at the list specified in the callback, which allows us to traverse the schema. + * Each "SchemaItem" has a name and a type, which are just strings. It can also have a list which is + * its children. This is initially always NULL, but when visiting a struct, map, or array, we point + * this at the list specified in the callback, which allows us to traverse the schema when printing + * it. Note that this points to one of the lists in the builder's set of lists and is not a copy. */ // If you want the visitor to print out what it's being asked to do at each step, uncomment the @@ -51,28 +52,28 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { } // print out all items in a list, recursing into any children they may have -void print_list(SchemaItemList *list, int indent, bool last) { +void print_list(SchemaItemList *list, int indent, bool parent_on_last) { for (int i = 0; i < list->len; i++) { + bool is_last = i == list->len - 1; for (int j = 0; j <= indent; j++) { if (j == indent) { - if (i == list->len - 1) { - printf("└"); - } else { - printf("├"); - } + if (is_last) { + printf("└"); + } else { + printf("├"); + } } else { - if (last && j == indent - 1) { - // don't print a dangling | for my last item - printf(" "); - } else { - printf("│ "); - } + if (parent_on_last && j == indent - 1) { + // don't print a dangling | on my parent's last item + printf(" "); + } else { + printf("│ "); + } } } printf("─ %s: %s\n", list->list[i].name, list->list[i].type); if (list->list[i].children) { - bool last = i == list->len - 1; - print_list(list->list[i].children, indent+1, last); + print_list(list->list[i].children, indent+1, is_last); } } } @@ -93,9 +94,9 @@ uintptr_t make_field_list(void *data, uintptr_t reserve) { } void visit_struct(void *data, - uintptr_t sibling_list_id, - struct KernelStringSlice name, - uintptr_t child_list_id) { + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uintptr_t child_list_id) { #ifdef PRINT_VISITS printf("Asked to visit a struct, belonging to list %i. Children are in %i\n", sibling_list_id, child_list_id); #endif @@ -105,10 +106,10 @@ void visit_struct(void *data, struct_item->children = builder->lists+child_list_id; } void visit_array(void *data, - uintptr_t sibling_list_id, - struct KernelStringSlice name, - bool contains_null, - uintptr_t child_list_id) { + uintptr_t sibling_list_id, + struct KernelStringSlice name, + bool contains_null, + uintptr_t child_list_id) { #ifdef PRINT_VISITS printf("Asked to visit array, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); #endif @@ -118,9 +119,9 @@ void visit_array(void *data, array_item->children = builder->lists+child_list_id; } void visit_map(void *data, - uintptr_t sibling_list_id, - struct KernelStringSlice name, - uintptr_t child_list_id) { + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uintptr_t child_list_id) { #ifdef PRINT_VISITS printf("Asked to visit map, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); #endif @@ -131,10 +132,10 @@ void visit_map(void *data, } void visit_decimal(void *data, - uintptr_t sibling_list_id, - struct KernelStringSlice name, - uint8_t precision, - int8_t scale) { + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uint8_t precision, + int8_t scale) { #ifdef PRINT_VISITS printf("Asked to visit decimal with precision %i and scale %i, belonging to list %i\n", sibling_list_id); #endif From 6c4a173c50e672c85fba8692a65b341de52d0076 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 6 May 2024 11:48:17 -0700 Subject: [PATCH 09/25] fix pointer issue --- ffi/examples/read-table/schema.h | 57 +++++++++++++++++--------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index a55c7db1..72822095 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -1,3 +1,4 @@ +#include #include "delta_kernel_ffi.h" /** @@ -8,21 +9,21 @@ * Each list is a "SchemaItemList", which tracks its length an an array of "SchemaItem"s. * * Each "SchemaItem" has a name and a type, which are just strings. It can also have a list which is - * its children. This is initially always NULL, but when visiting a struct, map, or array, we point - * this at the list specified in the callback, which allows us to traverse the schema when printing - * it. Note that this points to one of the lists in the builder's set of lists and is not a copy. + * its children. This is initially always UINTPTR_MAX, but when visiting a struct, map, or array, we + * point this at the list id specified in the callback, which allows us to traverse the schema when + * printing it. */ // If you want the visitor to print out what it's being asked to do at each step, uncomment the // following line -// #define PRINT_VISITS +//#define PRINT_VISITS typedef struct SchemaItemList SchemaItemList; typedef struct { char* name; char* type; - SchemaItemList* children; + uintptr_t children; } SchemaItem; typedef struct SchemaItemList { @@ -52,7 +53,8 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { } // print out all items in a list, recursing into any children they may have -void print_list(SchemaItemList *list, int indent, bool parent_on_last) { +void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool parent_on_last) { + SchemaItemList *list = builder->lists+list_id; for (int i = 0; i < list->len; i++) { bool is_last = i == list->len - 1; for (int j = 0; j <= indent; j++) { @@ -72,8 +74,8 @@ void print_list(SchemaItemList *list, int indent, bool parent_on_last) { } } printf("─ %s: %s\n", list->list[i].name, list->list[i].type); - if (list->list[i].children) { - print_list(list->list[i].children, indent+1, is_last); + if (list->list[i].children != UINTPTR_MAX) { + print_list(builder, list->list[i].children, indent+1, is_last); } } } @@ -88,6 +90,9 @@ uintptr_t make_field_list(void *data, uintptr_t reserve) { builder->list_count++; builder->lists = realloc(builder->lists, sizeof(SchemaItemList) * builder->list_count); SchemaItem* list = calloc(reserve, sizeof(SchemaItem)); + for (int i = 0; i < reserve; i++) { + list[i].children = UINTPTR_MAX; + } builder->lists[id].len = 0; builder->lists[id].list = list; return id; @@ -97,38 +102,38 @@ void visit_struct(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, uintptr_t child_list_id) { -#ifdef PRINT_VISITS - printf("Asked to visit a struct, belonging to list %i. Children are in %i\n", sibling_list_id, child_list_id); -#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); +#ifdef PRINT_VISITS + printf("Asked to visit a struct, belonging to list %i for %s. Children are in %i\n", sibling_list_id, name_ptr, child_list_id); +#endif SchemaItem* struct_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "struct"); - struct_item->children = builder->lists+child_list_id; + struct_item->children = child_list_id; } void visit_array(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool contains_null, uintptr_t child_list_id) { -#ifdef PRINT_VISITS - printf("Asked to visit array, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); -#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); +#ifdef PRINT_VISITS + printf("Asked to visit array, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); +#endif SchemaItem* array_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "array"); - array_item->children = builder->lists+child_list_id; + array_item->children = child_list_id; } void visit_map(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, uintptr_t child_list_id) { -#ifdef PRINT_VISITS - printf("Asked to visit map, belonging to list %i. Types are in %i\n", sibling_list_id, child_list_id); -#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); +#ifdef PRINT_VISITS + printf("Asked to visit map, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); +#endif SchemaItem* map_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "map"); - map_item->children = builder->lists+child_list_id; + map_item->children = child_list_id; } void visit_decimal(void *data, @@ -147,11 +152,11 @@ void visit_decimal(void *data, } void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { -#ifdef PRINT_VISITS - printf("Asked to visit a(n) %s belonging to list %i\n", type, sibling_list_id); -#endif SchemaBuilder *builder = (SchemaBuilder*)data; char* name_ptr = allocate_name(name); +#ifdef PRINT_VISITS + printf("Asked to visit a(n) %s belonging to list %i for %s\n", type, sibling_list_id, name_ptr); +#endif add_to_list(builder->lists+sibling_list_id, name_ptr, type); } @@ -202,7 +207,7 @@ void visit_timestamp_ntz(void *data, uintptr_t sibling_list_id, struct KernelStr visit_simple_type(data, sibling_list_id, name, "timestamp_ntz"); } -// free all the data in the builder (but not the builder itself, since that might be stack allocated) +// free all the data in the builder (but not the builder itself, it's stack allocated) void free_builder(SchemaBuilder builder) { for (int i = 0; i < builder.list_count; i++) { SchemaItemList *list = (builder.lists)+i; @@ -214,8 +219,6 @@ void free_builder(SchemaBuilder builder) { // except decimal types, we malloc'd those :) free(item->type); } - // don't free item->children, it's just a list in the builder so will be freed by the outer - // loop. } free(list->list); // free all the items in this list (we alloc'd them together) } @@ -253,7 +256,7 @@ void print_schema(const SnapshotHandle *snapshot) { printf("Schema returned in list %i\n", schema_list_id); #endif printf("Schema:\n"); - print_list(builder.lists+schema_list_id, 0, false); + print_list(&builder, schema_list_id, 0, false); printf("\n"); free_builder(builder); } From 87ef58c22643dc2cb2e01013078800b2682101f5 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 6 May 2024 16:52:11 -0700 Subject: [PATCH 10/25] strndup + macro --- ffi/examples/read-table/schema.h | 102 ++++++++++--------------------- 1 file changed, 33 insertions(+), 69 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 72822095..3f2e5ccc 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -37,9 +37,7 @@ typedef struct { } SchemaBuilder; char* allocate_name(const KernelStringSlice slice) { - char* buf = malloc(sizeof(char) * (slice.len + 1)); // +1 for null - snprintf(buf, slice.len + 1, "%s", slice.ptr); - return buf; + return strndup(slice.ptr, slice.len); } // lists are preallocated to have exactly enough space, so we just fill in the next open slot and @@ -57,23 +55,17 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool pare SchemaItemList *list = builder->lists+list_id; for (int i = 0; i < list->len; i++) { bool is_last = i == list->len - 1; - for (int j = 0; j <= indent; j++) { - if (j == indent) { - if (is_last) { - printf("└"); - } else { - printf("├"); - } + for (int j = 0; j < indent; j++) { + if (parent_on_last && j == indent - 1) { + // don't print a dangling | on my parent's last item + printf(" "); } else { - if (parent_on_last && j == indent - 1) { - // don't print a dangling | on my parent's last item - printf(" "); - } else { - printf("│ "); - } + printf("│ "); } } - printf("─ %s: %s\n", list->list[i].name, list->list[i].type); + SchemaItem* item = &list->list[i]; + char* prefix = is_last? "└" : "├"; + printf("%s─ %s: %s\n", prefix, item->name, item->type); if (list->list[i].children != UINTPTR_MAX) { print_list(builder, list->list[i].children, indent+1, is_last); } @@ -82,7 +74,7 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool pare // declare all our visitor methods uintptr_t make_field_list(void *data, uintptr_t reserve) { - SchemaBuilder *builder = (SchemaBuilder*)data; + SchemaBuilder *builder = data; int id = builder->list_count; #ifdef PRINT_VISITS printf("Making a list of lenth %i with id %i\n", reserve, id); @@ -102,7 +94,7 @@ void visit_struct(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, uintptr_t child_list_id) { - SchemaBuilder *builder = (SchemaBuilder*)data; + SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); #ifdef PRINT_VISITS printf("Asked to visit a struct, belonging to list %i for %s. Children are in %i\n", sibling_list_id, name_ptr, child_list_id); @@ -115,7 +107,7 @@ void visit_array(void *data, struct KernelStringSlice name, bool contains_null, uintptr_t child_list_id) { - SchemaBuilder *builder = (SchemaBuilder*)data; + SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); #ifdef PRINT_VISITS printf("Asked to visit array, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); @@ -127,7 +119,7 @@ void visit_map(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, uintptr_t child_list_id) { - SchemaBuilder *builder = (SchemaBuilder*)data; + SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); #ifdef PRINT_VISITS printf("Asked to visit map, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); @@ -144,15 +136,17 @@ void visit_decimal(void *data, #ifdef PRINT_VISITS printf("Asked to visit decimal with precision %i and scale %i, belonging to list %i\n", sibling_list_id); #endif - SchemaBuilder *builder = (SchemaBuilder*)data; + SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); char* type = malloc(16 * sizeof(char)); - sprintf(type, "decimal(%i)(%i)", precision, scale); + snprintf(type, 16, "decimal(%i)(%i)", precision, scale); add_to_list(builder->lists+sibling_list_id, name_ptr, type); } + + void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { - SchemaBuilder *builder = (SchemaBuilder*)data; + SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); #ifdef PRINT_VISITS printf("Asked to visit a(n) %s belonging to list %i for %s\n", type, sibling_list_id, name_ptr); @@ -160,52 +154,22 @@ void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStrin add_to_list(builder->lists+sibling_list_id, name_ptr, type); } -void visit_string(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "string"); -} - -void visit_long(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "long"); -} - -void visit_integer(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "integer"); -} - -void visit_short(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "short"); -} - -void visit_byte(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "byte"); -} -void visit_float(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "float"); -} - -void visit_double(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "double"); -} - -void visit_boolean(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "boolean"); -} - -void visit_binary(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "binary"); -} - -void visit_date(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "date"); -} - -void visit_timestamp(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "timestamp"); -} +#define DEFINE_VISIT_SIMPLE_TYPE(typename) \ + void visit_##typename(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { \ + visit_simple_type(data, sibling_list_id, name, #typename); \ + } -void visit_timestamp_ntz(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { - visit_simple_type(data, sibling_list_id, name, "timestamp_ntz"); -} +DEFINE_VISIT_SIMPLE_TYPE(string); +DEFINE_VISIT_SIMPLE_TYPE(integer); +DEFINE_VISIT_SIMPLE_TYPE(short); +DEFINE_VISIT_SIMPLE_TYPE(byte); +DEFINE_VISIT_SIMPLE_TYPE(float); +DEFINE_VISIT_SIMPLE_TYPE(double); +DEFINE_VISIT_SIMPLE_TYPE(boolean); +DEFINE_VISIT_SIMPLE_TYPE(binary); +DEFINE_VISIT_SIMPLE_TYPE(date); +DEFINE_VISIT_SIMPLE_TYPE(timestamp); +DEFINE_VISIT_SIMPLE_TYPE(timestamp_ntz); // free all the data in the builder (but not the builder itself, it's stack allocated) void free_builder(SchemaBuilder builder) { From 21a3402f20b22be10f9b3ca254a77fb81ca378cc Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 6 May 2024 16:53:19 -0700 Subject: [PATCH 11/25] Apply suggestions from code review Co-authored-by: Ryan Johnson --- ffi/src/lib.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 6ebe989c..e6e91f18 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -580,14 +580,13 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { /// its schema it will do the following at each node: /// /// - For a struct, map, or array type: -/// 1. Ask engine to allocate a list, specifying enough space to hold associated schema -/// information. +/// 1. Ask engine to allocate a "child" list, indicating how many items will be needed. /// 2. For a struct, visit each child, with `sibling_list_id` set to the just allocated list. /// 3. For a map, visit the key type and the value type with `sibling_list_id` set to the just /// allocated list. /// 4. For an array, visit the item type with `sibling_list_id` set to the just allocated list. /// 5. Finally call `visit_[struct/array/map]` with `child_list_id` set to the list allocated -/// in `1`. This has enough information to build the entire type on the engine side. +/// in step 1. This has enough information to build the entire type on the engine side. /// /// - For a simple type simply call `visit_[type]`, with `sibling_list_id` set to the `id` of the /// list the type should be added to. @@ -603,7 +602,7 @@ pub struct EngineSchemaVisitor { pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, // visitor methods that should instantiate and append the appropriate type to the field list - /// Indidate that the schema contains a `Struct` type. The top level of a Schema is always a + /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a /// `Struct`. The children of the `Struct` are in the list identified by `child_list_id`. pub visit_struct: extern "C" fn( data: *mut c_void, From 0fc9bd7af17efbeea91684e3a1345fa68821a64e Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 6 May 2024 17:03:27 -0700 Subject: [PATCH 12/25] don't forget long --- ffi/examples/read-table/schema.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 3f2e5ccc..c177b65b 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -163,6 +163,7 @@ DEFINE_VISIT_SIMPLE_TYPE(string); DEFINE_VISIT_SIMPLE_TYPE(integer); DEFINE_VISIT_SIMPLE_TYPE(short); DEFINE_VISIT_SIMPLE_TYPE(byte); +DEFINE_VISIT_SIMPLE_TYPE(long); DEFINE_VISIT_SIMPLE_TYPE(float); DEFINE_VISIT_SIMPLE_TYPE(double); DEFINE_VISIT_SIMPLE_TYPE(boolean); From a3ff414c5ca00398f01b7cd9ed35d7795fdb4c6b Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 6 May 2024 17:04:24 -0700 Subject: [PATCH 13/25] untabify :sigh: --- ffi/examples/read-table/schema.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index c177b65b..30a247dc 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -57,10 +57,10 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool pare bool is_last = i == list->len - 1; for (int j = 0; j < indent; j++) { if (parent_on_last && j == indent - 1) { - // don't print a dangling | on my parent's last item - printf(" "); + // don't print a dangling | on my parent's last item + printf(" "); } else { - printf("│ "); + printf("│ "); } } SchemaItem* item = &list->list[i]; @@ -156,7 +156,7 @@ void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStrin #define DEFINE_VISIT_SIMPLE_TYPE(typename) \ void visit_##typename(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { \ - visit_simple_type(data, sibling_list_id, name, #typename); \ + visit_simple_type(data, sibling_list_id, name, #typename); \ } DEFINE_VISIT_SIMPLE_TYPE(string); From 184e2c3930c7804ff4bfa2e85d662d3fc4ee3494 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Tue, 7 May 2024 15:50:16 -0700 Subject: [PATCH 14/25] fix up selection vector stuff --- ffi/examples/read-table/read_table.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index c0055de0..d110d055 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -67,13 +67,13 @@ void visit_callback(void* engine_context, const KernelStringSlice path, long siz return; } KernelBoolSlice selection_vector = selection_vector_res.ok; - if (selection_vector) { + if (selection_vector.len > 0) { printf(" Selection vector:\n"); - print_selection_vector(" ", selection_vector); - drop_bool_slice(selection_vector); + print_selection_vector(" ", &selection_vector); } else { printf(" No selection vector for this call\n"); } + drop_bool_slice(selection_vector); // normally this would be picked out of the schema char* letter_key = "letter"; KernelStringSlice key = {letter_key, strlen(letter_key)}; @@ -91,7 +91,7 @@ void visit_data(void *engine_context, EngineDataHandle *engine_data, KernelBoolS printf(" Of this data, here is a selection vector\n"); print_selection_vector(" ", &selection_vec); visit_scan_data(engine_data, selection_vec, engine_context, visit_callback); - drop_bool_slice(&selection_vec); + drop_bool_slice(selection_vec); } int main(int argc, char* argv[]) { From 0805f38b08245d99b4ad9e29e45326b23cebea0c Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Tue, 7 May 2024 16:12:03 -0700 Subject: [PATCH 15/25] maps can have nulls + print if array/map has null in schema.h --- ffi/examples/read-table/schema.h | 9 +++++++-- ffi/src/lib.rs | 3 ++- kernel/src/schema.rs | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 30a247dc..51b865f9 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -108,7 +108,9 @@ void visit_array(void *data, bool contains_null, uintptr_t child_list_id) { SchemaBuilder *builder = data; - char* name_ptr = allocate_name(name); + char* name_ptr = malloc(sizeof(char) * (name.len + 24)); + snprintf(name_ptr, name.len+1, "%s", name.ptr); + snprintf(name_ptr+name.len, 24, " (contains null: %s)", contains_null ? "true" : "false"); #ifdef PRINT_VISITS printf("Asked to visit array, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); #endif @@ -118,9 +120,12 @@ void visit_array(void *data, void visit_map(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, + bool value_contains_null, uintptr_t child_list_id) { SchemaBuilder *builder = data; - char* name_ptr = allocate_name(name); + char* name_ptr = malloc(sizeof(char) * (name.len + 24)); + snprintf(name_ptr, name.len+1, "%s", name.ptr); + snprintf(name_ptr+name.len, 24, " (contains null: %s)", value_contains_null ? "true" : "false"); #ifdef PRINT_VISITS printf("Asked to visit map, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); #endif diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 0582391e..b60db292 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -680,6 +680,7 @@ pub struct EngineSchemaVisitor { data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice, + value_contains_null: bool, // if this map can contain null values child_list_id: usize, ), @@ -762,7 +763,7 @@ macro_rules! gen_visit_match { } DataType::Map(mt) => { let child_list_id = visit_map_types($visitor, mt); - ($visitor.visit_map)($visitor.data, $sibling_list_id, $name.into(), child_list_id); + ($visitor.visit_map)($visitor.data, $sibling_list_id, $name.into(), mt.value_contains_null, child_list_id); } } }; diff --git a/kernel/src/schema.rs b/kernel/src/schema.rs index cbfb375b..faba56c6 100644 --- a/kernel/src/schema.rs +++ b/kernel/src/schema.rs @@ -261,7 +261,7 @@ pub struct MapType { pub key_type: DataType, /// The type of element used for the value of this map pub value_type: DataType, - /// Denoting whether this array can contain one or more null values + /// Denoting whether this map can contain one or more null values #[serde(default = "default_true")] pub value_contains_null: bool, } From 76b942b4623f3f3d83afc2abe81bbe82a5415179 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Tue, 7 May 2024 17:27:02 -0700 Subject: [PATCH 16/25] cleaner visit macro --- ffi/src/lib.rs | 77 ++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index b60db292..b8d850d7 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -742,33 +742,6 @@ pub struct EngineSchemaVisitor { extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), } -macro_rules! gen_visit_match { - ( $match_item: expr, $visitor: ident, $sibling_list_id: ident, $name: ident, $(($prim_type: ident, $visit_fun: ident)), * ) => { - match $match_item { - $( - DataType::Primitive(PrimitiveType::$prim_type) => { - ($visitor.$visit_fun)($visitor.data, $sibling_list_id, $name.into()) - } - )* - DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { - ($visitor.visit_decimal)($visitor.data, $sibling_list_id, $name.into(), *precision, *scale); - } - DataType::Array(at) => { - let child_list_id = visit_array_item($visitor, at); - ($visitor.visit_array)($visitor.data, $sibling_list_id, $name.into(), at.contains_null, child_list_id); - } - DataType::Struct(s) => { - let child_list_id = visit_struct_fields($visitor, s); - ($visitor.visit_struct)($visitor.data, $sibling_list_id, $name.into(), child_list_id); - } - DataType::Map(mt) => { - let child_list_id = visit_map_types($visitor, mt); - ($visitor.visit_map)($visitor.data, $sibling_list_id, $name.into(), mt.value_contains_null, child_list_id); - } - } - }; -} - /// # Safety /// /// Caller is responsible for passing a valid handle. @@ -807,25 +780,37 @@ pub unsafe extern "C" fn visit_schema( visitor: &EngineSchemaVisitor, sibling_list_id: usize, ) { - // See macro def for how Struct/Map/Array/Decimal are handled - gen_visit_match!( - data_type, - visitor, - sibling_list_id, - name, - (String, visit_string), - (Long, visit_long), - (Integer, visit_integer), - (Short, visit_short), - (Byte, visit_byte), - (Float, visit_float), - (Double, visit_double), - (Boolean, visit_boolean), - (Binary, visit_binary), - (Date, visit_date), - (Timestamp, visit_timestamp), - (TimestampNtz, visit_timestamp_ntz) - ); + macro_rules! call { + ( $visitor_fn:ident $(, $extra_args:expr) *) => { + (visitor.$visitor_fn)(visitor.data, sibling_list_id, name.into() $(, $extra_args) *) + }; + } + match data_type { + DataType::Struct(st) => call!(visit_struct, visit_struct_fields(visitor, st)), + DataType::Map(mt) => call!( + visit_map, + mt.value_contains_null, + visit_map_types(visitor, mt) + ), + DataType::Array(at) => { + call!(visit_array, at.contains_null, visit_array_item(visitor, at)); + } + DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { + call!(visit_decimal, *precision, *scale); + } + &DataType::STRING => call!(visit_string), + &DataType::LONG => call!(visit_long), + &DataType::INTEGER => call!(visit_integer), + &DataType::SHORT => call!(visit_short), + &DataType::BYTE => call!(visit_byte), + &DataType::FLOAT => call!(visit_float), + &DataType::DOUBLE => call!(visit_double), + &DataType::BOOLEAN => call!(visit_boolean), + &DataType::BINARY => call!(visit_binary), + &DataType::DATE => call!(visit_date), + &DataType::TIMESTAMP => call!(visit_timestamp), + &DataType::TIMESTAMP_NTZ => call!(visit_timestamp_ntz), + } } visit_struct_fields(visitor, snapshot.schema()) From 245da8af136e19fd4d93a981bc2ba5918a1a2775 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Tue, 7 May 2024 17:53:44 -0700 Subject: [PATCH 17/25] comment update --- ffi/src/lib.rs | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index b8d850d7..fb6f8570 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -624,26 +624,26 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { /// representation of a schema from a particular schema within kernel. /// /// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a -/// particular size. Once allocated the engine returns an `id`, which can be anything the engine -/// wants, and will be passed back to the engine to identify the list in the future. +/// particular size. Once allocated the engine returns an `id`, which can be any identifier the +/// engine wants, and will be passed back to the engine to identify the list in the future. /// -/// The top level of a schema is always a struct. A schema can be visited by calling -/// [`visit_schema`] and passing a [`Snapshot`] and an `EngineSchemaVisitor`. As kernel traverses -/// its schema it will do the following at each node: -/// -/// - For a struct, map, or array type: -/// 1. Ask engine to allocate a "child" list, indicating how many items will be needed. -/// 2. For a struct, visit each child, with `sibling_list_id` set to the just allocated list. -/// 3. For a map, visit the key type and the value type with `sibling_list_id` set to the just -/// allocated list. -/// 4. For an array, visit the item type with `sibling_list_id` set to the just allocated list. -/// 5. Finally call `visit_[struct/array/map]` with `child_list_id` set to the list allocated -/// in step 1. This has enough information to build the entire type on the engine side. -/// -/// - For a simple type simply call `visit_[type]`, with `sibling_list_id` set to the `id` of the -/// list the type should be added to. -/// -/// [`visit_schema`] will return the id of the list associated with the top level struct +/// Every schema element the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct, map, array) contains a list +/// of "child" elements. +/// 1. Before visiting schema or any complex type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. When visiting any schema element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For the top-level schema, visit each top-level column, passing the column's name and type +/// - For a struct, first visit each struct field, passing the field's name, type, nullability, +/// and metadata +/// - For a map, visit the key and value, passing its special name ("key" or "value"), type, +/// and value nullability (keys are never nullable) +/// - For a list, visit the element, passing its special name ("element"), type, and +/// nullability +/// 3. When visiting a complex schema element, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_schema`] method returns the id of the list of top-level columns // WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods // TODO: nullability #[repr(C)] @@ -742,9 +742,14 @@ pub struct EngineSchemaVisitor { extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), } +/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the +/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. +/// +/// This method returns the id of the list allocated to hold the top level schema columns. +/// /// # Safety /// -/// Caller is responsible for passing a valid handle. +/// Caller is responsible for passing a valid snapshot handle and schema visitor. #[no_mangle] pub unsafe extern "C" fn visit_schema( snapshot: *const SnapshotHandle, From d479631f5b47a2cd9ddcfb2d664765963a8946a7 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Tue, 7 May 2024 17:54:19 -0700 Subject: [PATCH 18/25] clearer todo --- ffi/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index fb6f8570..06fbc5d2 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -645,7 +645,7 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { /// that element's (already-visited) children. /// 4. The [`visit_schema`] method returns the id of the list of top-level columns // WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods -// TODO: nullability +// TODO: struct nullability and field metadata #[repr(C)] pub struct EngineSchemaVisitor { /// opaque state pointer From 872b0e777ed71fc1b9e0b562e71522d9437a8fd9 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Wed, 8 May 2024 13:05:26 -0700 Subject: [PATCH 19/25] fix schema printing for deep nesting --- ffi/examples/read-table/schema.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 51b865f9..fa536717 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -51,13 +51,13 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { } // print out all items in a list, recursing into any children they may have -void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool parent_on_last) { +void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int parents_on_last) { SchemaItemList *list = builder->lists+list_id; for (int i = 0; i < list->len; i++) { bool is_last = i == list->len - 1; for (int j = 0; j < indent; j++) { - if (parent_on_last && j == indent - 1) { - // don't print a dangling | on my parent's last item + if ((indent - parents_on_last) <= j) { + // don't print a dangling | on any parents that are on their last item printf(" "); } else { printf("│ "); @@ -67,7 +67,7 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, bool pare char* prefix = is_last? "└" : "├"; printf("%s─ %s: %s\n", prefix, item->name, item->type); if (list->list[i].children != UINTPTR_MAX) { - print_list(builder, list->list[i].children, indent+1, is_last); + print_list(builder, list->list[i].children, indent+1, parents_on_last + is_last); } } } @@ -226,7 +226,7 @@ void print_schema(const SnapshotHandle *snapshot) { printf("Schema returned in list %i\n", schema_list_id); #endif printf("Schema:\n"); - print_list(&builder, schema_list_id, 0, false); + print_list(&builder, schema_list_id, 0, 0); printf("\n"); free_builder(builder); } From 47abc393efda23e435bbc28f5e36be2e31f8fbea Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Wed, 8 May 2024 13:12:07 -0700 Subject: [PATCH 20/25] use schema appropriate names --- ffi/src/lib.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 06fbc5d2..637a58c0 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -637,9 +637,9 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { /// - For the top-level schema, visit each top-level column, passing the column's name and type /// - For a struct, first visit each struct field, passing the field's name, type, nullability, /// and metadata -/// - For a map, visit the key and value, passing its special name ("key" or "value"), type, -/// and value nullability (keys are never nullable) -/// - For a list, visit the element, passing its special name ("element"), type, and +/// - For a map, visit the key and value, passing its special name ("map_key" or "map_value"), +/// type, and value nullability (keys are never nullable) +/// - For a list, visit the element, passing its special name ("array_element"), type, and /// nullability /// 3. When visiting a complex schema element, the kernel also passes the "child list" containing /// that element's (already-visited) children. @@ -767,14 +767,14 @@ pub unsafe extern "C" fn visit_schema( fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, 1); - visit_schema_item(&at.element_type, "array_data_type", visitor, child_list_id); + visit_schema_item(&at.element_type, "array_element", visitor, child_list_id); child_list_id } fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, 2); - visit_schema_item(&mt.key_type, "map_key_type", visitor, child_list_id); - visit_schema_item(&mt.value_type, "map_value_type", visitor, child_list_id); + visit_schema_item(&mt.key_type, "map_key", visitor, child_list_id); + visit_schema_item(&mt.value_type, "map_value", visitor, child_list_id); child_list_id } From 93fc17c56c8b50e261d89095aafa0310bf036c7e Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Fri, 10 May 2024 11:36:42 -0700 Subject: [PATCH 21/25] macros are fun --- ffi/examples/read-table/schema.h | 36 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index fa536717..f76ed576 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -16,7 +16,19 @@ // If you want the visitor to print out what it's being asked to do at each step, uncomment the // following line -//#define PRINT_VISITS +//#define VERBOSE + +#ifdef VERBOSE +#define _NTH_ARG(_1, _2, _3, _4, _5, N, ...) N +#define NUMARGS(...) _NTH_ARG(__VA_ARGS__, 5, 4, 3, 2, 1) +#define CHILD_FMT "Asked to visit %s named %s belonging to list %i. %s are in %i.\n" +#define NO_CHILD_FMT "Asked to visit %s named %s belonging to list %i.\n" +#define PRINT_VISIT(...) NUMARGS(__VA_ARGS__) == 5?\ + printf(CHILD_FMT, __VA_ARGS__): \ + printf(NO_CHILD_FMT, __VA_ARGS__) +#else +#define PRINT_VISIT(...) +#endif typedef struct SchemaItemList SchemaItemList; @@ -76,7 +88,7 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int paren uintptr_t make_field_list(void *data, uintptr_t reserve) { SchemaBuilder *builder = data; int id = builder->list_count; -#ifdef PRINT_VISITS +#ifdef VERBOSE printf("Making a list of lenth %i with id %i\n", reserve, id); #endif builder->list_count++; @@ -96,9 +108,7 @@ void visit_struct(void *data, uintptr_t child_list_id) { SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); -#ifdef PRINT_VISITS - printf("Asked to visit a struct, belonging to list %i for %s. Children are in %i\n", sibling_list_id, name_ptr, child_list_id); -#endif + PRINT_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id); SchemaItem* struct_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "struct"); struct_item->children = child_list_id; } @@ -111,9 +121,7 @@ void visit_array(void *data, char* name_ptr = malloc(sizeof(char) * (name.len + 24)); snprintf(name_ptr, name.len+1, "%s", name.ptr); snprintf(name_ptr+name.len, 24, " (contains null: %s)", contains_null ? "true" : "false"); -#ifdef PRINT_VISITS - printf("Asked to visit array, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); -#endif + PRINT_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id); SchemaItem* array_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "array"); array_item->children = child_list_id; } @@ -126,9 +134,7 @@ void visit_map(void *data, char* name_ptr = malloc(sizeof(char) * (name.len + 24)); snprintf(name_ptr, name.len+1, "%s", name.ptr); snprintf(name_ptr+name.len, 24, " (contains null: %s)", value_contains_null ? "true" : "false"); -#ifdef PRINT_VISITS - printf("Asked to visit map, belonging to list %i for %s. Types are in %i\n", sibling_list_id, name_ptr, child_list_id); -#endif + PRINT_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id); SchemaItem* map_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "map"); map_item->children = child_list_id; } @@ -138,13 +144,11 @@ void visit_decimal(void *data, struct KernelStringSlice name, uint8_t precision, int8_t scale) { -#ifdef PRINT_VISITS - printf("Asked to visit decimal with precision %i and scale %i, belonging to list %i\n", sibling_list_id); -#endif SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); char* type = malloc(16 * sizeof(char)); snprintf(type, 16, "decimal(%i)(%i)", precision, scale); + PRINT_VISIT(type, name_ptr, sibling_list_id); add_to_list(builder->lists+sibling_list_id, name_ptr, type); } @@ -153,9 +157,7 @@ void visit_decimal(void *data, void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); -#ifdef PRINT_VISITS - printf("Asked to visit a(n) %s belonging to list %i for %s\n", type, sibling_list_id, name_ptr); -#endif + PRINT_VISIT(type, name_ptr, sibling_list_id); add_to_list(builder->lists+sibling_list_id, name_ptr, type); } From 711b16b402049d10426d9353d534813896c738ac Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Fri, 10 May 2024 11:44:40 -0700 Subject: [PATCH 22/25] Apply suggestions from code review Co-authored-by: Ryan Johnson --- ffi/examples/read-table/schema.h | 2 +- ffi/src/lib.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index f76ed576..4b440ed8 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -64,7 +64,7 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { // print out all items in a list, recursing into any children they may have void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int parents_on_last) { - SchemaItemList *list = builder->lists+list_id; + SchemaItemList *list = builder->lists + list_id; for (int i = 0; i < list->len; i++) { bool is_last = i == list->len - 1; for (int j = 0; j < indent; j++) { diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index c4c64db3..0d3b22ad 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -655,7 +655,7 @@ pub struct EngineSchemaVisitor { // visitor methods that should instantiate and append the appropriate type to the field list /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a - /// `Struct`. The children of the `Struct` are in the list identified by `child_list_id`. + /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. pub visit_struct: extern "C" fn( data: *mut c_void, sibling_list_id: usize, @@ -664,7 +664,7 @@ pub struct EngineSchemaVisitor { ), /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list - /// with the schema type for each item of the array + /// with the array's element type pub visit_array: extern "C" fn( data: *mut c_void, sibling_list_id: usize, @@ -674,8 +674,8 @@ pub struct EngineSchemaVisitor { ), /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list - /// where the first element is the schema type the keys of the map, and the second element is the - /// schema type of the values of the map + /// where the first element is the map's key type and the second element is the + /// map's value type pub visit_map: extern "C" fn( data: *mut c_void, sibling_list_id: usize, From 953c3e840617d7ac731f65f5c89d54091add7bff Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Fri, 10 May 2024 11:44:49 -0700 Subject: [PATCH 23/25] doc update --- ffi/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 0d3b22ad..def9e665 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -624,8 +624,9 @@ pub unsafe extern "C" fn version(snapshot: *const SnapshotHandle) -> u64 { /// representation of a schema from a particular schema within kernel. /// /// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a -/// particular size. Once allocated the engine returns an `id`, which can be any identifier the -/// engine wants, and will be passed back to the engine to identify the list in the future. +/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. /// /// Every schema element the kernel visits belongs to some list of "sibling" elements. The schema /// itself is a list of schema elements, and every complex type (struct, map, array) contains a list From 2498d10f12a948cd6e6e17f2e80e0a500d15ce66 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Fri, 10 May 2024 11:53:17 -0700 Subject: [PATCH 24/25] slightly more consistent formatting --- ffi/src/lib.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index def9e665..d22f7f63 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -793,16 +793,18 @@ pub unsafe extern "C" fn visit_schema( } match data_type { DataType::Struct(st) => call!(visit_struct, visit_struct_fields(visitor, st)), - DataType::Map(mt) => call!( - visit_map, - mt.value_contains_null, - visit_map_types(visitor, mt) - ), + DataType::Map(mt) => { + call!( + visit_map, + mt.value_contains_null, + visit_map_types(visitor, mt) + ) + } DataType::Array(at) => { - call!(visit_array, at.contains_null, visit_array_item(visitor, at)); + call!(visit_array, at.contains_null, visit_array_item(visitor, at)) } DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { - call!(visit_decimal, *precision, *scale); + call!(visit_decimal, *precision, *scale) } &DataType::STRING => call!(visit_string), &DataType::LONG => call!(visit_long), From 8cdf7f8c83f6cf79b15a1d7c1a666930857e9340 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Fri, 10 May 2024 11:56:07 -0700 Subject: [PATCH 25/25] more consistent array indexing --- ffi/examples/read-table/schema.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 4b440ed8..d6e8cabd 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -59,12 +59,12 @@ SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { list->list[idx].name = name; list->list[idx].type = type; list->len++; - return list->list+idx; + return &list->list[idx]; } // print out all items in a list, recursing into any children they may have void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int parents_on_last) { - SchemaItemList *list = builder->lists + list_id; + SchemaItemList *list = &builder->lists[list_id]; for (int i = 0; i < list->len; i++) { bool is_last = i == list->len - 1; for (int j = 0; j < indent; j++) { @@ -109,7 +109,7 @@ void visit_struct(void *data, SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); PRINT_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id); - SchemaItem* struct_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "struct"); + SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct"); struct_item->children = child_list_id; } void visit_array(void *data, @@ -122,7 +122,7 @@ void visit_array(void *data, snprintf(name_ptr, name.len+1, "%s", name.ptr); snprintf(name_ptr+name.len, 24, " (contains null: %s)", contains_null ? "true" : "false"); PRINT_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id); - SchemaItem* array_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "array"); + SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array"); array_item->children = child_list_id; } void visit_map(void *data, @@ -135,7 +135,7 @@ void visit_map(void *data, snprintf(name_ptr, name.len+1, "%s", name.ptr); snprintf(name_ptr+name.len, 24, " (contains null: %s)", value_contains_null ? "true" : "false"); PRINT_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id); - SchemaItem* map_item = add_to_list(builder->lists+sibling_list_id, name_ptr, "map"); + SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map"); map_item->children = child_list_id; } @@ -149,7 +149,7 @@ void visit_decimal(void *data, char* type = malloc(16 * sizeof(char)); snprintf(type, 16, "decimal(%i)(%i)", precision, scale); PRINT_VISIT(type, name_ptr, sibling_list_id); - add_to_list(builder->lists+sibling_list_id, name_ptr, type); + add_to_list(&builder->lists[sibling_list_id], name_ptr, type); } @@ -158,7 +158,7 @@ void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStrin SchemaBuilder *builder = data; char* name_ptr = allocate_name(name); PRINT_VISIT(type, name_ptr, sibling_list_id); - add_to_list(builder->lists+sibling_list_id, name_ptr, type); + add_to_list(&builder->lists[sibling_list_id], name_ptr, type); } #define DEFINE_VISIT_SIMPLE_TYPE(typename) \