From 663585d1940b01ecddf5edabb63cb70df7bc7add Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Fri, 10 May 2024 12:05:59 -0700 Subject: [PATCH] Add remaining data types to schema visiting in ffi (#187) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds more types, and uses a macro to reduce repetition for the primitive ones. Also adds some rust docs to try and explain how the visitors for schema work. Tested by adding a visitor to the `read_table` program that builds and prints the schema. Output: ``` .$ /read_table file:///home/nick/databricks/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/nested_types/delta/ Reading table at file:///home/nick/databricks/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/nested_types/delta/ version: 0 Schema: ├─ pk: integer ├─ struct: struct │ ├─ float64: double │ └─ bool: boolean ├─ array: array │ └─ array_data_type: short └─ map: map ├─ map_key_type: string └─ map_value_type: integer Got some data Of this data, here is a selection vector sel[0] = 0 sel[1] = 0 sel[2] = 0 sel[3] = 1 called back to actually read! 
path: part-00000-3b8ca3f0-6444-4ed5-8961-c605cba95bf1-c000.snappy.parquet No selection vector for this call no partition here Iterator done All done ``` --------- Co-authored-by: Ryan Johnson --- ffi/cbindgen.toml | 2 + ffi/examples/read-table/read_table.c | 13 +- ffi/examples/read-table/schema.h | 234 +++++++++++++++++++++++++++ ffi/src/lib.rs | 213 ++++++++++++++++++++---- ffi/src/scan.rs | 8 +- kernel/src/schema.rs | 2 +- 6 files changed, 427 insertions(+), 45 deletions(-) create mode 100644 ffi/examples/read-table/schema.h diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index 06b88117..943fd4d5 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -4,6 +4,8 @@ # default to generating c bindings language = "C" +pragma_once = true + # only applies to Cxx namespace = "ffi" diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index ecb4e5bd..d110d055 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -1,6 +1,8 @@ #include #include + #include "delta_kernel_ffi.h" +#include "schema.h" #ifdef PRINT_ARROW_DATA #include "arrow.h" @@ -65,13 +67,13 @@ void visit_callback(void* engine_context, const KernelStringSlice path, long siz return; } KernelBoolSlice selection_vector = selection_vector_res.ok; - if (selection_vector) { + if (selection_vector.len > 0) { printf(" Selection vector:\n"); - print_selection_vector(" ", selection_vector); - drop_bool_slice(selection_vector); + print_selection_vector(" ", &selection_vector); } else { printf(" No selection vector for this call\n"); } + drop_bool_slice(selection_vector); // normally this would be picked out of the schema char* letter_key = "letter"; KernelStringSlice key = {letter_key, strlen(letter_key)}; @@ -89,7 +91,7 @@ void visit_data(void *engine_context, EngineDataHandle *engine_data, KernelBoolS printf(" Of this data, here is a selection vector\n"); print_selection_vector(" ", &selection_vec); visit_scan_data(engine_data, 
selection_vec, engine_context, visit_callback); - drop_bool_slice(&selection_vec); + drop_bool_slice(selection_vec); } int main(int argc, char* argv[]) { @@ -145,7 +147,8 @@ int main(int argc, char* argv[]) { const SnapshotHandle *snapshot_handle = snapshot_handle_res.ok; uint64_t v = version(snapshot_handle); - printf("version: %llu\n", v); + printf("version: %llu\n\n", v); + print_schema(snapshot_handle); ExternResultScan scan_res = scan(snapshot_handle, engine, NULL); if (scan_res.tag != OkScan) { diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h new file mode 100644 index 00000000..d6e8cabd --- /dev/null +++ b/ffi/examples/read-table/schema.h @@ -0,0 +1,234 @@ +#include +#include "delta_kernel_ffi.h" + +/** + * This module defines a very simple model of a schema, used only to be able to print the schema of + * a table. It consists of a "SchemaBuilder" which is our user data that gets passed into each visit_x + * call. This simply keeps track of all the lists we are asked to allocate. + * + * Each list is a "SchemaItemList", which tracks its length and an array of "SchemaItem"s. + * + * Each "SchemaItem" has a name and a type, which are just strings. It can also have a list which is + * its children. This is initially always UINTPTR_MAX, but when visiting a struct, map, or array, we + * point this at the list id specified in the callback, which allows us to traverse the schema when + * printing it. + */ + +// If you want the visitor to print out what it's being asked to do at each step, uncomment the +// following line +//#define VERBOSE + +#ifdef VERBOSE +#define _NTH_ARG(_1, _2, _3, _4, _5, N, ...) N +#define NUMARGS(...) _NTH_ARG(__VA_ARGS__, 5, 4, 3, 2, 1) +#define CHILD_FMT "Asked to visit %s named %s belonging to list %i. %s are in %i.\n" +#define NO_CHILD_FMT "Asked to visit %s named %s belonging to list %i.\n" +#define PRINT_VISIT(...) 
NUMARGS(__VA_ARGS__) == 5?\ + printf(CHILD_FMT, __VA_ARGS__): \ + printf(NO_CHILD_FMT, __VA_ARGS__) +#else +#define PRINT_VISIT(...) +#endif + +typedef struct SchemaItemList SchemaItemList; + +typedef struct { + char* name; + char* type; + uintptr_t children; +} SchemaItem; + +typedef struct SchemaItemList { + uint32_t len; + SchemaItem* list; +} SchemaItemList; + +typedef struct { + int list_count; + SchemaItemList* lists; +} SchemaBuilder; + +char* allocate_name(const KernelStringSlice slice) { + return strndup(slice.ptr, slice.len); +} + +// lists are preallocated to have exactly enough space, so we just fill in the next open slot and +// increment our length +SchemaItem* add_to_list(SchemaItemList *list, char* name, char* type) { + int idx = list->len; + list->list[idx].name = name; + list->list[idx].type = type; + list->len++; + return &list->list[idx]; +} + +// print out all items in a list, recursing into any children they may have +void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int parents_on_last) { + SchemaItemList *list = &builder->lists[list_id]; + for (int i = 0; i < list->len; i++) { + bool is_last = i == list->len - 1; + for (int j = 0; j < indent; j++) { + if ((indent - parents_on_last) <= j) { + // don't print a dangling | on any parents that are on their last item + printf(" "); + } else { + printf("│ "); + } + } + SchemaItem* item = &list->list[i]; + char* prefix = is_last? 
"└" : "├"; + printf("%s─ %s: %s\n", prefix, item->name, item->type); + if (list->list[i].children != UINTPTR_MAX) { + print_list(builder, list->list[i].children, indent+1, parents_on_last + is_last); + } + } +} + +// declare all our visitor methods +uintptr_t make_field_list(void *data, uintptr_t reserve) { + SchemaBuilder *builder = data; + int id = builder->list_count; +#ifdef VERBOSE + printf("Making a list of lenth %i with id %i\n", reserve, id); +#endif + builder->list_count++; + builder->lists = realloc(builder->lists, sizeof(SchemaItemList) * builder->list_count); + SchemaItem* list = calloc(reserve, sizeof(SchemaItem)); + for (int i = 0; i < reserve; i++) { + list[i].children = UINTPTR_MAX; + } + builder->lists[id].len = 0; + builder->lists[id].list = list; + return id; +} + +void visit_struct(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uintptr_t child_list_id) { + SchemaBuilder *builder = data; + char* name_ptr = allocate_name(name); + PRINT_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id); + SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct"); + struct_item->children = child_list_id; +} +void visit_array(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + bool contains_null, + uintptr_t child_list_id) { + SchemaBuilder *builder = data; + char* name_ptr = malloc(sizeof(char) * (name.len + 24)); + snprintf(name_ptr, name.len+1, "%s", name.ptr); + snprintf(name_ptr+name.len, 24, " (contains null: %s)", contains_null ? 
"true" : "false"); + PRINT_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id); + SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array"); + array_item->children = child_list_id; +} +void visit_map(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + bool value_contains_null, + uintptr_t child_list_id) { + SchemaBuilder *builder = data; + char* name_ptr = malloc(sizeof(char) * (name.len + 24)); + snprintf(name_ptr, name.len+1, "%s", name.ptr); + snprintf(name_ptr+name.len, 24, " (contains null: %s)", value_contains_null ? "true" : "false"); + PRINT_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id); + SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map"); + map_item->children = child_list_id; +} + +void visit_decimal(void *data, + uintptr_t sibling_list_id, + struct KernelStringSlice name, + uint8_t precision, + int8_t scale) { + SchemaBuilder *builder = data; + char* name_ptr = allocate_name(name); + char* type = malloc(16 * sizeof(char)); + snprintf(type, 16, "decimal(%i)(%i)", precision, scale); + PRINT_VISIT(type, name_ptr, sibling_list_id); + add_to_list(&builder->lists[sibling_list_id], name_ptr, type); +} + + + +void visit_simple_type(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name, char* type) { + SchemaBuilder *builder = data; + char* name_ptr = allocate_name(name); + PRINT_VISIT(type, name_ptr, sibling_list_id); + add_to_list(&builder->lists[sibling_list_id], name_ptr, type); +} + +#define DEFINE_VISIT_SIMPLE_TYPE(typename) \ + void visit_##typename(void *data, uintptr_t sibling_list_id, struct KernelStringSlice name) { \ + visit_simple_type(data, sibling_list_id, name, #typename); \ + } + +DEFINE_VISIT_SIMPLE_TYPE(string); +DEFINE_VISIT_SIMPLE_TYPE(integer); +DEFINE_VISIT_SIMPLE_TYPE(short); +DEFINE_VISIT_SIMPLE_TYPE(byte); +DEFINE_VISIT_SIMPLE_TYPE(long); +DEFINE_VISIT_SIMPLE_TYPE(float); 
+DEFINE_VISIT_SIMPLE_TYPE(double); +DEFINE_VISIT_SIMPLE_TYPE(boolean); +DEFINE_VISIT_SIMPLE_TYPE(binary); +DEFINE_VISIT_SIMPLE_TYPE(date); +DEFINE_VISIT_SIMPLE_TYPE(timestamp); +DEFINE_VISIT_SIMPLE_TYPE(timestamp_ntz); + +// free all the data in the builder (but not the builder itself, it's stack allocated) +void free_builder(SchemaBuilder builder) { + for (int i = 0; i < builder.list_count; i++) { + SchemaItemList *list = (builder.lists)+i; + for (int j = 0; j < list->len; j++) { + SchemaItem *item = list->list+j; + free(item->name); + // don't free item->type, those are static strings + if (!strncmp(item->type, "decimal", 7)) { + // except decimal types, we malloc'd those :) + free(item->type); + } + } + free(list->list); // free all the items in this list (we alloc'd them together) + } + free(builder.lists); +} + +// Print the schema of the snapshot +void print_schema(const SnapshotHandle *snapshot) { + SchemaBuilder builder = { + .list_count = 0, + .lists = calloc(0, sizeof(SchemaItem*)), + }; + EngineSchemaVisitor visitor = { + .data = &builder, + .make_field_list = make_field_list, + .visit_struct = visit_struct, + .visit_array = visit_array, + .visit_map = visit_map, + .visit_decimal = visit_decimal, + .visit_string = visit_string, + .visit_long = visit_long, + .visit_integer = visit_integer, + .visit_short = visit_short, + .visit_byte = visit_byte, + .visit_float = visit_float, + .visit_double = visit_double, + .visit_boolean = visit_boolean, + .visit_binary = visit_binary, + .visit_date = visit_date, + .visit_timestamp = visit_timestamp, + .visit_timestamp_ntz = visit_timestamp_ntz + }; + uintptr_t schema_list_id = visit_schema(snapshot, &visitor); +#ifdef PRINT_VISITS + printf("Schema returned in list %i\n", schema_list_id); +#endif + printf("Schema:\n"); + print_list(&builder, schema_list_id, 0, 0); + printf("\n"); + free_builder(builder); +} diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 5b874921..d22f7f63 100644 --- a/ffi/src/lib.rs +++ 
b/ffi/src/lib.rs @@ -11,7 +11,7 @@ use tracing::debug; use url::Url; use delta_kernel::expressions::{BinaryOperator, Expression, Scalar}; -use delta_kernel::schema::{DataType, PrimitiveType, StructField, StructType}; +use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Engine, Error}; @@ -438,7 +438,7 @@ unsafe fn unwrap_and_parse_path_as_url(path: KernelStringSlice) -> DeltaResult u64 { snapshot.version() } +/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own +/// representation of a schema from a particular schema within kernel. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. +/// +/// Every schema element the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct, map, array) contains a list +/// of "child" elements. +/// 1. Before visiting schema or any complex type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. When visiting any schema element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For the top-level schema, visit each top-level column, passing the column's name and type +/// - For a struct, first visit each struct field, passing the field's name, type, nullability, +/// and metadata +/// - For a map, visit the key and value, passing its special name ("map_key" or "map_value"), +/// type, and value nullability (keys are never nullable) +/// - For a list, visit the element, passing its special name ("array_element"), type, and +/// nullability +/// 3. 
When visiting a complex schema element, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_schema`] method returns the id of the list of top-level columns // WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods -// TODO: other types, nullability +// TODO: struct nullability and field metadata #[repr(C)] pub struct EngineSchemaVisitor { - // opaque state pointer - data: *mut c_void, - // Creates a new field list, optionally reserving capacity up front - make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + /// opaque state pointer + pub data: *mut c_void, + /// Creates a new field list, optionally reserving capacity up front + pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + // visitor methods that should instantiate and append the appropriate type to the field list - visit_struct: extern "C" fn( + /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a + /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. + pub visit_struct: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + child_list_id: usize, + ), + + /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list + /// with the array's element type + pub visit_array: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + contains_null: bool, // if this array can contain null values + child_list_id: usize, + ), + + /// Indicate that the schema contains a Map type. 
`child_list_id` will be a _two_ item list + /// where the first element is the map's key type and the second element is the + /// map's value type + pub visit_map: extern "C" fn( data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice, + value_contains_null: bool, // if this map can contain null values child_list_id: usize, ), - visit_string: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_integer: + + /// visit a `decimal` with the specified `precision` and `scale` + pub visit_decimal: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + name: KernelStringSlice, + precision: u8, + scale: i8, + ), + + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + pub visit_string: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `long` belonging to the list identified by `sibling_list_id`. + pub visit_long: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + pub visit_integer: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `short` belonging to the list identified by `sibling_list_id`. + pub visit_short: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `byte` belonging to the list identified by `sibling_list_id`. + pub visit_byte: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `float` belonging to the list identified by `sibling_list_id`. + pub visit_float: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `double` belonging to the list identified by `sibling_list_id`. 
+ pub visit_double: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + pub visit_boolean: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit `binary` belonging to the list identified by `sibling_list_id`. + pub visit_binary: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `date` belonging to the list identified by `sibling_list_id`. + pub visit_date: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. + pub visit_timestamp: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + + /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. + pub visit_timestamp_ntz: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), - visit_long: extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), } +/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the +/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. +/// +/// This method returns the id of the list allocated to hold the top level schema columns. +/// /// # Safety /// -/// Caller is responsible for passing a valid handle. +/// Caller is responsible for passing a valid snapshot handle and schema visitor. 
#[no_mangle] pub unsafe extern "C" fn visit_schema( snapshot: *const SnapshotHandle, @@ -654,29 +761,63 @@ pub unsafe extern "C" fn visit_schema( fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len()); for field in s.fields() { - visit_field(visitor, child_list_id, field); + visit_schema_item(field.data_type(), field.name(), visitor, child_list_id); } child_list_id } + fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, 1); + visit_schema_item(&at.element_type, "array_element", visitor, child_list_id); + child_list_id + } + + fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize { + let child_list_id = (visitor.make_field_list)(visitor.data, 2); + visit_schema_item(&mt.key_type, "map_key", visitor, child_list_id); + visit_schema_item(&mt.value_type, "map_value", visitor, child_list_id); + child_list_id + } + // Visit a struct field (recursively) and add the result to the list of siblings. - fn visit_field(visitor: &EngineSchemaVisitor, sibling_list_id: usize, field: &StructField) { - let name: &str = field.name.as_ref(); - match &field.data_type { - DataType::Primitive(PrimitiveType::Integer) => { - (visitor.visit_integer)(visitor.data, sibling_list_id, name.into()) - } - DataType::Primitive(PrimitiveType::Long) => { - (visitor.visit_long)(visitor.data, sibling_list_id, name.into()) + fn visit_schema_item( + data_type: &DataType, + name: &str, + visitor: &EngineSchemaVisitor, + sibling_list_id: usize, + ) { + macro_rules! 
call { + ( $visitor_fn:ident $(, $extra_args:expr) *) => { + (visitor.$visitor_fn)(visitor.data, sibling_list_id, name.into() $(, $extra_args) *) + }; + } + match data_type { + DataType::Struct(st) => call!(visit_struct, visit_struct_fields(visitor, st)), + DataType::Map(mt) => { + call!( + visit_map, + mt.value_contains_null, + visit_map_types(visitor, mt) + ) } - DataType::Primitive(PrimitiveType::String) => { - (visitor.visit_string)(visitor.data, sibling_list_id, name.into()) + DataType::Array(at) => { + call!(visit_array, at.contains_null, visit_array_item(visitor, at)) } - DataType::Struct(s) => { - let child_list_id = visit_struct_fields(visitor, s); - (visitor.visit_struct)(visitor.data, sibling_list_id, name.into(), child_list_id); + DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => { + call!(visit_decimal, *precision, *scale) } - other => println!("Unsupported data type: {}", other), + &DataType::STRING => call!(visit_string), + &DataType::LONG => call!(visit_long), + &DataType::INTEGER => call!(visit_integer), + &DataType::SHORT => call!(visit_short), + &DataType::BYTE => call!(visit_byte), + &DataType::FLOAT => call!(visit_float), + &DataType::DOUBLE => call!(visit_double), + &DataType::BOOLEAN => call!(visit_boolean), + &DataType::BINARY => call!(visit_binary), + &DataType::DATE => call!(visit_date), + &DataType::TIMESTAMP => call!(visit_timestamp), + &DataType::TIMESTAMP_NTZ => call!(visit_timestamp_ntz), } } @@ -748,14 +889,16 @@ impl KernelExpressionVisitorState { } } -// When invoking [[get_scan_files]], The engine provides a pointer to the (engine's native) -// predicate, along with a visitor function that can be invoked to recursively visit the -// predicate. This engine state is valid until the call to [[get_scan_files]] returns. Inside that -// method, the kernel allocates visitor state, which becomes the second argument to the predicate -// visitor invocation along with the engine-provided predicate pointer. 
The visitor state is valid -// for the lifetime of the predicate visitor invocation. Thanks to this double indirection, engine -// and kernel each retain ownership of their respective objects, with no need to coordinate memory -// lifetimes with the other. +/// A predicate that can be used to skip data when scanning. +/// +/// When invoking [`scan::scan`], the engine provides a pointer to the (engine's native) predicate, +/// along with a visitor function that can be invoked to recursively visit the predicate. This +/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// kernel allocates visitor state, which becomes the second argument to the predicate visitor +/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the +/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and +/// kernel each retain ownership of their respective objects, with no need to coordinate memory +/// lifetimes with the other. #[repr(C)] pub struct EnginePredicate { predicate: *mut c_void, diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index ac8d06e1..511b1a87 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -22,7 +22,7 @@ use super::handle::{ArcHandle, BoxHandle}; // that are the engine data /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the -/// [`Engine`] by calling [`get_raw_engine_data`] +/// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] pub struct EngineDataHandle { data: Box, } @@ -53,8 +53,8 @@ pub struct ArrowFFIData { /// the schema. /// /// # Safety -/// data_handle must be a valid EngineDataHandle as read by the [`DefaultEngine`] obtained -/// from `get_default_engine`. 
+/// data_handle must be a valid EngineDataHandle as read by the +/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. #[cfg(feature = "default-engine")] pub unsafe extern "C" fn get_raw_arrow_data( data_handle: *mut EngineDataHandle, @@ -225,7 +225,7 @@ fn kernel_scan_data_next_impl( /// # Safety /// /// Caller is responsible for (at most once) passing a valid pointer returned by a call to -/// [kernel_scan_files_init]. +/// [`kernel_scan_data_init`]. // we should probably be consistent with drop vs. free on engine side (probably the latter is more // intuitive to non-rust code) #[no_mangle] diff --git a/kernel/src/schema.rs b/kernel/src/schema.rs index cbfb375b..faba56c6 100644 --- a/kernel/src/schema.rs +++ b/kernel/src/schema.rs @@ -261,7 +261,7 @@ pub struct MapType { pub key_type: DataType, /// The type of element used for the value of this map pub value_type: DataType, - /// Denoting whether this array can contain one or more null values + /// Denoting whether this map can contain one or more null values #[serde(default = "default_true")] pub value_contains_null: bool, }