-
Notifications
You must be signed in to change notification settings - Fork 400
/
schema.rs
201 lines (178 loc) · 6.19 KB
/
schema.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
//! Delta Table schema implementation.
#![allow(non_snake_case, non_camel_case_types)]
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::HashMap;
/// Type alias for a string expected to match a GUID/UUID format.
///
/// The alias is a plain `String`; the format is not enforced by the type itself.
pub type Guid = String;
/// Type alias for i64/Delta long.
pub type DeltaDataTypeLong = i64;
/// Type alias representing the expected type (i64) of a Delta table version.
pub type DeltaDataTypeVersion = DeltaDataTypeLong;
/// Type alias representing the expected type (i64/ms since Unix epoch) of a Delta timestamp.
pub type DeltaDataTypeTimestamp = DeltaDataTypeLong;
/// Type alias for i32/Delta int.
pub type DeltaDataTypeInt = i32;
// Value written to the `type` field by `SchemaTypeStruct::new`.
static STRUCT_TAG: &str = "struct";
// Value written to the `type` field by `SchemaTypeArray::new`.
static ARRAY_TAG: &str = "array";
// Value written to the `type` field by `SchemaTypeMap::new`.
static MAP_TAG: &str = "map";
/// Represents a struct field defined in the Delta table schema.
// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Schema-Serialization-Format
#[derive(Serialize, Deserialize, PartialEq, Debug, Default, Clone)]
pub struct SchemaTypeStruct {
r#type: Cow<'static, str>,
fields: Vec<SchemaField>,
}
impl SchemaTypeStruct {
/// Create a new Schema using a vector of SchemaFields
pub fn new(fields: Vec<SchemaField>) -> Self {
let tag = Cow::Borrowed(STRUCT_TAG);
Self {
r#type: tag,
fields,
}
}
/// Returns the list of fields contained within the column struct.
pub fn get_fields(&self) -> &Vec<SchemaField> {
&self.fields
}
}
/// Describes a specific field of the Delta table schema.
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct SchemaField {
// Name of this (possibly nested) column
name: String,
// Data type of this column; `SchemaDataType` lists the possible shapes
r#type: SchemaDataType,
// Boolean denoting whether this field can be null
nullable: bool,
// A JSON map containing information about this column. Keys prefixed with Delta are reserved
// for the implementation.
metadata: HashMap<String, String>,
}
impl SchemaField {
/// Create a new SchemaField from scratch
pub fn new(
name: String,
r#type: SchemaDataType,
nullable: bool,
metadata: HashMap<String, String>,
) -> Self {
Self {
name,
r#type,
nullable,
metadata,
}
}
/// The column name of the schema field.
pub fn get_name(&self) -> &str {
&self.name
}
/// The data type of the schema field. SchemaDataType defines the possible values.
pub fn get_type(&self) -> &SchemaDataType {
&self.r#type
}
/// Whether the column/field is nullable.
pub fn is_nullable(&self) -> bool {
self.nullable
}
/// Additional metadata about the column/field.
pub fn get_metadata(&self) -> &HashMap<String, String> {
&self.metadata
}
}
/// Schema definition for array type fields.
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct SchemaTypeArray {
// Type tag of this schema node; `SchemaTypeArray::new` always sets it to the string "array".
// No accessor in this impl exposes it.
r#type: String,
// The type of element stored in this array represented as a string containing the name of a
// primitive type, a struct definition, an array definition or a map definition
elementType: Box<SchemaDataType>,
// Boolean denoting whether this array can contain one or more null values
containsNull: bool,
}
impl SchemaTypeArray {
/// Create a new SchemaTypeArray
pub fn new(elementType: Box<SchemaDataType>, containsNull: bool) -> Self {
Self {
r#type: String::from(ARRAY_TAG),
elementType,
containsNull,
}
}
/// The data type of each element contained in the array.
pub fn get_element_type(&self) -> &SchemaDataType {
&self.elementType
}
/// Whether the column/field is allowed to contain null elements.
pub fn contains_null(&self) -> bool {
self.containsNull
}
}
/// Schema definition for map type fields.
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct SchemaTypeMap {
// Type tag of this schema node; `SchemaTypeMap::new` always sets it to the string "map"
r#type: String,
// The type of element used for the key of this map
keyType: Box<SchemaDataType>,
// The type of element contained in the value of this map
valueType: Box<SchemaDataType>,
// Boolean denoting whether the value field is allowed to contain null elements
valueContainsNull: bool,
}
impl SchemaTypeMap {
/// Create a new SchemaTypeMap
pub fn new(
keyType: Box<SchemaDataType>,
valueType: Box<SchemaDataType>,
valueContainsNull: bool,
) -> Self {
Self {
r#type: String::from(MAP_TAG),
keyType,
valueType,
valueContainsNull,
}
}
/// The type of element used for the key of this map, represented as a string containing the
/// name of a primitive type, a struct definition, an array definition or a map definition
pub fn get_key_type(&self) -> &SchemaDataType {
&self.keyType
}
/// The type of element contained in the value of this map, represented as a string containing the
/// name of a primitive type, a struct definition, an array definition or a map definition
pub fn get_value_type(&self) -> &SchemaDataType {
&self.valueType
}
/// Whether the value field is allowed to contain null elements.
pub fn get_value_contains_null(&self) -> bool {
self.valueContainsNull
}
}
/// Enum with variants for each top level schema data type.
///
/// The enum is marked `#[serde(untagged)]`, so no variant name appears in the serialized
/// form.
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
#[serde(untagged)]
pub enum SchemaDataType {
/// Variant representing non-array, non-map, non-struct fields. Wrapped value will contain
/// the string name of the primitive type.
///
/// Valid values are:
/// * string: utf8
/// * long: i64
/// * integer: i32
/// * short: i16
/// * byte: i8
/// * float: f32
/// * double: f64
/// * boolean: bool
/// * binary: a sequence of binary data
/// * date: A calendar date, represented as a year-month-day triple without a timezone
/// * timestamp: Microsecond precision timestamp without a timezone
primitive(String),
/// Variant representing a struct.
r#struct(SchemaTypeStruct),
/// Variant representing an array.
array(SchemaTypeArray),
/// Variant representing a map.
map(SchemaTypeMap),
}
/// Represents the schema of the delta table.
///
/// This is an alias for [`SchemaTypeStruct`], so the table schema is itself a struct type.
pub type Schema = SchemaTypeStruct;