Skip to content

Commit

Permalink
feat: Type check DML (#1727)
Browse files Browse the repository at this point in the history
  • Loading branch information
joshua-spacetime authored Sep 24, 2024
1 parent f559f0a commit 06e5be5
Show file tree
Hide file tree
Showing 8 changed files with 497 additions and 26 deletions.
2 changes: 1 addition & 1 deletion crates/planner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ license-file = "LICENSE"
derive_more.workspace = true
thiserror.workspace = true
spacetimedb-lib.workspace = true
spacetimedb-primitives.workspace = true
spacetimedb-sats.workspace = true
spacetimedb-schema.workspace = true
spacetimedb-sql-parser.workspace = true

[dev-dependencies]
spacetimedb-lib.workspace = true
spacetimedb-primitives.workspace = true
126 changes: 119 additions & 7 deletions crates/planner/src/logical/bind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,122 @@ pub trait SchemaView {
fn schema(&self, name: &str, case_sensitive: bool) -> Option<Arc<TableSchema>>;
}

/// Type checking logic shared between SQL front ends.
///
/// An implementor picks its AST types and supplies [`TypeChecker::type_ast`] and
/// [`TypeChecker::type_set`]; typing of `FROM` clauses and of individual relations
/// is provided here as default methods.
pub trait TypeChecker {
    /// AST node typed by [`TypeChecker::type_ast`]; it may also appear as a nested relation.
    type Ast;
    /// AST node typed by [`TypeChecker::type_set`].
    type Set;

    /// Type check `ast` and lower it into a [RelExpr].
    fn type_ast(ctx: &mut TyCtx, ast: Self::Ast, tx: &impl SchemaView) -> TypingResult<RelExpr>;

    /// Type check `ast` and lower it into a [RelExpr].
    fn type_set(ctx: &mut TyCtx, ast: Self::Set, tx: &impl SchemaView) -> TypingResult<RelExpr>;

    /// Type check a `FROM` clause.
    ///
    /// Returns the lowered relation together with the variables it brings into scope.
    fn type_from(ctx: &mut TyCtx, from: SqlFrom<Self::Ast>, tx: &impl SchemaView) -> TypingResult<(RelExpr, Vars)> {
        match from {
            SqlFrom::Expr(expr, alias) => {
                let (rel, rel_vars) = Self::type_rel(ctx, expr, tx)?;
                match alias {
                    // No alias: the relation keeps the variables it was typed with.
                    None => Ok((rel, rel_vars)),
                    // Aliased: the alias is the only name in scope.
                    Some(alias) => {
                        let ty = rel.ty_id();
                        Ok((rel, vec![(alias.name, ty)].into()))
                    }
                }
            }
            SqlFrom::Join(lhs, lhs_alias, joins) => {
                let (lhs, _) = Self::type_rel(ctx, lhs, tx)?;

                let mut vars = Vars::default();
                vars.push((lhs_alias.name, lhs.ty_id()));

                let mut inputs = vec![lhs];
                let mut predicates = Vec::new();

                for join in joins {
                    let (rhs, _) = Self::type_rel(ctx, join.expr, tx)?;
                    vars.push((join.alias.name, rhs.ty_id()));
                    inputs.push(rhs);

                    // An `ON` predicate is typed against every relation joined so far,
                    // including the one just added, and must be boolean.
                    if let Some(on) = join.on {
                        predicates.push(type_expr(ctx, &vars, on, Some(TyId::BOOL))?);
                    }
                }

                // The join itself is typed as the tuple of its inputs' types.
                let tuple = Type::Tup(vars.iter().map(|(_, ty)| *ty).collect());
                let joined = RelExpr::Join(inputs.into(), ctx.add(tuple));
                Ok((RelExpr::select(joined, vars.clone(), predicates), vars))
            }
        }
    }

    /// Type check a single relation expression.
    ///
    /// A table reference is resolved through `tx`; a nested AST is delegated to
    /// [`TypeChecker::type_ast`] and brings no variables into scope.
    fn type_rel(ctx: &mut TyCtx, expr: ast::RelExpr<Self::Ast>, tx: &impl SchemaView) -> TypingResult<(RelExpr, Vars)> {
        match expr {
            ast::RelExpr::Var(var) => {
                let schema = tx
                    .schema(&var.name, var.case_sensitive)
                    .ok_or_else(|| Unresolved::table(&var.name))
                    .map_err(TypingError::from)?;

                // Register one type per column, then the row type built from them.
                let columns = schema
                    .columns()
                    .iter()
                    .map(|ColumnSchema { col_name, col_type, .. }| {
                        let id = ctx.add(Type::Alg(col_type.clone()));
                        (col_name.to_string(), id)
                    })
                    .collect::<Vec<_>>();
                let id = ctx.add(Type::Var(columns.into_boxed_slice()));

                Ok((RelExpr::RelVar(schema, id), vec![(var.name, id)].into()))
            }
            ast::RelExpr::Ast(ast) => Self::type_ast(ctx, *ast, tx).map(|rel| (rel, Vars::default())),
        }
    }
}

/// Type checker for subscriptions
struct SubChecker;

impl TypeChecker for SubChecker {
    type Ast = SqlAst;
    type Set = SqlAst;

    /// Type check a subscription query; identical to [`TypeChecker::type_set`] for this checker.
    fn type_ast(ctx: &mut TyCtx, ast: Self::Ast, tx: &impl SchemaView) -> TypingResult<RelExpr> {
        Self::type_set(ctx, ast, tx)
    }

    /// Type check a set expression (`UNION`, `MINUS`) or a `SELECT`.
    ///
    /// Both operands of a set operation must have the same type.
    fn type_set(ctx: &mut TyCtx, ast: Self::Set, tx: &impl SchemaView) -> TypingResult<RelExpr> {
        match ast {
            SqlAst::Union(a, b) => {
                // Recurse through the trait so there is a single code path for this checker.
                let a = Self::type_ast(ctx, *a, tx)?;
                let b = Self::type_ast(ctx, *b, tx)?;
                assert_eq_types(a.ty_id().try_with_ctx(ctx)?, b.ty_id().try_with_ctx(ctx)?)?;
                Ok(RelExpr::Union(Box::new(a), Box::new(b)))
            }
            SqlAst::Minus(a, b) => {
                let a = Self::type_ast(ctx, *a, tx)?;
                let b = Self::type_ast(ctx, *b, tx)?;
                assert_eq_types(a.ty_id().try_with_ctx(ctx)?, b.ty_id().try_with_ctx(ctx)?)?;
                Ok(RelExpr::Minus(Box::new(a), Box::new(b)))
            }
            SqlAst::Select(SqlSelect { project, from, filter }) => {
                let (input, vars) = Self::type_from(ctx, from, tx)?;
                // The filter, when present, is applied before the projection.
                let input = match filter {
                    Some(expr) => type_select(ctx, expr, input, vars.clone())?,
                    None => input,
                };
                // Last use of `vars`: move it instead of cloning.
                type_proj(ctx, project, input, vars)
            }
        }
    }
}

/// Parse and type check a subscription query
///
/// The query is parsed with `parse_subscription`, type checked by `SubChecker`,
/// and the resulting expression is then passed through `expect_table_type`.
///
/// # Errors
///
/// Propagates any error from parsing, from type checking, or from `expect_table_type`.
pub fn parse_and_type_sub(sql: &str, tx: &impl SchemaView) -> TypingResult<RelExpr> {
    let mut ctx = TyCtx::default();
    // Parse and type check exactly once; the result of an earlier call through the free
    // `type_ast` was immediately shadowed, so it only duplicated the work.
    let expr = SubChecker::type_ast(&mut ctx, parse_subscription(sql)?, tx)?;
    expect_table_type(&ctx, expr)
}

Expand Down Expand Up @@ -128,13 +240,13 @@ fn type_rel(ctx: &mut TyCtx, expr: ast::RelExpr<SqlAst>, tx: &impl SchemaView) -
}

/// Type check a filter predicate and lower it into a selection over `input`.
///
/// `expr` is typed against `vars` with an expected type of [TyId::BOOL]; the result
/// becomes the sole predicate passed to [RelExpr::select] together with `input` and `vars`.
// NOTE(review): two signature lines follow. They look like the before/after sides of the
// visibility change to `pub(crate)` — confirm that only the second belongs in the file.
fn type_select(ctx: &mut TyCtx, expr: SqlExpr, input: RelExpr, vars: Vars) -> TypingResult<RelExpr> {
pub(crate) fn type_select(ctx: &mut TyCtx, expr: SqlExpr, input: RelExpr, vars: Vars) -> TypingResult<RelExpr> {
let exprs = vec![type_expr(ctx, &vars, expr, Some(TyId::BOOL))?];
Ok(RelExpr::select(input, vars, exprs))
}

/// Type check and lower a [ast::Project]
fn type_proj(ctx: &mut TyCtx, proj: ast::Project, input: RelExpr, vars: Vars) -> TypingResult<RelExpr> {
pub(crate) fn type_proj(ctx: &mut TyCtx, proj: ast::Project, input: RelExpr, vars: Vars) -> TypingResult<RelExpr> {
match proj {
ast::Project::Star(None) => Ok(input),
ast::Project::Star(Some(var)) => {
Expand Down Expand Up @@ -167,7 +279,7 @@ fn type_proj(ctx: &mut TyCtx, proj: ast::Project, input: RelExpr, vars: Vars) ->
}

/// Type check and lower a [SqlExpr] into a logical [Expr].
fn type_expr(ctx: &TyCtx, vars: &Vars, expr: SqlExpr, expected: Option<TyId>) -> TypingResult<Expr> {
pub(crate) fn type_expr(ctx: &TyCtx, vars: &Vars, expr: SqlExpr, expected: Option<TyId>) -> TypingResult<Expr> {
match (expr, expected) {
(SqlExpr::Lit(SqlLiteral::Bool(v)), None | Some(TyId::BOOL)) => Ok(Expr::bool(v)),
(SqlExpr::Lit(SqlLiteral::Bool(_)), Some(id)) => {
Expand Down Expand Up @@ -195,7 +307,7 @@ fn type_expr(ctx: &TyCtx, vars: &Vars, expr: SqlExpr, expected: Option<TyId>) ->
}

/// Parses a source text literal as a particular type
fn parse(ctx: &TyCtx, v: String, id: TyId) -> TypingResult<AlgebraicValue> {
pub(crate) fn parse(ctx: &TyCtx, v: String, id: TyId) -> TypingResult<AlgebraicValue> {
let err = |v, ty| TypingError::from(ConstraintViolation::lit(v, ty));
match ctx.try_resolve(id)? {
ty @ Type::Alg(AlgebraicType::I8) => v
Expand Down Expand Up @@ -260,7 +372,7 @@ fn parse(ctx: &TyCtx, v: String, id: TyId) -> TypingResult<AlgebraicValue> {
}

/// Returns a type constraint violation for an unexpected type
///
/// Builds a [ConstraintViolation] via `ConstraintViolation::eq` from the `expected` and
/// `inferred` types and converts it into a [TypingError].
// NOTE(review): two signature lines follow. They look like the before/after sides of the
// visibility change to `pub(crate)` — confirm that only the second belongs in the file.
fn unexpected_type(expected: TypeWithCtx<'_>, inferred: TypeWithCtx<'_>) -> TypingError {
pub(crate) fn unexpected_type(expected: TypeWithCtx<'_>, inferred: TypeWithCtx<'_>) -> TypingError {
ConstraintViolation::eq(expected, inferred).into()
}

Expand All @@ -282,7 +394,7 @@ fn expect_op_type(ctx: &TyCtx, op: BinOp, expr: Expr) -> TypingResult<Expr> {
}
}

fn assert_eq_types(a: TypeWithCtx<'_>, b: TypeWithCtx<'_>) -> TypingResult<()> {
pub(crate) fn assert_eq_types(a: TypeWithCtx<'_>, b: TypeWithCtx<'_>) -> TypingResult<()> {
if a == b {
Ok(())
} else {
Expand Down
36 changes: 28 additions & 8 deletions crates/planner/src/logical/errors.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
use spacetimedb_sql_parser::{ast::BinOp, parser::errors::SqlParseError};
use thiserror::Error;

use super::ty::{InvalidTyId, TypeWithCtx};
use super::{
stmt::InvalidVar,
ty::{InvalidTyId, TypeWithCtx},
};

/// A violated typing constraint.
///
/// Each variant stores the offending types/expressions already rendered as strings
/// and formats them into a message; names are wrapped in backticks.
#[derive(Error, Debug)]
pub enum ConstraintViolation {
    /// The inferred type differs from the expected one.
    #[error("(expected) {expected} != {inferred} (inferred)")]
    Eq { expected: String, inferred: String },
    /// `ty` is not a numeric type.
    #[error("`{ty}` is not a numeric type")]
    Num { ty: String },
    /// `ty` cannot be interpreted as a byte array.
    #[error("`{ty}` cannot be interpreted as a byte array")]
    Hex { ty: String },
    /// The literal `expr` cannot be parsed as type `ty`.
    #[error("`{expr}` cannot be parsed as type `{ty}`")]
    Lit { expr: String, ty: String },
    /// The binary operator `op` does not support operands of type `ty`.
    #[error("The binary operator `{op}` does not support type `{ty}`")]
    Bin { op: BinOp, ty: String },
}

Expand Down Expand Up @@ -52,11 +55,11 @@ impl ConstraintViolation {

#[derive(Error, Debug)]
pub enum Unresolved {
#[error("Cannot resolve {0}")]
#[error("Cannot resolve `{0}`")]
Var(String),
#[error("Cannot resolve table {0}")]
#[error("Cannot resolve table `{0}`")]
Table(String),
#[error("Cannot resolve field {1} in {0}")]
#[error("Cannot resolve field `{1}` in `{0}`")]
Field(String, String),
#[error("Cannot resolve type for literal expression")]
Literal,
Expand Down Expand Up @@ -87,6 +90,19 @@ pub enum Unsupported {
ProjectExpr,
#[error("Unqualified column projections are not supported")]
UnqualifiedProjectExpr,
#[error("ORDER BY is not supported")]
OrderBy,
#[error("LIMIT is not supported")]
Limit,
}

// TODO: It might be better to return the missing/extra fields
/// Error describing an insert by the number of values in the row and the number of
/// fields of the target table.
///
/// NOTE(review): presumably raised when the two counts differ — confirm at the call site.
#[derive(Error, Debug)]
#[error("Inserting a row with {values} values into `{table}` which has {fields} fields")]
pub struct InsertError {
/// Name of the table being inserted into.
pub table: String,
/// Number of values in the inserted row.
pub values: usize,
/// Number of fields the table has.
pub fields: usize,
}

#[derive(Error, Debug)]
Expand All @@ -100,5 +116,9 @@ pub enum TypingError {
#[error(transparent)]
InvalidTyId(#[from] InvalidTyId),
#[error(transparent)]
InvalidVar(#[from] InvalidVar),
#[error(transparent)]
Insert(#[from] InsertError),
#[error(transparent)]
ParseError(#[from] SqlParseError),
}
1 change: 1 addition & 0 deletions crates/planner/src/logical/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod bind;
pub mod errors;
pub mod expr;
pub mod stmt;
pub mod ty;

/// Asserts that `$ty` is `$size` bytes in `static_assert_size($ty, $size)`.
Expand Down
Loading

2 comments on commit 06e5be5

@github-actions
Copy link

@github-actions github-actions bot commented on 06e5be5 Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarking failed. Please check the workflow run for details.

@github-actions
Copy link

@github-actions github-actions bot commented on 06e5be5 Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Callgrind benchmark results

Callgrind Benchmark Report

These benchmarks were run using callgrind,
an instruction-level profiler. They allow comparisons between sqlite (sqlite), SpacetimeDB running through a module (stdb_module), and the underlying SpacetimeDB data storage engine (stdb_raw). Callgrind emulates a CPU to collect the below estimates.

Measurement changes larger than five percent are in bold.

In-memory benchmarks

callgrind: empty transaction

db total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw 5380 5380 0.00% 5414 5414 0.00%
sqlite 5555 5555 0.00% 5971 5971 0.00%

callgrind: filter

db schema indices count preload _column data_type total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str no_index 64 128 2 string 119003 119003 0.00% 119609 119609 0.00%
stdb_raw u32_u64_str no_index 64 128 1 u64 75374 75374 0.00% 75856 75856 0.00%
stdb_raw u32_u64_str btree_each_column 64 128 2 string 22611 22627 -0.07% 23125 23133 -0.03%
stdb_raw u32_u64_str btree_each_column 64 128 1 u64 21578 21578 0.00% 22020 22024 -0.02%
sqlite u32_u64_str no_index 64 128 2 string 144695 144677 0.01% 146247 146229 0.01%
sqlite u32_u64_str no_index 64 128 1 u64 124036 124036 0.00% 125350 125350 0.00%
sqlite u32_u64_str btree_each_column 64 128 2 string 134476 134476 0.00% 136064 136060 0.00%
sqlite u32_u64_str btree_each_column 64 128 1 u64 131343 131343 0.00% 132751 132759 -0.01%

callgrind: insert bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 64 128 898644 898602 0.00% 952180 952190 -0.00%
stdb_raw u32_u64_str btree_each_column 64 128 1053120 1053582 -0.04% 1119712 1120106 -0.04%
sqlite u32_u64_str unique_0 64 128 398292 398292 0.00% 414950 414946 0.00%
sqlite u32_u64_str btree_each_column 64 128 983609 983609 0.00% 1023489 1023485 0.00%

callgrind: iterate

db schema indices count total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 152676 152676 0.00% 152806 152806 0.00%
stdb_raw u32_u64_str unique_0 64 15701 15701 0.00% 15827 15827 0.00%
sqlite u32_u64_str unique_0 1024 1068223 1068223 0.00% 1071455 1071455 0.00%
sqlite u32_u64_str unique_0 64 76215 76209 0.01% 77185 77179 0.01%

callgrind: serialize_product_value

count format total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
64 json 47182 47182 0.00% 49800 49800 0.00%
64 bsatn 25716 25716 0.00% 27994 27994 0.00%
16 json 12078 12078 0.00% 13948 13948 0.00%
16 bsatn 8117 8117 0.00% 9477 9477 0.00%

callgrind: update bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 1024 20571406 20576401 -0.02% 21393700 21398631 -0.02%
stdb_raw u32_u64_str unique_0 64 128 1296672 1296651 0.00% 1384000 1383943 0.00%
sqlite u32_u64_str unique_0 1024 1024 1802006 1802024 -0.00% 1810976 1811006 -0.00%
sqlite u32_u64_str unique_0 64 128 128358 128352 0.00% 131016 131010 0.00%
On-disk benchmarks

callgrind: empty transaction

db total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw 5390 5390 0.00% 5428 5428 0.00%
sqlite 5613 5613 0.00% 6157 6157 0.00%

callgrind: filter

db schema indices count preload _column data_type total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str no_index 64 128 2 string 117924 117924 0.00% 118574 118574 0.00%
stdb_raw u32_u64_str no_index 64 128 1 u64 75384 75384 0.00% 75894 75894 0.00%
stdb_raw u32_u64_str btree_each_column 64 128 2 string 22621 22625 -0.02% 23127 23131 -0.02%
stdb_raw u32_u64_str btree_each_column 64 128 1 u64 21588 21588 0.00% 22054 22054 0.00%
sqlite u32_u64_str no_index 64 128 2 string 146598 146598 0.00% 148426 148426 0.00%
sqlite u32_u64_str no_index 64 128 1 u64 125947 125947 0.00% 127491 127499 -0.01%
sqlite u32_u64_str btree_each_column 64 128 2 string 136598 136598 0.00% 138784 138780 0.00%
sqlite u32_u64_str btree_each_column 64 128 1 u64 133439 133439 0.00% 135353 135353 0.00%

callgrind: insert bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 64 128 847946 847934 0.00% 870028 870056 -0.00%
stdb_raw u32_u64_str btree_each_column 64 128 997141 997750 -0.06% 1063037 1063638 -0.06%
sqlite u32_u64_str unique_0 64 128 415829 415829 0.00% 431835 431839 -0.00%
sqlite u32_u64_str btree_each_column 64 128 1021870 1021870 0.00% 1061178 1061162 0.00%

callgrind: iterate

db schema indices count total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 152686 152686 0.00% 152812 152812 0.00%
stdb_raw u32_u64_str unique_0 64 15711 15711 0.00% 15837 15837 0.00%
sqlite u32_u64_str unique_0 1024 1071291 1071291 0.00% 1075017 1075017 0.00%
sqlite u32_u64_str unique_0 64 77981 77981 0.00% 79295 79295 0.00%

callgrind: serialize_product_value

count format total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
64 json 47182 47182 0.00% 49800 49800 0.00%
64 bsatn 25716 25716 0.00% 27994 27994 0.00%
16 json 12078 12078 0.00% 13948 13948 0.00%
16 bsatn 8117 8117 0.00% 9477 9477 0.00%

callgrind: update bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 1024 19297627 19295738 0.01% 20179197 20177176 0.01%
stdb_raw u32_u64_str unique_0 64 128 1253289 1253239 0.00% 1340395 1340315 0.01%
sqlite u32_u64_str unique_0 1024 1024 1809567 1809567 0.00% 1818225 1818225 0.00%
sqlite u32_u64_str unique_0 64 128 132478 132478 0.00% 135432 135432 0.00%

Please sign in to comment.