Skip to content

Commit

Permalink
feat: Execute Python queries (#575)
Browse files Browse the repository at this point in the history
This removes the ability to inspect the prepared data from the Python
FFI. Instead, the Python FFI now has an `execute` method allowing you to
inspect the results of an expression, including the bare table.

This changes the execute methods a bit to have a simpler API to
interface with.

This relies on pushing "in-memory" data into the `DataContext`, which is
very brittle and hacky. For example, it currently only supports adding
data once, and requires that the schemas line up.
  • Loading branch information
bjchambers authored Aug 1, 2023
1 parent 786d566 commit a81973e
Show file tree
Hide file tree
Showing 25 changed files with 567 additions and 337 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions crates/sparrow-compiler/src/data_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::sync::Arc;

use anyhow::Context;
use arrow::datatypes::{DataType, SchemaRef};
use arrow::record_batch::RecordBatch;
use sparrow_api::kaskada::v1alpha::slice_plan::Slice;
use sparrow_api::kaskada::v1alpha::{compute_table, ComputeTable, PreparedFile, TableConfig};
use sparrow_core::context_code;
Expand All @@ -17,7 +18,7 @@ use crate::AstDfgRef;
///
/// Specifically, this holds the information about the tables
/// available to the compilation.
#[derive(Default, Debug)]
#[derive(Default, Debug, Clone)]
pub struct DataContext {
/// Information about the groupings in the context.
group_info: Vec<GroupInfo>,
Expand Down Expand Up @@ -60,6 +61,10 @@ impl DataContext {
self.table_info.get(&id)
}

pub fn table_info_mut(&mut self, id: TableId) -> Option<&mut TableInfo> {
self.table_info.get_mut(&id)
}

pub fn tables_for_grouping(&self, id: GroupId) -> impl Iterator<Item = &TableInfo> {
self.table_info
.iter()
Expand Down Expand Up @@ -296,7 +301,7 @@ impl DataContext {
}

/// Information about groups.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct GroupInfo {
name: String,
key_type: DataType,
Expand All @@ -314,6 +319,8 @@ pub struct TableInfo {
/// Each file set corresponds to the files for the table with a specific
/// slice configuration.
file_sets: Vec<compute_table::FileSet>,
/// An in-memory record batch for the contents of the table.
pub in_memory: Option<RecordBatch>,
}

impl TableInfo {
Expand All @@ -335,6 +342,7 @@ impl TableInfo {
schema,
config,
file_sets,
in_memory: None,
})
}

Expand Down
6 changes: 3 additions & 3 deletions crates/sparrow-compiler/src/dfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ impl Dfg {
}

/// Runs simplifications on the graph.
pub(crate) fn run_simplifications(&mut self, options: &CompilerOptions) -> anyhow::Result<()> {
pub fn run_simplifications(&mut self, options: &CompilerOptions) -> anyhow::Result<()> {
let span = info_span!("Running simplifications");
let _enter = span.enter();

Expand All @@ -375,7 +375,7 @@ impl Dfg {
}

/// Extract the simplest representation of the node `id` from the graph.
pub(crate) fn extract_simplest(&self, id: Id) -> DfgExpr {
pub fn extract_simplest(&self, id: Id) -> DfgExpr {
let span = info_span!("Extracting simplest DFG");
let _enter = span.enter();

Expand Down Expand Up @@ -447,7 +447,7 @@ impl Dfg {
/// Remove nodes that aren't needed for the `output` from the graph.
///
/// Returns the new ID of the `output`.
pub(crate) fn prune(&mut self, output: Id) -> anyhow::Result<Id> {
pub fn prune(&mut self, output: Id) -> anyhow::Result<Id> {
// The implementation is somewhat painful -- we extract a `RecExpr`, and then
// recreate the EGraph. This has the desired property -- only referenced nodes
// are extracted. But, it may cause the IDs to change.
Expand Down
2 changes: 1 addition & 1 deletion crates/sparrow-compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ mod frontend;
mod functions;
mod nearest_matches;
mod options;
mod plan;
pub mod plan;
mod time_domain;
mod types;

Expand Down
2 changes: 1 addition & 1 deletion crates/sparrow-compiler/src/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const DBG_PRINT_PLAN: bool = false;
/// TODO: The `DataContext` is used to get the table name from an ID, which is
/// only necessary to create the `slice_plan` because it uses a name instead of
/// an ID.
pub(super) fn extract_plan_proto(
pub fn extract_plan_proto(
data_context: &DataContext,
expr: DfgExpr,
per_entity_behavior: PerEntityBehavior,
Expand Down
1 change: 0 additions & 1 deletion crates/sparrow-main/tests/e2e/fixture/query_fixture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ impl QueryFixture {
plan: Some(plan),
destination: Some(output_to),
tables: data.tables(),

..self.execute_request.clone()
};

Expand Down
Loading

0 comments on commit a81973e

Please sign in to comment.