Skip to content

Commit

Permalink
Merge pull request #6 from SKalt/support-splitting-with-scanner
Browse files Browse the repository at this point in the history
feat: split_with_parser, split_with_scanner
  • Loading branch information
seanlinsley authored Apr 19, 2022
2 parents 4ebff3a + 8fc038e commit 15376c6
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ pub enum Error {
InvalidJson(String),
#[error("Invalid pointer")]
InvalidPointer,
#[error("Error scanning: {0}")]
Scan(String),
#[error("Error splitting: {0}")]
Split(String),
}

/// Convenient Result alias for returning `pg_query::Error`.
Expand Down
116 changes: 115 additions & 1 deletion src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::os::raw::{c_char, c_uint};

use prost::Message;

// use crate::*;
use crate::bindings::*;
use crate::error::*;
use crate::parse_result::ParseResult;
Expand Down Expand Up @@ -166,3 +165,118 @@ pub fn parse_plpgsql(stmt: &str) -> Result<serde_json::Value> {
unsafe { pg_query_free_plpgsql_parse_result(result) };
structure
}

/// Split a well-formed query into separate statements.
///
/// # Example
///
/// ```rust
/// let query = r#"select /*;*/ 1; select "2;", (select 3);"#;
/// let statements = pg_query::split_with_parser(query).unwrap();
/// assert_eq!(statements, vec!["select /*;*/ 1", r#" select "2;", (select 3)"#]);
/// ```
///
/// However, `split_with_parser` will fail on malformed statements
///
/// ```rust
/// let query = "select 1; this statement is not sql; select 2;";
/// let result = pg_query::split_with_parser(query);
/// let err = r#"syntax error at or near "this""#;
/// assert_eq!(result, Err(pg_query::Error::Split(err.to_string())));
/// ```
pub fn split_with_parser(query: &str) -> Result<Vec<&str>> {
let input = CString::new(query)?;
let result = unsafe { pg_query_split_with_parser(input.as_ptr()) };
let split_result = if !result.error.is_null() {
let message = unsafe { CStr::from_ptr((*result.error).message) }.to_string_lossy().to_string();
Err(Error::Split(message))
} else {
let n_stmts = result.n_stmts as usize;
let mut statements = Vec::with_capacity(n_stmts);
for offset in 0..n_stmts {
let split_stmt = unsafe { *result.stmts.add(offset).read() };
let start = split_stmt.stmt_location as usize;
let end = start + split_stmt.stmt_len as usize;
statements.push(&query[start..end]);
// not sure the start..end slice'll hold up for non-utf8 charsets
}
Ok(statements)
};
unsafe { pg_query_free_split_result(result) };
split_result
}

/// Scan a sql query into a its component of tokens.
///
/// # Example
///
/// ```rust
/// use pg_query::protobuf::*;
/// let sql = "SELECT update AS left /* comment */ FROM between";
/// let result = pg_query::scan(sql).unwrap();
/// let tokens: Vec<std::string::String> = result.tokens.iter().map(|token| {
/// format!("{:?}", token)
/// }).collect();
/// assert_eq!(
/// tokens,
/// vec![
/// "ScanToken { start: 0, end: 6, token: Select, keyword_kind: ReservedKeyword }",
/// "ScanToken { start: 7, end: 13, token: Update, keyword_kind: UnreservedKeyword }",
/// "ScanToken { start: 14, end: 16, token: As, keyword_kind: ReservedKeyword }",
/// "ScanToken { start: 17, end: 21, token: Left, keyword_kind: TypeFuncNameKeyword }",
/// "ScanToken { start: 22, end: 35, token: CComment, keyword_kind: NoKeyword }",
/// "ScanToken { start: 36, end: 40, token: From, keyword_kind: ReservedKeyword }",
/// "ScanToken { start: 41, end: 48, token: Between, keyword_kind: ColNameKeyword }"
/// ]);
/// ```
pub fn scan(sql: &str) -> Result<protobuf::ScanResult> {
let input = CString::new(sql)?;
let result = unsafe { pg_query_scan(input.as_ptr()) };
let scan_result = if !result.error.is_null() {
let message = unsafe { CStr::from_ptr((*result.error).message) }.to_string_lossy().to_string();
Err(Error::Scan(message))
} else {
let data = unsafe { std::slice::from_raw_parts(result.pbuf.data as *const u8, result.pbuf.len as usize) };
protobuf::ScanResult::decode(data).map_err(Error::Decode).and_then(|result| Ok(result))
};
unsafe { pg_query_free_scan_result(result) };
scan_result
}

/// Split a potentially-malformed query into separate statements. Note that
/// invalid tokens will be skipped
/// ```rust
/// let query = r#"select /*;*/ 1; asdf; select "2;", (select 3); asdf"#;
/// let statements = pg_query::split_with_scanner(query).unwrap();
/// assert_eq!(statements, vec![
/// "select /*;*/ 1",
/// // skipped " asdf" since it was an invalid token
/// r#" select "2;", (select 3)"#,
/// ]);
/// ```
pub fn split_with_scanner(query: &str) -> Result<Vec<&str>> {
let input = CString::new(query)?;
let result = unsafe { pg_query_split_with_scanner(input.as_ptr()) };
let split_result = if !result.error.is_null() {
let message = unsafe { CStr::from_ptr((*result.error).message) }.to_string_lossy().to_string();
Err(Error::Split(message))
} else {
// don't use result.stderr_buffer since it appears unused unless
// libpg_query is compiled with DEBUG defined.
let n_stmts = result.n_stmts as usize;
let mut start: usize;
let mut end: usize;
let mut statements = Vec::with_capacity(n_stmts);
for offset in 0..n_stmts {
let split_stmt = unsafe { *result.stmts.add(offset).read() };
start = split_stmt.stmt_location as usize;
// TODO: consider comparing the new value of start to the old value
// of end to see if any region larger than a statement-separator got skipped
end = start + split_stmt.stmt_len as usize;
statements.push(&query[start..end]);
}
Ok(statements)
};
unsafe { pg_query_free_split_result(result) };
split_result
}

0 comments on commit 15376c6

Please sign in to comment.