Statement parse errors now refer to spans #80

Merged · 2 commits · Apr 2, 2022
Changes from all commits
20 changes: 10 additions & 10 deletions src/diag.rs
@@ -1190,19 +1190,19 @@ impl Diagnostic {
#[derive(Debug, Clone, Eq, PartialEq)]
#[allow(missing_docs)]
pub enum StmtParseError {
ParsedStatementTooShort(Option<Token>),
ParsedStatementTooShort(Span, Option<Token>),
ParsedStatementNoTypeCode,
ParsedStatementWrongTypeCode(Token),
UnknownToken(TokenIndex),
UnparseableStatement(TokenIndex),
UnknownToken(Span),
UnparseableStatement(Span),
}

impl StmtParseError {
/// The diagnostic's label
#[must_use]
pub fn label<'a>(&self) -> Cow<'a, str> {
match self {
StmtParseError::ParsedStatementTooShort(_) => "Parsed statement too short",
StmtParseError::ParsedStatementTooShort(_, _) => "Parsed statement too short",
StmtParseError::ParsedStatementWrongTypeCode(_) => {
"Parsed statement has wrong typecode"
}
@@ -1226,7 +1226,7 @@ impl StmtParseError {
}
let severity = self.severity();
let info = match self {
StmtParseError::ParsedStatementTooShort(ref opt_tok) => (
StmtParseError::ParsedStatementTooShort(span, ref opt_tok) => (
severity,
match opt_tok {
Some(tok) => format!(
@@ -1239,7 +1239,7 @@
}
},
stmt,
stmt.span(),
*span,
),
StmtParseError::ParsedStatementWrongTypeCode(ref found) => (
severity,
@@ -1251,17 +1251,17 @@
stmt,
stmt.span(),
),
StmtParseError::UnknownToken(index) => (
StmtParseError::UnknownToken(span) => (
severity,
"This token was not declared in any $v or $c statement".into(),
stmt,
stmt.math_span(*index),
*span,
),
StmtParseError::UnparseableStatement(index) => (
StmtParseError::UnparseableStatement(span) => (
severity,
"Could not parse this statement".into(),
stmt,
stmt.math_span(*index),
*span,
),
StmtParseError::ParsedStatementNoTypeCode => (
AnnotationType::Error,
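With this change, `UnknownToken` and `UnparseableStatement` carry a `Span` directly, so a caller can point at the offending bytes without going back through the statement. A minimal sketch of consuming the new variants for errors produced by `parse_string`, whose spans index the input string; the `report` helper is hypothetical and import paths are omitted:

```rust
// Illustrative only: how downstream code might surface the span-carrying
// variants. The Span field accesses (`start`/`end` byte offsets) follow the
// usage seen in this PR; everything else here is a hypothetical helper.
// (Assumes StmtParseError is in scope.)
fn report(err: &StmtParseError, source: &str) -> String {
    match err {
        StmtParseError::UnknownToken(span) => format!(
            "unknown token `{}` at bytes {}..{}",
            &source[span.start as usize..span.end as usize],
            span.start,
            span.end
        ),
        StmtParseError::UnparseableStatement(span) => format!(
            "could not parse statement at bytes {}..{}",
            span.start, span.end
        ),
        other => other.label().into_owned(),
    }
}
```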
147 changes: 108 additions & 39 deletions src/grammar.rs
@@ -10,7 +10,7 @@ use crate::segment::Segment;
use crate::segment_set::SegmentSet;
use crate::statement::{CommandToken, SegmentId, StatementAddress, SymbolType, TokenRef};
use crate::util::HashMap;
use crate::{as_str, Database, StatementRef, StatementType};
use crate::{as_str, Database, Span, StatementRef, StatementType};
use log::{debug, warn};
use std::collections::hash_map::Entry;
use std::fmt;
@@ -401,8 +401,8 @@ impl Default for Grammar {
}
}

const fn undefined(token: TokenRef<'_>) -> StmtParseError {
StmtParseError::UnknownToken(token.address.token_index)
fn undefined(token: TokenRef<'_>, sref: &StatementRef<'_>) -> Diagnostic {
Diagnostic::UndefinedToken(sref.math_span(token.index()), token.slice.into())
}

fn undefined_cmd(token: &CommandToken, buf: &[u8]) -> Diagnostic {
@@ -466,8 +466,13 @@ impl Grammar {
}
}

fn too_short(map: &HashMap<(SymbolType, Atom), NextNode>, nset: &Nameset) -> StmtParseError {
fn too_short(
last_token: FormulaToken,
map: &HashMap<(SymbolType, Atom), NextNode>,
nset: &Nameset,
) -> StmtParseError {
StmtParseError::ParsedStatementTooShort(
last_token.span,
map.keys()
.find(|k| k.0 == SymbolType::Constant)
.map(|(_, expected_symbol)| nset.atom_name(*expected_symbol).into()),
@@ -587,7 +592,7 @@ impl Grammar {
while let Some(token) = tokens.next() {
let symbol = names
.lookup_symbol(token.slice)
.ok_or_else(|| undefined(token))?;
.ok_or_else(|| undefined(token, sref))?;
let atom = match symbol.stype {
SymbolType::Constant => symbol.atom,
SymbolType::Variable => {
@@ -596,7 +601,7 @@
// Ideally this information would be included in the LookupSymbol
names
.lookup_float(token.slice)
.ok_or_else(|| undefined(token))?
.ok_or_else(|| undefined(token, sref))?
.typecode_atom
}
};
@@ -1156,10 +1161,10 @@ impl Grammar {
Ok(())
}

fn do_shift(&self, symbol_iter: &mut dyn Iterator<Item = (usize, Symbol)>, nset: &Nameset) {
if let Some((_ix, symbol)) = symbol_iter.next() {
fn do_shift(&self, symbol_iter: &mut dyn Iterator<Item = FormulaToken>, nset: &Nameset) {
if let Some(token) = symbol_iter.next() {
if self.debug {
debug!(" SHIFT {:?}", as_str(nset.atom_name(symbol)));
debug!(" SHIFT {:?}", as_str(nset.atom_name(token.symbol)));
}
}
}
@@ -1184,7 +1189,7 @@
/// Parses the given list of symbols into a formula syntax tree.
pub fn parse_formula(
&self,
symbol_iter: &mut impl Iterator<Item = Symbol>,
symbol_iter: &mut impl Iterator<Item = FormulaToken>,
expected_typecodes: &[TypeCode],
nset: &Nameset,
) -> Result<Formula, StmtParseError> {
@@ -1194,8 +1199,10 @@
}

let mut formula_builder = FormulaBuilder::default();
let mut symbol_enum = symbol_iter.enumerate().peekable();
let mut ix = 0;
let mut symbol_enum = symbol_iter.peekable();
let mut last_token = *symbol_enum
.peek()
.ok_or(StmtParseError::ParsedStatementNoTypeCode)?;
let mut e = StackElement {
node_id: self.root,
expected_typecodes: expected_typecodes.to_vec().into_boxed_slice(),
@@ -1241,7 +1248,7 @@
// There are still symbols to parse, continue from root
let (next_node_id, leaf_label) = self
.next_var_node(self.root, typecode)
.ok_or(StmtParseError::UnparseableStatement(ix))?;
.ok_or(StmtParseError::UnparseableStatement(last_token.span))?;
for &reduce in leaf_label {
Self::do_reduce(&mut formula_builder, reduce, nset);
}
@@ -1264,22 +1271,22 @@
debug!(" ++ Wrong type obtained, continue.");
let (next_node_id, leaf_label) = self
.next_var_node(self.root, typecode)
.ok_or(StmtParseError::UnparseableStatement(ix))?;
.ok_or(StmtParseError::UnparseableStatement(last_token.span))?;
for &reduce in leaf_label {
Self::do_reduce(&mut formula_builder, reduce, nset);
}
e.node_id = next_node_id;
}
}
GrammarNode::Branch { ref map } => {
if let Some(&(index, symbol)) = symbol_enum.peek() {
ix = index as i32;
debug!(" {:?}", as_str(nset.atom_name(symbol)));
if let Some(&token) = symbol_enum.peek() {
last_token = token;
debug!(" {:?}", as_str(nset.atom_name(token.symbol)));

if let Some(NextNode {
next_node_id,
leaf_label,
}) = map.get(&(SymbolType::Constant, symbol))
}) = map.get(&(SymbolType::Constant, token.symbol))
{
// Found an atom matching one of our next nodes: First optionally REDUCE and continue
for &reduce in leaf_label {
Expand All @@ -1293,7 +1300,7 @@ impl Grammar {
} else {
// No matching constant, search among variables
if map.is_empty() || e.node_id == self.root {
return Err(StmtParseError::UnparseableStatement(ix));
return Err(StmtParseError::UnparseableStatement(token.span));
}

debug!(
@@ -1313,39 +1320,96 @@
};
}
} else {
return Err(Grammar::too_short(map, nset));
return Err(Grammar::too_short(last_token, map, nset));
}
}
}
}
}
}

/// An Atom which remembers its position in the source, for error handling
#[derive(Clone, Copy, Debug)]
pub struct FormulaToken {
/// The symbol's atom
pub symbol: Symbol,
/// The span of the original source string this token has been read from, used for error reporting.
pub span: Span,
}

/// An iterator through the tokens of a string
struct FormulaTokenIter<'a> {
string: &'a str,
chars: core::str::Chars<'a>,
nset: &'a Arc<Nameset>,
last_pos: usize,
done: bool,
}

impl<'a> FormulaTokenIter<'a> {
/// Builds a `FormulaTokenIter` from a string.
/// Characters are expected to be ASCII
fn from_str(string: &'a str, nset: &'a Arc<Nameset>) -> Self {
Self {
string,
chars: string.chars(),
nset,
last_pos: 0,
done: false,
}
}
}

impl Iterator for FormulaTokenIter<'_> {
type Item = Result<FormulaToken, StmtParseError>;

fn next(&mut self) -> Option<Self::Item> {
if self.done {
None
} else {
let span = if let Some(next_pos) =
self.chars.position(|c| c == ' ' || c == '\t' || c == '\n')
{
Span::new(self.last_pos, self.last_pos + next_pos)
} else {
self.done = true;
Span::new(self.last_pos, self.string.len())
};
self.last_pos = span.end as usize + 1;
let t = &self.string[span.start as usize..span.end as usize];
if let Some(l) = self.nset.lookup_symbol(t.as_bytes()) {
Some(Ok(FormulaToken {
symbol: l.atom,
span,
}))
} else {
Some(Err(StmtParseError::UnknownToken(span)))
}
}
}
}

impl Grammar {
/// Parses a character string into a formula
/// As a first math token, the string is expected to contain the typecode for the formula.
/// Diagnostics mark the errors with [Span]s based on the position in the input string.
pub fn parse_string(
&self,
formula_string: &str,
nset: &Arc<Nameset>,
) -> Result<Formula, StmtParseError> {
// TODO an iterator taking notes of the start and end of the math tokens would allow to return richer error messages, including actual spans rather than indices.
let mut symbols = formula_string.trim().split(&[' ', '\t', '\n']);
let typecode_name = symbols
let mut symbols = FormulaTokenIter::from_str(formula_string, nset)
.collect::<Result<Vec<_>, _>>()?
.into_iter();
let typecode = symbols
.next()
.ok_or(StmtParseError::ParsedStatementNoTypeCode)?;
let typecode = nset
.lookup_symbol(typecode_name.as_bytes())
.ok_or(StmtParseError::UnknownToken(0))?
.atom;
let expected_typecode = if typecode == self.provable_type {
let expected_typecode = if typecode.symbol == self.provable_type {
self.logic_type
} else {
typecode
typecode.symbol
};
self.parse_formula(
&mut symbols.map(|t| nset.lookup_symbol(t.as_bytes()).unwrap().atom),
&[expected_typecode],
nset,
)
self.parse_formula(&mut symbols, &[expected_typecode], nset)
}

fn parse_statement(
@@ -1383,10 +1447,14 @@
.math_iter()
.skip(1)
.map(|token| {
let span = sref.math_span(token.index());
if let Some(lookup) = names.lookup_symbol(token.slice) {
Ok(lookup.atom)
Ok(FormulaToken {
symbol: lookup.atom,
span,
})
} else {
Err(undefined(token))
Err(StmtParseError::UnknownToken(span))
}
})
.collect();
@@ -1505,9 +1573,7 @@
}
Ok(())
}
}

impl Grammar {
/// Called by [`crate::Database`] to build the grammar from the syntax axioms in the database.
///
/// The provided `sset`, and `nset` shall be the result of previous phases over the database.
@@ -1599,7 +1665,10 @@ impl StmtParse {
let math_iter = sref.math_iter().flat_map(|token| {
nset.lookup_symbol(token.slice)
.ok_or_else(|| {
(sref.address(), StmtParseError::UnknownToken(token.index()))
(
sref.address(),
StmtParseError::UnknownToken(sref.math_span(token.index())),
)
})
.map(|l| l.atom)
});
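`parse_formula` now takes `FormulaToken` items (an atom plus the span to blame in error reports) instead of bare `Symbol`s. A sketch of calling it with a pre-resolved token; `ph_atom` and `wff_code` are assumptions standing in for earlier `Nameset` lookups and are not defined by this PR:

```rust
// Sketch only (not from the PR): parse a single pre-resolved token.
// `ph_atom` and `wff_code` are assumed to come from Nameset lookups.
fn parse_ph(
    grammar: &Grammar,
    nset: &Nameset,
    ph_atom: Symbol,
    wff_code: TypeCode,
) -> Result<Formula, StmtParseError> {
    let tokens = vec![FormulaToken {
        symbol: ph_atom,
        span: Span::new(3, 5), // the bytes of "ph" in "|- ph"
    }];
    grammar.parse_formula(&mut tokens.into_iter(), &[wff_code], nset)
}
```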
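For intuition about the spans the new `FormulaTokenIter` attaches to each token, here is a standalone sketch of the same whitespace-splitting arithmetic, independent of the crate; the real iterator additionally resolves each token through the `Nameset` and reports failures as `StmtParseError::UnknownToken` with the span:

```rust
// Standalone illustration of byte-span tokenization over a formula string.
// This mirrors the idea behind FormulaTokenIter: each token's span indexes
// the original input, so errors can quote the exact offending text.
fn token_spans(s: &str) -> Vec<(usize, usize)> {
    let mut spans = Vec::new();
    let mut start = 0;
    for (i, c) in s.char_indices() {
        if c == ' ' || c == '\t' || c == '\n' {
            if i > start {
                spans.push((start, i));
            }
            start = i + 1;
        }
    }
    if start < s.len() {
        spans.push((start, s.len()));
    }
    spans
}

fn main() {
    // "|- ( ph -> ps )" splits into "|-", "(", "ph", "->", "ps", ")".
    assert_eq!(
        token_spans("|- ( ph -> ps )"),
        vec![(0, 2), (3, 4), (5, 7), (8, 10), (11, 13), (14, 15)]
    );
}
```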