Skip to content

Commit

Permalink
expression: start tracking parenthesis-type and string position
Browse files Browse the repository at this point in the history
When we change the expression parser to start parsing both ()s and {}s
at once, we will need to know the parenthesis type. To return nice
errors we also need to store some position information in the Tree type.

Adding these new fields (which need to be pub to make them accessible
from descriptor/tr.rs, but which we will later encapsulate better) is
mechanical and pretty noisy, so we do it in its own commit to reduce the
size of the real "fix Taproot parsing" commit.
  • Loading branch information
apoelstra committed Nov 23, 2024
1 parent 853a01a commit f5dcafd
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 28 deletions.
32 changes: 26 additions & 6 deletions src/descriptor/tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,11 @@ impl<Pk: FromStrKey> Tr<Pk> {
// Helper function to parse taproot script path
fn parse_tr_script_spend(tree: &expression::Tree,) -> Result<TapTree<Pk>, Error> {
match tree {
expression::Tree { name, args } if !name.is_empty() && args.is_empty() => {
expression::Tree { name, args, .. } if !name.is_empty() && args.is_empty() => {
let script = Miniscript::<Pk, Tap>::from_str(name)?;
Ok(TapTree::Leaf(Arc::new(script)))
}
expression::Tree { name, args } if name.is_empty() && args.len() == 2 => {
expression::Tree { name, args, .. } if name.is_empty() && args.len() == 2 => {
let left = Self::parse_tr_script_spend(&args[0])?;
let right = Self::parse_tr_script_spend(&args[1])?;
Ok(TapTree::combine(left, right))
Expand Down Expand Up @@ -597,8 +597,18 @@ fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
if !key.args.is_empty() {
return Err(Error::Unexpected("invalid taproot internal key".to_string()));
}
let internal_key = expression::Tree { name: key.name, args: vec![] };
return Ok(expression::Tree { name: "tr", args: vec![internal_key] });
let internal_key = expression::Tree {
name: key.name,
parens: expression::Parens::None,
children_pos: 0,
args: vec![],
};
return Ok(expression::Tree {
name: "tr",
parens: expression::Parens::Round,
children_pos: 0,
args: vec![internal_key],
});
}
// TODO: replace the `split_once` helper with the standard `str::split_once()`
// once the minimum supported compiler version provides it.
let (key, script) = split_once(rest, ',')
Expand All @@ -608,10 +618,20 @@ fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
if !key.args.is_empty() {
return Err(Error::Unexpected("invalid taproot internal key".to_string()));
}
let internal_key = expression::Tree { name: key.name, args: vec![] };
let internal_key = expression::Tree {
name: key.name,
parens: expression::Parens::None,
children_pos: 0,
args: vec![],
};
let tree = expression::Tree::from_slice_delim(script, expression::Delimiter::Taproot)
.map_err(Error::ParseTree)?;
Ok(expression::Tree { name: "tr", args: vec![internal_key, tree] })
Ok(expression::Tree {
name: "tr",
parens: expression::Parens::Round,
children_pos: 0,
args: vec![internal_key, tree],
})
} else {
Err(Error::Unexpected("invalid taproot descriptor".to_string()))
}
Expand Down
82 changes: 60 additions & 22 deletions src/expression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVW
pub struct Tree<'a> {
/// The name `x`: the text of the node up to, but not including, its
/// opening parenthesis (or the whole node if it has no children).
pub name: &'a str,
/// Position one past the last character of the node's name. If it has
/// children, the position of the '(' or '{'.
///
/// When produced by the parser this is a byte index into the string that
/// was parsed.
pub children_pos: usize,
/// The type of parentheses surrounding the node's children.
/// [`Parens::None`] when the node has no children.
pub parens: Parens,
/// The comma-separated contents of the `(...)` or `{...}`, if any
pub args: Vec<Tree<'a>>,
}
Expand All @@ -38,11 +43,17 @@ impl PartialEq for Tree<'_> {
}
}
impl Eq for Tree<'_> {}
// or_b(pk(A),pk(B))
//
// A = musig(musig(B,C),D,E)
// or_b()
// pk(A), pk(B)

/// The type of parentheses surrounding a node's children.
///
/// Recorded on every [`Tree`] node alongside the position of the opening
/// delimiter.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Parens {
/// Node has no children.
///
/// This is the value the parser assigns when no child list was collected
/// for a node.
None,
/// Round parentheses: `(` and `)`.
Round,
/// Curly braces: `{` and `}`.
Curly,
}

/// Whether to treat `{` and `}` as delimiters when parsing an expression.
#[derive(Copy, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -166,31 +177,45 @@ impl<'a> Tree<'a> {
// Now, knowing it is sane and well-formed, we can easily parse it backward,
// which will yield a post-order right-to-left iterator of its nodes.
let mut stack = Vec::with_capacity(max_depth);
let mut children = None;
let mut children_parens: Option<(Vec<_>, usize, Parens)> = None;
let mut node_name_end = s.len();
let mut tapleaf_depth = 0;
for (pos, ch) in s.bytes().enumerate().rev() {
if ch == cparen {
stack.push(vec![]);
node_name_end = pos;
} else if tapleaf_depth == 0 && ch == b',' {
let (mut args, children_pos, parens) =
children_parens
.take()
.unwrap_or((vec![], node_name_end, Parens::None));
args.reverse();

let top = stack.last_mut().unwrap();
let mut new_tree = Tree {
name: &s[pos + 1..node_name_end],
args: children.take().unwrap_or(vec![]),
};
new_tree.args.reverse();
let new_tree =
Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args };
top.push(new_tree);
node_name_end = pos;
} else if ch == oparen {
let (mut args, children_pos, parens) =
children_parens
.take()
.unwrap_or((vec![], node_name_end, Parens::None));
args.reverse();

let mut top = stack.pop().unwrap();
let mut new_tree = Tree {
name: &s[pos + 1..node_name_end],
args: children.take().unwrap_or(vec![]),
};
new_tree.args.reverse();
let new_tree =
Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args };
top.push(new_tree);
children = Some(top);
children_parens = Some((
top,
pos,
match ch {
b'(' => Parens::Round,
b'{' => Parens::Curly,
_ => unreachable!(),
},
));
node_name_end = pos;
} else if delim == Delimiter::Taproot && ch == b'(' {
tapleaf_depth += 1;
Expand All @@ -200,9 +225,12 @@ impl<'a> Tree<'a> {
}

assert_eq!(stack.len(), 0);
let mut children = children.take().unwrap_or(vec![]);
children.reverse();
Ok(Tree { name: &s[..node_name_end], args: children })
let (mut args, children_pos, parens) =
children_parens
.take()
.unwrap_or((vec![], node_name_end, Parens::None));
args.reverse();
Ok(Tree { name: &s[..node_name_end], children_pos, parens, args })
}

/// Parses a tree from a string
Expand Down Expand Up @@ -300,9 +328,19 @@ mod tests {
use super::*;

/// Test functions to manually build trees
fn leaf(name: &str) -> Tree { Tree { name, args: vec![] } }
fn leaf(name: &str) -> Tree {
    // A childless node: no parentheses, and the "children position" sits
    // immediately after the last character of the name.
    let children_pos = name.len();
    Tree { name, children_pos, parens: Parens::None, args: Vec::new() }
}

fn paren_node<'a>(name: &'a str, mut args: Vec<Tree<'a>>) -> Tree<'a> {
let mut offset = name.len() + 1; // +1 for open paren
for arg in &mut args {
arg.children_pos += offset;
offset += arg.name.len() + 1; // +1 for comma
}

fn paren_node<'a>(name: &'a str, args: Vec<Tree<'a>>) -> Tree<'a> { Tree { name, args } }
Tree { name, parens: Parens::Round, children_pos: name.len(), args }
}

#[test]
fn test_parse_num() {
Expand Down

0 comments on commit f5dcafd

Please sign in to comment.