From 2b401e7e8a53a8e90544e06aab16bc5f504d3b6a Mon Sep 17 00:00:00 2001 From: Liber <5831248+Liberxue@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:44:29 +0800 Subject: [PATCH] Refactor Transform String Resugar Term Scott (#714) Co-authored-by: Nicolas Abril --- src/fun/transform/resugar_string.rs | 287 ++++++++++------------------ 1 file changed, 101 insertions(+), 186 deletions(-) diff --git a/src/fun/transform/resugar_string.rs b/src/fun/transform/resugar_string.rs index 031bdf0f1..ec47533c9 100644 --- a/src/fun/transform/resugar_string.rs +++ b/src/fun/transform/resugar_string.rs @@ -4,107 +4,101 @@ use crate::{ }; impl Term { - /// Converts lambda-encoded strings ending with String/nil to a string literals. + /// Converts lambda-encoded strings ending with String/nil to string literals. pub fn resugar_strings(&mut self, adt_encoding: AdtEncoding) { match adt_encoding { - AdtEncoding::Scott => self.resugar_strings_scott(), - AdtEncoding::NumScott => self.resugar_strings_num_scott(), + AdtEncoding::Scott => self.try_resugar_strings_with(Self::resugar_strings_scott), + AdtEncoding::NumScott => self.try_resugar_strings_with(Self::resugar_strings_num_scott), } } - /// Converts num-scott-encoded strings ending with String/nil to a string literals. - fn resugar_strings_num_scott(&mut self) { + /// Converts encoded strings to string literals using the provided extraction function. + fn try_resugar_strings_with(&mut self, extract_fn: fn(&Term) -> Option<(char, &Term)>) { maybe_grow(|| { - // Search for a String/cons pattern in the term and try to build a string from that point on. - // If successful, replace the term with the string. - // If not, keep as-is. - - // Nil: String/nil - if let Term::Ref { nam } = self { - if nam == builtins::SNIL { - *self = Term::str(""); + // Try to resugar nil or cons patterns. If unsuccessful, recurse into child terms. + if !self.try_resugar_strings_nil() && !self.try_resugar_strings_cons(extract_fn) { + for child in self.children_mut() { + child.try_resugar_strings_with(extract_fn); } } - // Cons: @x (x CONS_TAG ) - if let Term::Lam { tag: Tag::Static, pat, bod } = self { - if let Pattern::Var(Some(var_lam)) = pat.as_mut() { - if let Term::App { tag: Tag::Static, fun, arg: tail } = bod.as_mut() { - if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_mut() { - if let Term::App { tag: Tag::Static, fun, arg } = fun.as_mut() { - if let Term::Var { nam: var_app } = fun.as_mut() { - if let Term::Ref { nam: Name(nam) } = arg.as_mut() { - if let Term::Num { val: Num::U24(head) } = head.as_mut() { - if var_lam == var_app && nam == builtins::SCONS_TAG_REF { - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - if let Some(str) = build_string_num_scott(tail, head.to_string()) { - *self = Term::str(&str); - } else { - // Not a string term, keep as-is. - } - } - } - } - } - } - } + }) + } + + /// Attempts to resugar a nil term (String/nil) to an empty string literal. + fn try_resugar_strings_nil(&mut self) -> bool { + matches!(self, Term::Ref { nam } if nam == builtins::SNIL).then(|| *self = Term::str("")).is_some() + } + + /// Attempts to resugar a cons term to a string literal. + fn try_resugar_strings_cons(&mut self, extract_fn: fn(&Term) -> Option<(char, &Term)>) -> bool { + self + .try_resugar_strings_cons_with(extract_fn) + .or_else(|| self.try_resugar_strings_cons_common()) + .map(|str| *self = Term::str(&str)) + .is_some() + } + + /// Attempts to resugar a cons term using the provided extraction function. + fn try_resugar_strings_cons_with(&self, extract_fn: fn(&Term) -> Option<(char, &Term)>) -> Option { + extract_fn(self) + .and_then(|(head_char, tail)| Self::build_strings_common(tail, head_char.to_string(), extract_fn)) + } + + /// Attempts to resugar a cons term using the common extraction method. + fn try_resugar_strings_cons_common(&self) -> Option { + if let Term::App { tag: Tag::Static, fun, arg: tail } = self { + if let Term::App { tag: Tag::Static, fun: inner_fun, arg: head } = fun.as_ref() { + if let (Term::Ref { nam }, Term::Num { val: Num::U24(head_val) }) = + (inner_fun.as_ref(), head.as_ref()) + { + if nam == builtins::SCONS { + let head_char = char::from_u32(*head_val).unwrap_or(char::REPLACEMENT_CHARACTER); + return Self::build_strings_common(tail, head_char.to_string(), Self::extract_strings_common); } } } - // Cons: (String/cons ) - if let Term::App { tag: Tag::Static, fun, arg: tail } = self { - if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_mut() { - if let Term::Ref { nam } = fun.as_mut() { - if let Term::Num { val: Num::U24(head) } = head.as_mut() { - if nam == builtins::SCONS { - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - if let Some(str) = build_string_num_scott(tail, head.to_string()) { - *self = Term::str(&str); - } else { - // Not a string term, keep as-is. - } - } - } + } + None + } + + /// Builds a string from a term structure using the provided extraction function. + fn build_strings_common( + term: &Term, + mut s: String, + extract_fn: fn(&Term) -> Option<(char, &Term)>, + ) -> Option { + maybe_grow(|| { + let mut current = term; + loop { + match current { + // If we reach a nil term, we've completed the string + Term::Ref { nam } if nam == builtins::SNIL => return Some(s), + _ => { + // Extract the next character and continue building the string + let (head, next) = extract_fn(current).or_else(|| Self::extract_strings_common(current))?; + s.push(head); + current = next; } } } - - for child in self.children_mut() { - child.resugar_strings_num_scott(); - } }) } - /// Converts scott-encoded strings ending with String/nil to a string literals. - fn resugar_strings_scott(&mut self) { - maybe_grow(|| { - // Search for a String/cons pattern in the term and try to build a string from that point on. - // If successful, replace the term with the string. - // If not, keep as-is. - - // Nil: String/nil - if let Term::Ref { nam } = self { - if nam == builtins::SNIL { - *self = Term::str(""); - } - } - // Cons: @* @c (c ) - if let Term::Lam { tag: Tag::Static, pat, bod } = self { - if let Pattern::Var(None) = pat.as_mut() { - if let Term::Lam { tag: Tag::Static, pat, bod } = bod.as_mut() { - if let Pattern::Var(Some(var_lam)) = pat.as_mut() { - if let Term::App { tag: Tag::Static, fun, arg: tail } = bod.as_mut() { - if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_mut() { - if let Term::Var { nam: var_app } = fun.as_mut() { - if let Term::Num { val: Num::U24(head) } = head.as_mut() { - if var_lam == var_app { - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - if let Some(str) = build_string_scott(tail, head.to_string()) { - *self = Term::str(&str); - } else { - // Not a string term, keep as-is. - } - } - } + /// Extracts a character and the remaining term from a Scott-encoded string term. + /// The structure of this function mimics the shape of the AST for easier visualization. + fn resugar_strings_scott(term: &Term) -> Option<(char, &Term)> { + if let Term::Lam { tag: Tag::Static, pat: outer_pat, bod } = term { + if let Pattern::Var(None) = outer_pat.as_ref() { + if let Term::Lam { tag: Tag::Static, pat: inner_pat, bod: inner_bod } = bod.as_ref() { + if let Pattern::Var(Some(var_lam)) = inner_pat.as_ref() { + if let Term::App { tag: Tag::Static, fun, arg: tail } = inner_bod.as_ref() { + if let Term::App { tag: Tag::Static, fun: inner_fun, arg: head } = fun.as_ref() { + if let (Term::Var { nam: var_app }, Term::Num { val: Num::U24(head_val) }) = + (inner_fun.as_ref(), head.as_ref()) + { + if var_lam == var_app { + let head_char = char::from_u32(*head_val).unwrap_or(char::REPLACEMENT_CHARACTER); + return Some((head_char, tail)); } } } @@ -112,55 +106,27 @@ impl Term { } } } - // Cons: (String/cons ) - if let Term::App { tag: Tag::Static, fun, arg: tail } = self { - if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_mut() { - if let Term::Ref { nam } = fun.as_mut() { - if let Term::Num { val: Num::U24(head) } = head.as_mut() { - if nam == builtins::SCONS { - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - if let Some(str) = build_string_num_scott(tail, head.to_string()) { - *self = Term::str(&str); - } else { - // Not a string term, keep as-is. - } - } - } - } - } - } - - for child in self.children_mut() { - child.resugar_strings_scott(); - } - }) + } + None } -} -fn build_string_num_scott(term: &Term, mut s: String) -> Option { - maybe_grow(|| { - // Nil: String/nil - if let Term::Ref { nam } = term { - if nam == builtins::SNIL { - return Some(s); - } - } - // Cons: @x (x CONS_TAG ) + /// Extracts a character and the remaining term from a NumScott-encoded string term. + /// The structure of this function mimics the shape of the AST for easier visualization. + fn resugar_strings_num_scott(term: &Term) -> Option<(char, &Term)> { if let Term::Lam { tag: Tag::Static, pat, bod } = term { if let Pattern::Var(Some(var_lam)) = pat.as_ref() { if let Term::App { tag: Tag::Static, fun, arg: tail } = bod.as_ref() { if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_ref() { if let Term::App { tag: Tag::Static, fun, arg } = fun.as_ref() { - if let Term::Var { nam: var_app } = fun.as_ref() { - if let Term::Ref { nam } = arg.as_ref() { - if let Term::Num { val: Num::U24(head) } = head.as_ref() { - if var_lam == var_app && nam == builtins::SCONS_TAG_REF { - // New string character, append and recurse - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - s.push(head); - return build_string_num_scott(tail, s); - } - } + if let ( + Term::Var { nam: var_app }, + Term::Ref { nam: Name(ref_nam) }, + Term::Num { val: Num::U24(head_val) }, + ) = (fun.as_ref(), arg.as_ref(), head.as_ref()) + { + if var_lam == var_app && ref_nam == builtins::SCONS_TAG_REF { + let head_char = char::from_u32(*head_val).unwrap_or(char::REPLACEMENT_CHARACTER); + return Some((head_char, tail)); } } } @@ -168,73 +134,22 @@ fn build_string_num_scott(term: &Term, mut s: String) -> Option { } } } - // Cons: (String/cons ) - if let Term::App { tag: Tag::Static, fun, arg: tail } = term { - if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_ref() { - if let Term::Ref { nam } = fun.as_ref() { - if let Term::Num { val: Num::U24(head) } = head.as_ref() { - if nam == builtins::SCONS { - // New string character, append and recurse - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - s.push(head); - return build_string_num_scott(tail, s); - } - } - } - } - } - // Not a string term, stop None - }) -} + } -fn build_string_scott(term: &Term, mut s: String) -> Option { - maybe_grow(|| { - // Nil: String/nil - if let Term::Ref { nam } = term { - if nam == builtins::SNIL { - return Some(s); - } - } - // Cons: @* @c (c ) - if let Term::Lam { tag: Tag::Static, pat, bod } = term { - if let Pattern::Var(None) = pat.as_ref() { - if let Term::Lam { tag: Tag::Static, pat, bod } = bod.as_ref() { - if let Pattern::Var(Some(var_lam)) = pat.as_ref() { - if let Term::App { tag: Tag::Static, fun, arg: tail } = bod.as_ref() { - if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_ref() { - if let Term::Var { nam: var_app } = fun.as_ref() { - if let Term::Num { val: Num::U24(head) } = head.as_ref() { - if var_lam == var_app { - // New string character, append and recurse - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - s.push(head); - return build_string_scott(tail, s); - } - } - } - } - } - } - } - } - } - // Cons: (String/cons ) + /// Extracts a character and the remaining term from a common-encoded string term. + /// The structure of this function mimics the shape of the AST for easier visualization. + fn extract_strings_common(term: &Term) -> Option<(char, &Term)> { if let Term::App { tag: Tag::Static, fun, arg: tail } = term { if let Term::App { tag: Tag::Static, fun, arg: head } = fun.as_ref() { - if let Term::Ref { nam } = fun.as_ref() { - if let Term::Num { val: Num::U24(head) } = head.as_ref() { - if nam == builtins::SCONS { - // New string character, append and recurse - let head = char::from_u32(*head).unwrap_or(char::REPLACEMENT_CHARACTER); - s.push(head); - return build_string_scott(tail, s); - } + if let (Term::Ref { nam }, Term::Num { val: Num::U24(head_val) }) = (fun.as_ref(), head.as_ref()) { + if nam == builtins::SCONS { + let head_char = char::from_u32(*head_val).unwrap_or(char::REPLACEMENT_CHARACTER); + return Some((head_char, tail)); } } } } - // Not a string term, stop None - }) + } }