Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backtracking support through &try/backtrack() #606

Merged
merged 3 commits into from
Dec 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/autogen/types/unit.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
.. rubric:: Methods

.. spicy:method:: unit::backtrack unit backtrack False void ()

Aborts parsing at the current position and returns back to the most
recent ``&try`` attribute. Turns into a parse error if there's no
``&try`` in scope.

.. spicy:method:: unit::connect_filter unit connect_filter False void (filter: strong_ref<unit>)

Connects a separate filter unit to transform the unit's input
Expand Down
22 changes: 22 additions & 0 deletions doc/programming/examples/_backtrack.spicy
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Automatically generated; edit in Sphinx source code, not here.
module Test;

public type test = unit {
foo: Foo &try;
bar: Bar;

on %done { print self; }
};

type Foo = unit {
a: int8 {
if ( $$ != 1 )
self.backtrack();
}
b: int8;
};

type Bar = unit {
a: int8;
b: int8;
};
22 changes: 22 additions & 0 deletions doc/programming/examples/_parse-backtrack.spicy
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Automatically generated; edit in Sphinx source code, not here.
module Test;

public type test = unit {
foo: Foo &try;
bar: Bar;

on %done { print self; }
};

type Foo = unit {
a: int8 {
if ( $$ != 1 )
self.backtrack();
}
b: int8;
};

type Bar = unit {
a: int8;
b: int8;
};
6 changes: 6 additions & 0 deletions doc/programming/examples/_parse-backtrack.spicy.output
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Automatically generated; do not edit. -- <HASH> printf '\001\002\003\004' | spicy-driver %INPUT; printf '\003\004' | spicy-driver %INPUT/printf '\001\002\003\004' | spicy-driver %INPUT; printf '\003\004' | spicy-driver %INPUT/False
# printf '\001\002\003\004' | spicy-driver backtrack.spicy
[$foo=[$a=1, $b=2], $bar=[$a=3, $b=4]]

# printf '\003\004' | spicy-driver backtrack.spicy
[$foo=[$a=3, $b=(not set)], $bar=[$a=3, $b=4]]
4 changes: 2 additions & 2 deletions doc/programming/examples/_parse-if.spicy.output
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Automatically generated; do not edit. -- <HASH> printf '\01\02\03\04' | spicy-driver %INPUT; printf '\02\02\03\04' | spicy-driver %INPUT/printf '\01\02\03\04' | spicy-driver %INPUT; printf '\02\02\03\04' | spicy-driver %INPUT/False
# printf '\01\02\03\04' | spicy-driver foo.spicy; printf '\02\02\03\04' | spicy-driver foo.spicy
# printf '\01\02\03\04' | spicy-driver foo.spicy
[$a=1, $b=2, $c=(not set), $d=3]

# printf '\01\02\03\04' | spicy-driver foo.spicy; printf '\02\02\03\04' | spicy-driver foo.spicy
# printf '\02\02\03\04' | spicy-driver foo.spicy
[$a=2, $b=(not set), $c=2, $d=3]
Binary file modified doc/programming/examples/_parse-random-access.spicy.output
Binary file not shown.
4 changes: 2 additions & 2 deletions doc/programming/examples/_parse-switch-lhead-2.spicy.output
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Automatically generated; do not edit. -- <HASH> printf 'A ' | spicy-driver %INPUT; printf '\377\377' | spicy-driver %INPUT/printf 'A ' | spicy-driver %INPUT; printf '\377\377' | spicy-driver %INPUT/False
# printf 'A ' | spicy-driver foo.spicy; printf '\377\377' | spicy-driver foo.spicy
# printf 'A ' | spicy-driver foo.spicy
[$a=[$a=b"A"], $b=(not set)]

# printf 'A ' | spicy-driver foo.spicy; printf '\377\377' | spicy-driver foo.spicy
# printf '\377\377' | spicy-driver foo.spicy
[$a=(not set), $b=[$b=65535]]
4 changes: 2 additions & 2 deletions doc/programming/examples/_parse-switch.spicy.output
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Automatically generated; do not edit. -- <HASH> printf 'A\01' | spicy-driver %INPUT; printf 'B\01\02' | spicy-driver %INPUT/printf 'A\01' | spicy-driver %INPUT; printf 'B\01\02' | spicy-driver %INPUT/False
# printf 'A\01' | spicy-driver foo.spicy; printf 'B\01\02' | spicy-driver foo.spicy
# printf 'A\01' | spicy-driver foo.spicy
[$x=b"A", $a8=1, $a16=(not set), $a32=(not set)]

# printf 'A\01' | spicy-driver foo.spicy; printf 'B\01\02' | spicy-driver foo.spicy
# printf 'B\01\02' | spicy-driver foo.spicy
[$x=b"B", $a8=(not set), $a16=258, $a32=(not set)]
2 changes: 1 addition & 1 deletion doc/programming/examples/_parse-unit-params.spicy.output
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Automatically generated; do not edit. -- <HASH> printf '\01\02' | spicy-driver %INPUT/printf '\01\02' | spicy-driver %INPUT/False
# printf '\01\02' | spicy-driver foo.spicy
"Spicy": 1
Spicy: 1
[$y=[$x=1]]
2 changes: 1 addition & 1 deletion doc/programming/examples/_unit-params.spicy.output
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Automatically generated; do not edit. -- <HASH> printf '\05' | spicy-driver %INPUT/printf '\05' | spicy-driver %INPUT/False
# printf '\05' | spicy-driver foo.spicy
"My multiplied integer": 25
My multiplied integer: 25
60 changes: 60 additions & 0 deletions doc/programming/parsing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,9 @@ It is possible to skip the ``SIZE`` (e.g., ``x: uint8[]``) and instead
use another kind of end condition to terminate a vector's parsing
loop. To that end, vectors support the following attributes:

``&eod``
Parses elements until the end of the input stream is reached.

``&size=N``
Parses the vector from the subsequent ``N`` bytes of input data.
This effectively limits the available input to the corresponding
Expand Down Expand Up @@ -1213,6 +1216,63 @@ once you have subunits that are recognizable by how they start:
:exec: printf 'A ' | spicy-driver %INPUT; printf '\377\377' | spicy-driver %INPUT
:show-with: foo.spicy

.. _backtracking:

Backtracking
^^^^^^^^^^^^

Spicy supports a simple form of manual backtracking. If a field is
marked with ``&try``, a later call to the unit's ``backtrack()``
method anywhere down in the parse tree originating at that field will
immediately transfer control over to the field following the ``&try``.
When doing so, the data position inside the input stream will be reset
to where it was when the ``&try`` field started its processing. Units
along the original path will be left in whatever state they were at
the time ``backtrack()`` executed (i.e., they will probably remain
just partially initialized). When ``backtrack()`` is called on a path
that involves multiple ``&try`` fields, control continues after the
most recent one.

Example:

.. spicy-code:: parse-backtrack.spicy

module Test;

public type test = unit {
foo: Foo &try;
bar: Bar;

on %done { print self; }
};

type Foo = unit {
a: int8 {
if ( $$ != 1 )
self.backtrack();
}
b: int8;
};

type Bar = unit {
a: int8;
b: int8;
};


.. spicy-output:: parse-backtrack.spicy
:exec: printf '\001\002\003\004' | spicy-driver %INPUT; printf '\003\004' | spicy-driver %INPUT
:show-with: backtrack.spicy

``backtrack()`` can be called from inside :ref:`%error hooks
<on_error>`, so this provides a simple form of error recovery
as well.

.. note::

This mechanism is preliminary and will probably see refinement
over time, both in terms of more automated backtracking and by
providing better control over where to continue after backtracking.

Changing Input
==============
Expand Down
10 changes: 8 additions & 2 deletions doc/scripts/spicy.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ def update(self, source, destination, cmd):
all_good = True
first = True

show_as = []
if self.show_as:
show_as = self.show_as.split(";")

for one_cmd in cmd.split(";"):
one_cmd = one_cmd.strip()

Expand Down Expand Up @@ -418,12 +422,14 @@ def update(self, source, destination, cmd):
out = open(destination, "ab")
out.write(b"\n")

if self.show_as:
one_cmd = "# %s\n" % self.show_as
if show_as:
one_cmd = "# %s\n" % show_as[0].strip()
one_cmd = one_cmd.replace("%INPUT", self.show_with)
output = output.replace(
source.encode(), self.show_with.encode())
out.write(one_cmd.encode())
show_as = show_as[1:]

out.write(output)
out.close()
first = False
Expand Down
3 changes: 3 additions & 0 deletions spicy/lib/spicy_rt.hlt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module spicy_rt {

public type ParseError = exception &cxxname="spicy::rt::ParseError";
public type Backtrack = exception &cxxname="spicy::rt::Backtrack";
public type UnitAlreadyConnected = exception &cxxname="spicy::rt::UnitAlreadyConnected";

# State stored inside a unit to allow connecting it to a sink.
Expand Down Expand Up @@ -67,6 +68,8 @@ declare public bool waitForEod(inout value_ref<stream> data, view<stream> cur, i
declare public bool atEod(inout value_ref<stream> data, view<stream> cur) &cxxname="spicy::rt::detail::atEod" &have_prototype;
declare public bool haveEod(inout value_ref<stream> data, view<stream> cur) &cxxname="spicy::rt::detail::haveEod" &have_prototype;

declare public void backtrack() &cxxname="spicy::rt::detail::backtrack" &have_prototype;

public type BitOrder = enum { LSB0, MSB0 } &cxxname="hilti::rt::integer::BitOrder";
# TODO: Should accept BitOrder instead of enum<*> here.
declare public uint<*> extractBits(uint<*> v, uint<64> lower, uint<64> upper, enum<*> order) &cxxname="hilti::rt::integer::bits" &have_prototype;
Expand Down
17 changes: 17 additions & 0 deletions spicy/runtime/include/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,17 @@ class ParseError : public hilti::rt::UserException {
virtual ~ParseError(); /* required to create vtable, see hilti::rt::Exception */
};

/**
* Exception triggering backtracking to the most recent ``&try``. Derived from
* ``ParseError`` so that if it's not caught, it turns into a regular parsing
* error.
*/
class Backtrack : public ParseError {
public:
// The message passed to ParseError is what surfaces when no ``&try``
// catches this exception and it ends up as a regular parse error.
Backtrack() : ParseError("backtracking outside of &try scope") {}
// Declared here and defined out-of-line, mirroring ParseError's destructor.
// NOTE(review): the definition presumably comes from
// HILTI_EXCEPTION_IMPL(Backtrack) in parser.cc -- confirm.
virtual ~Backtrack();
};

namespace detail {

/**
Expand Down Expand Up @@ -320,5 +331,11 @@ extern bool atEod(const hilti::rt::ValueReference<hilti::rt::Stream>& data, cons
* @return true if end-of-data has been seen, but not necessarily reached.
*/
extern bool haveEod(const hilti::rt::ValueReference<hilti::rt::Stream>& data, const hilti::rt::stream::View& cur);

/**
* Manually trigger a backtrack operation, reverting back to the most recent &try.
*
* Implemented by throwing a ``Backtrack`` exception.
*/
inline void backtrack() { throw Backtrack(); }

} // namespace detail
} // namespace spicy::rt
1 change: 1 addition & 0 deletions spicy/runtime/src/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using namespace spicy::rt;
using namespace spicy::rt::detail;

// Out-of-line implementation pieces for the runtime's exception classes.
// NOTE(review): the exact expansion is defined by the HILTI macro -- confirm.
HILTI_EXCEPTION_IMPL(Backtrack)
HILTI_EXCEPTION_IMPL(ParseError)

void detail::printParserState(const std::string& unit_id, const hilti::rt::ValueReference<hilti::rt::Stream>& data,
Expand Down
14 changes: 14 additions & 0 deletions spicy/toolchain/include/ast/operators/unit.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,18 @@ this method will not do anything.
}
END_METHOD

// Declares the unit method ``backtrack()``: it takes no arguments and
// returns nothing. The ``doc`` text is user-facing documentation for the
// method, so it must contain only the description itself.
BEGIN_METHOD(unit, Backtrack)
    auto signature() const {
        return hilti::operator_::Signature{.self = hilti::type::constant(spicy::type::Unit(type::Wildcard())),
                                           .result = hilti::type::Void(),
                                           .id = "backtrack",
                                           .args = {},
                                           .doc = R"(
Aborts parsing at the current position and returns back to the most recent
``&try`` attribute. Turns into a parse error if there's no ``&try`` in scope.
)"};
    }
END_METHOD


} // namespace spicy::operator_
5 changes: 1 addition & 4 deletions spicy/toolchain/include/ast/types/unit-items/switch.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,7 @@ class Switch : public hilti::NodeBase, public spicy::trait::isUnitItem {
_hooks_start(_cases_end),
_hooks_end(-1) {}

auto expression() const {
return childs()[0].tryAs<Expression>();
;
}
auto expression() const { return childs()[0].tryReferenceAs<Expression>(); }
Engine engine() const { return _engine; }
auto condition() const { return childs()[1].tryReferenceAs<Expression>(); }
auto cases() const { return childs<switch_::Case>(_cases_start, _cases_end); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,12 @@ class ParserBuilder {
*/
void finalizeUnit(bool success, const Location& l);

/** Prepare for backtracking via ``&try``. */
void initBacktracking();

/** Clean up after potential backtracking via ``&try``. */
void finishBacktracking();

CodeGen* cg() const { return _cg; }
const std::shared_ptr<hilti::Context>& context() const;
const hilti::Options& options() const;
Expand Down
5 changes: 5 additions & 0 deletions spicy/toolchain/src/compiler/codegen/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,11 @@ struct VisitorPassIterate : public hilti::visitor::PreOrder<void, VisitorPassIte
replaceNode(&p, builder::assign(cur, argument(n.op2(), 0)));
}

// Replaces a unit's ``backtrack()`` method call with a call to the runtime
// function ``spicy_rt::backtrack``, passing no arguments.
result_t operator()(const operator_::unit::Backtrack& n, position_t p) {
    replaceNode(&p, builder::call("spicy_rt::backtrack", {}));
}

// Replaces a unit's ``connect_filter()`` method call with a call to the
// runtime function ``spicy_rt::filter_connect``. The call receives the
// operator's first operand and the first element of its third operand
// (presumably the unit itself and the filter argument -- confirm).
result_t operator()(const operator_::unit::ConnectFilter& n, position_t p) {
    replaceNode(&p, builder::call("spicy_rt::filter_connect", {n.op0(), argument(n.op2(), 0)}));
}
5 changes: 3 additions & 2 deletions spicy/toolchain/src/compiler/codegen/grammar-builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ struct Visitor : public hilti::visitor::PreOrder<Production, Visitor> {
const auto& loc = p.node.location();
auto& field = currentField().first;
auto id = cg->uniquer()->get(field.id());
auto eod = AttributeSet::find(field.attributes(), "&eod");
auto count = AttributeSet::find(field.attributes(), "&count");
auto size = AttributeSet::find(field.attributes(), "&size");
auto parse_at = AttributeSet::find(field.attributes(), "&parse-at");
Expand Down Expand Up @@ -97,9 +98,9 @@ struct Visitor : public hilti::visitor::PreOrder<Production, Visitor> {
// Custom input, just iterate until EOD.
return production::ForEach(id, sub, true, loc);

if ( while_ || until || until_including )
if ( while_ || until || until_including || eod )
// The container parsing will evaluate the corresponding stop
// conditon.
// condition as necessary.
return production::ForEach(id, sub, true, loc);

// Nothing specified, use look-ahead to figure out when to stop
Expand Down
24 changes: 24 additions & 0 deletions spicy/toolchain/src/compiler/codegen/parser-builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,9 @@ struct ProductionVisitor
builder()->addAssign(builder::index(__offsets, *field->index()),
builder::tuple({cur_offset, builder::optional(hilti::type::UnsignedInteger(64))}));
}

if ( auto a = AttributeSet::find(field->attributes(), "&try") )
pb->initBacktracking();
}
}

Expand All @@ -535,6 +538,9 @@ struct ProductionVisitor
}
}
else {
if ( auto a = AttributeSet::find(field->attributes(), "&try") )
pb->finishBacktracking();

// We are the field's owner, record offsets and post-process the various attributes.
if ( pb->options().getAuxOption<bool>("spicy.track_offsets", false) ) {
assert(field->index());
Expand Down Expand Up @@ -1412,3 +1418,21 @@ void ParserBuilder::consumeLookAhead(std::optional<Expression> dst) {
builder()->addAssign(state().lahead, look_ahead::None);
advanceInput(state().lahead_end);
}

// Emits the setup code for a ``&try`` field: remembers the current input
// position and opens a try/catch whose handler for ``spicy_rt::Backtrack``
// resets ``cur`` to that position. On return, the try body is the active
// builder and a new parser state has been pushed; finishBacktracking()
// performs the matching pops.
void ParserBuilder::initBacktracking() {
// Temporary holding the input position as of the start of the field.
auto try_cur = builder()->addTmp("try_cur", state().cur);
auto [body, try_] = builder()->addTry();
// Only ``Backtrack`` gets a handler here; all it does is rewind ``cur``.
auto catch_ = try_.addCatch(builder::parameter(ID("e"), builder::typeByID("spicy_rt::Backtrack")));
pushBuilder(catch_, [&]() { builder()->addAssign(state().cur, try_cur); });

// Continue with a copy of the state that has trimming switched off --
// presumably so that input before ``try_cur`` stays available for the
// rewind (TODO confirm).
auto pstate = state();
pstate.trim = builder::bool_(false);
pushState(std::move(pstate));
// Subsequent code goes into the try body until finishBacktracking().
pushBuilder(body);
}

// Counterpart to initBacktracking(): pops one builder and one parser state
// (matching that function's pushBuilder(body)/pushState() calls), then
// calls trimInput() with the previous state back in effect.
void ParserBuilder::finishBacktracking() {
popBuilder();
popState();
// Trimming was disabled in the state pushed by initBacktracking(); now
// that it has been popped, trim explicitly.
trimInput();
}
2 changes: 1 addition & 1 deletion spicy/toolchain/src/compiler/parser/scanner.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ static std::string expandEscapes(Driver* driver, std::string s, spicy::detail::p
address4 ({digits}"."){3}{digits}
address6 ("["({hexs}:){7}{hexs}"]")|("["0x{hexs}({hexs}|:)*"::"({hexs}|:)*"]")|("["({hexs}|:)*"::"({hexs}|:)*"]")|("["({hexs}|:)*"::"({hexs}|:)*({digits}"."){3}{digits}"]")

attribute \&(bit-order|byte-order|chunked|convert|count|cxxname|default|eod|internal|ipv4|ipv6|length|no-emit|nosub|on-heap|optional|originator|parse-at|parse-from|priority|requires|responder|size|static|synchronize|transient|type|until|until-including|while|have_prototype)
attribute \&(bit-order|byte-order|chunked|convert|count|cxxname|default|eod|internal|ipv4|ipv6|length|no-emit|nosub|on-heap|optional|originator|parse-at|parse-from|priority|requires|responder|size|static|synchronize|transient|try|type|until|until-including|while|have_prototype)
blank [ \t]
comment [ \t]*#[^\n]*\n?
digit [0-9]
Expand Down
4 changes: 2 additions & 2 deletions spicy/toolchain/src/compiler/visitors/validator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,8 @@ struct PreTransformVisitor : public hilti::visitor::PreOrder<void, PreTransformV

else if ( a.tag() == "&eod" ) {
if ( auto f = getAttrField(p) ) {
if ( ! f->parseType().isA<type::Bytes>() || f->ctor() )
error("&eod is only valid for bytes fields", p);
if ( ! (f->parseType().isA<type::Bytes>() || f->parseType().isA<type::Vector>()) || f->ctor() )
error("&eod is only valid for bytes and vector fields", p);
}
}

Expand Down
Loading