Skip to content

Commit

Permalink
Merge branch 'unstable' into ik/funAsSeq2
Browse files Browse the repository at this point in the history
  • Loading branch information
konnov authored Mar 9, 2022
2 parents 1dc6558 + 386c1c8 commit 2aa752d
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 26 deletions.
3 changes: 2 additions & 1 deletion UNRELEASED.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@

### Bug fixes
* Fixed bug where TLA+ `LAMBDA`s wouldn't inline outside `Fold` and `MkSeq`, see #1446
* Fix the failing property-based test, see #1454
* Fix the comment preprocessor to extract annotations after a linefeed, see #1456
* Fix the failing property-based test, see #1454
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@ package at.forsyte.apalache.io.annotations.parser
* respect the structure of the comments, but it may return ill-formed annotations, if they are ill-formed in the
* original text. This preprocessor prunes the comment tokens, even in arguably valid cases (e.g., inside a string).
* Although it is usually not a good idea to write an ad-hoc lexer, it is not obvious to me, how to use a lexer
* generator here. However, if someone knows how to write this preprocessor with a lexer generator, it would be great!
* </p>
* generator here. However, if someone knows how to write this preprocessor with a lexer generator, let me know.
* Perhaps, the first step would be to write a precise grammar for this lexer, instead of write the spaghetti of
* if-then-else expressions, as we have in this preprocessor.</p>
*
* <p>See the annotations HOWTO: https://apalache.informal.systems/docs/HOWTOs/howto-write-type-annotations.html</p>
*
* @author
* Igor Konnov
*/
class CommentPreprocessor {
private val tokenRegex = """(\n|\\\*|\(\*|\*\)|[ \t\n\r]@[a-zA-Z_][a-zA-Z0-9_]*|\)|;|")""".r
private val tokenRegex = """(\n|\\\*|\(\*|\*\)|@[a-zA-Z_][a-zA-Z0-9_]*|\)|;|")""".r

// internal state of the preprocessor
private case class State(
Expand Down Expand Up @@ -64,7 +65,7 @@ class CommentPreprocessor {
if (!inOneLineComment) {
this.copy(multiCommentLevel = multiCommentLevel + 1)
} else {
// a multi-level comment is ignored inside a multi-level comment
// a multi-level comment is ignored inside a single-line comment
// \* ... (* ....
this
}
Expand Down Expand Up @@ -106,6 +107,7 @@ class CommentPreprocessor {
val matchIterator = tokenRegex.findAllIn(text)
while (matchIterator.hasNext) {
val group = matchIterator.next()
val isGoodStartForAnnotation = isValidCharBeforeAnnotation(text, matchIterator.start)
if (matchIterator.start > lastIndexOfUnmatchedText && !state.isExcludingText) {
// add the text between the last match and this match
val fragment = text.substring(lastIndexOfUnmatchedText, matchIterator.start)
Expand All @@ -124,21 +126,26 @@ class CommentPreprocessor {
// look one character ahead to figure out the annotation type
val lookaheadChar = if (lastIndexOfUnmatchedText < text.length) Some(text(lastIndexOfUnmatchedText)) else None
// figure out whether we have met an annotation or not
if (!state.inAnnotation && !state.inString && group.startsWith(" @")) {
if (lookaheadChar.contains(':') || lookaheadChar.contains('(')) {
// Advance the last unmatched index beyond "@annotation:" or "@annotation(".
lastIndexOfUnmatchedText += 1
} else if (isEndOfParameterlessAnnotation(lookaheadChar)) {
// Add this annotation immediately.
// Do not advance the index in case of "@annotation " or similar.
annotations = annotations :+ group.trim
} else {
if (!state.inAnnotation && !state.inString && group.startsWith("@")) {
if (!isGoodStartForAnnotation) {
// this is not an annotation, push it back to the free text
textBuilder.append(group)
} else {
if (lookaheadChar.contains(':') || lookaheadChar.contains('(')) {
// Advance the last unmatched index beyond "@annotation:" or "@annotation(".
lastIndexOfUnmatchedText += 1
} else if (isEndOfParameterlessAnnotation(lookaheadChar)) {
// Add this annotation immediately.
// Do not advance the index in case of "@annotation " or similar.
annotations = annotations :+ group.trim
} else {
// this is not an annotation, push it back to the free text
textBuilder.append(group)
}
}
}

val nextState = getNextState(state, group, lookaheadChar)
val nextState = getNextState(state, group, lookaheadChar, isGoodStartForAnnotation)

(state.inAnnotation, nextState.inAnnotation) match {
case (false, false) => ()
Expand Down Expand Up @@ -178,7 +185,21 @@ class CommentPreprocessor {
(textBuilder.toString(), annotations)
}

private def getNextState(state: State, group: String, lookaheadChar: Option[Char]): State = {
private def isValidCharBeforeAnnotation(text: String, index: Int): Boolean = {
if (index == 0) {
true
} else {
val char = text(index - 1)
// @foo is written either after a whitespace (to exclude emails), or after the comment token, e.g., (* or \*
char.isWhitespace || char == '*'
}
}

private def getNextState(
state: State,
group: String,
lookaheadChar: Option[Char],
isGoodStartForAnnotation: Boolean): State = {
group match {
case "\\*" =>
state.enterOneLineComment()
Expand Down Expand Up @@ -225,7 +246,7 @@ class CommentPreprocessor {
}

case _ =>
if (!group.startsWith(" @")) {
if (!group.startsWith("@") || !isGoodStartForAnnotation) {
state
} else {
if (lookaheadChar.contains('(') && !state.inAnnotation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ class TestCommentPreprocessor extends AnyFunSuite with Checkers {
|""".stripMargin
val (output, potentialAnnotations) = CommentPreprocessor()(input)
val expected =
""" (aaa)
""" (aaa)
| not an annotation: john@example.org
| xxx
|ddd
| xxx
| ddd
| zzz@bbb(1)
|""".stripMargin
assert(output == expected)
assert(potentialAnnotations == List("@annotation(\"a\", 1)", "@semi: foo ;", "@multi: aaa bbb ccc;"))
assert(output == expected)
}

test("unclosed annotations") {
Expand All @@ -95,7 +95,7 @@ class TestCommentPreprocessor extends AnyFunSuite with Checkers {
|""".stripMargin
val (output, potentialAnnotations) = CommentPreprocessor()(input)
val expected =
""" xx: bar
""" xx : bar
|""".stripMargin
assert(output == expected)
// The extracted annotation is ill-formed. This will be detected by the annotation parser.
Expand All @@ -113,10 +113,10 @@ class TestCommentPreprocessor extends AnyFunSuite with Checkers {
|""".stripMargin
val (output, potentialAnnotations) = CommentPreprocessor()(input)
val expected =
""" (aaa)
|
| type annotation
|
""" (aaa)
|
| type annotation
|
|""".stripMargin
assert(output == expected)
assert(potentialAnnotations.size == 4)
Expand All @@ -126,6 +126,28 @@ class TestCommentPreprocessor extends AnyFunSuite with Checkers {
assert(potentialAnnotations(3) == "@type: a => Int ;")
}

test("annotation starting with @") {
// Regression: https://github.com/informalsystems/apalache/issues/1304
val input =
"""(*
|@type: Int;
|""".stripMargin
val (output, potentialAnnotations) = CommentPreprocessor()(input)
assert(output.trim.isEmpty)
assert(potentialAnnotations.size == 1)
assert(potentialAnnotations.head == "@type: Int;")
}

test("annotation in the very beginning") {
// Regression: https://github.com/informalsystems/apalache/issues/1304
val input =
"""\*@type: Int;"""
val (output, potentialAnnotations) = CommentPreprocessor()(input)
assert(output.trim.isEmpty)
assert(potentialAnnotations.size == 1)
assert(potentialAnnotations.head == "@type: Int;")
}

test("Single-line comment only") {
hasAnnotationsWhenNonEmpty("""\*""")
}
Expand Down

0 comments on commit 2aa752d

Please sign in to comment.