Skip to content

Commit

Permalink
style: tidying
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Jul 5, 2023
1 parent 83d56df commit 3b910b6
Show file tree
Hide file tree
Showing 2 changed files with 475 additions and 361 deletions.
239 changes: 123 additions & 116 deletions tree-sitter-markdown-inline/src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@ typedef enum {

// Determines if a character is punctuation as defined by the markdown spec.
// Only the four ASCII punctuation ranges are recognised:
//   '!'..'/', ':'..'@', '['..'`' and '{'..'~'.
// Returns true when `chr` lies in one of those ranges, false otherwise.
static bool is_punctuation(char chr) {
    return (chr >= '!' && chr <= '/') || (chr >= ':' && chr <= '@') ||
           (chr >= '[' && chr <= '`') || (chr >= '{' && chr <= '~');
}

// State bitflags used with `Scanner.state`
Expand All @@ -35,27 +32,30 @@ static const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
// Current delimiter run is opening
static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;

// Emits the error token so that an invalid parse branch is stopped.
// This happens in the following situations:
// 1. A newline is encountered after a line break that ended a paragraph, and
//    no new block has been opened.
// 2. A new block is encountered after a soft line break.
// 3. A `$._trigger_error` token is valid, which is used to stop parse
//    branches through normal tree-sitter grammar rules.
//
// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
// grammar.js
//
// Always sets `lexer->result_symbol` to ERROR and returns true.
static bool error(TSLexer *lexer) {
    lexer->result_symbol = ERROR;
    return true;
}

// State carried by the external scanner between calls.
typedef struct {

// Parser state flags (see the `STATE_EMPHASIS_DELIMITER_*` bitflags)
uint8_t state;
// Delimiter length that `parse_backtick` passes to `parse_leaf_delimiter`
// for '`' code spans.
uint8_t code_span_delimiter_length;
// Delimiter length that `parse_dollar` passes to `parse_leaf_delimiter` for
// '$' latex spans.
uint8_t latex_span_delimiter_length;
// The number of characters remaining in the current emphasis delimiter
// run.
uint8_t num_emphasis_delimiters_left;

} Scanner;
Expand Down Expand Up @@ -86,8 +86,11 @@ static void deserialize(Scanner *s, const char *buffer, unsigned length) {
}
}

static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, const bool *valid_symbols,
const char delimiter, const TokenType open_token, const TokenType close_token) {
static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t *delimiter_length,
const bool *valid_symbols,
const char delimiter,
const TokenType open_token,
const TokenType close_token) {
uint8_t level = 0;
while (lexer->lookahead == delimiter) {
lexer->advance(lexer, false);
Expand Down Expand Up @@ -127,24 +130,30 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
return false;
}

// Handles a run of '`' characters by delegating to `parse_leaf_delimiter`
// with the scanner's code span delimiter length and the CODE_SPAN_START /
// CODE_SPAN_CLOSE tokens. Returns whatever `parse_leaf_delimiter` returns.
static bool parse_backtick(Scanner *s, TSLexer *lexer,
                           const bool *valid_symbols) {
    return parse_leaf_delimiter(lexer, &s->code_span_delimiter_length,
                                valid_symbols, '`', CODE_SPAN_START,
                                CODE_SPAN_CLOSE);
}

// Handles a run of '$' characters by delegating to `parse_leaf_delimiter`
// with the scanner's latex span delimiter length and the LATEX_SPAN_START /
// LATEX_SPAN_CLOSE tokens. Returns whatever `parse_leaf_delimiter` returns.
static bool parse_dollar(Scanner *s, TSLexer *lexer,
                         const bool *valid_symbols) {
    return parse_leaf_delimiter(lexer, &s->latex_span_delimiter_length,
                                valid_symbols, '$', LATEX_SPAN_START,
                                LATEX_SPAN_CLOSE);
}

static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
lexer->advance(lexer, false);
// If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
// part of an emphasis delimiter run, so interpret it as such.
// If `num_emphasis_delimiters_left` is not zero then we already decided
// that this should be part of an emphasis delimiter run, so interpret it as
// such.
if (s->num_emphasis_delimiters_left > 0) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it should be open
// or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && valid_symbols[EMPHASIS_OPEN_STAR]) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
// should be open or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
valid_symbols[EMPHASIS_OPEN_STAR]) {
s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
lexer->result_symbol = EMPHASIS_OPEN_STAR;
s->num_emphasis_delimiters_left--;
Expand All @@ -163,37 +172,32 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
star_count++;
lexer->advance(lexer, false);
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer);
if (valid_symbols[EMPHASIS_OPEN_STAR] || valid_symbols[EMPHASIS_CLOSE_STAR]) {
// The decision made for the first star also counts for all the following stars in the
// delimiter run. Remember how many there are.
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->eof(lexer);
if (valid_symbols[EMPHASIS_OPEN_STAR] ||
valid_symbols[EMPHASIS_CLOSE_STAR]) {
// The decision made for the first star also counts for all the
// following stars in the delimiter run. Remember how many there are.
s->num_emphasis_delimiters_left = star_count - 1;
// Look ahead to the next symbol (after the last star) to find out if it is whitespace,
// punctuation or other.
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
// Look ahead to the next symbol (after the last star) to find out if it
// is whitespace, punctuation or other.
bool next_symbol_whitespace =
line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js for these
// tokens for how this is done.
if (
valid_symbols[EMPHASIS_CLOSE_STAR] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] && (
!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation ||
next_symbol_whitespace
)
) {
// Information about the last token is in valid_symbols. See grammar.js
// for these tokens for how this is done.
if (valid_symbols[EMPHASIS_CLOSE_STAR] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation || next_symbol_whitespace)) {
// Closing delimiters take precedence
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
return true;
}
if (
!next_symbol_whitespace && (
!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE]
)
) {
if (!next_symbol_whitespace && (!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_OPEN_STAR;
return true;
Expand All @@ -204,12 +208,14 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {

static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
lexer->advance(lexer, false);
// If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
// part of an emphasis delimiter run, so interpret it as such.
// If `num_emphasis_delimiters_left` is not zero then we already decided
// that this should be part of an emphasis delimiter run, so interpret it as
// such.
if (s->num_emphasis_delimiters_left > 0) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it should be open
// or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && valid_symbols[STRIKETHROUGH_OPEN]) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
// should be open or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
valid_symbols[STRIKETHROUGH_OPEN]) {
s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
lexer->result_symbol = STRIKETHROUGH_OPEN;
s->num_emphasis_delimiters_left--;
Expand All @@ -228,37 +234,32 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
star_count++;
lexer->advance(lexer, false);
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer);
if (valid_symbols[STRIKETHROUGH_OPEN] || valid_symbols[STRIKETHROUGH_CLOSE]) {
// The decision made for the first tilde also counts for all the following tildes in the
// delimiter run. Remember how many there are.
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->eof(lexer);
if (valid_symbols[STRIKETHROUGH_OPEN] ||
valid_symbols[STRIKETHROUGH_CLOSE]) {
// The decision made for the first tilde also counts for all the
// following tildes in the delimiter run. Remember how many there are.
s->num_emphasis_delimiters_left = star_count - 1;
// Look ahead to the next symbol (after the last tilde) to find out if it is whitespace,
// punctuation or other.
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
// Look ahead to the next symbol (after the last tilde) to find out if it
// is whitespace, punctuation or other.
bool next_symbol_whitespace =
line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js for these
// tokens for how this is done.
if (
valid_symbols[STRIKETHROUGH_CLOSE] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] && (
!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation ||
next_symbol_whitespace
)
) {
// Information about the last token is in valid_symbols. See grammar.js
// for these tokens for how this is done.
if (valid_symbols[STRIKETHROUGH_CLOSE] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation || next_symbol_whitespace)) {
// Closing delimiters take precedence
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = STRIKETHROUGH_CLOSE;
return true;
}
if (
!next_symbol_whitespace && (
!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE]
)
) {
if (!next_symbol_whitespace && (!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = STRIKETHROUGH_OPEN;
return true;
Expand All @@ -267,14 +268,17 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
return false;
}

static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
static bool parse_underscore(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
lexer->advance(lexer, false);
// If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
// part of an emphasis delimiter run, so interpret it as such.
// If `num_emphasis_delimiters_left` is not zero then we already decided
// that this should be part of an emphasis delimiter run, so interpret it as
// such.
if (s->num_emphasis_delimiters_left > 0) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it should be open
// or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && valid_symbols[EMPHASIS_OPEN_UNDERSCORE]) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
// should be open or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
valid_symbols[EMPHASIS_OPEN_UNDERSCORE]) {
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
s->num_emphasis_delimiters_left--;
return true;
Expand All @@ -292,21 +296,30 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
underscore_count++;
lexer->advance(lexer, false);
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer);
if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] || valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->eof(lexer);
if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] ||
valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
s->num_emphasis_delimiters_left = underscore_count - 1;
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_whitespace =
line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
bool right_flanking = !valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] || next_symbol_punctuation || next_symbol_whitespace);
bool left_flanking = !next_symbol_whitespace &&
(!next_symbol_punctuation || valid_symbols[LAST_TOKEN_PUNCTUATION] || valid_symbols[LAST_TOKEN_WHITESPACE]);
if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE] && right_flanking && (!left_flanking || next_symbol_punctuation)) {
bool right_flanking =
!valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation || next_symbol_whitespace);
bool left_flanking =
!next_symbol_whitespace && (!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE]);
if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE] && right_flanking &&
(!left_flanking || next_symbol_punctuation)) {
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
return true;
}
if (left_flanking && (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
if (left_flanking &&
(!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
return true;
Expand All @@ -316,25 +329,26 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
}

static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
// A normal tree-sitter rule decided that the current branch is invalid and now "requests"
// an error to stop the branch
// A normal tree-sitter rule decided that the current branch is invalid and
// now "requests" an error to stop the branch
if (valid_symbols[TRIGGER_ERROR]) {
return error(lexer);
}

// Decide which tokens to consider based on the first non-whitespace character
// Decide which tokens to consider based on the first non-whitespace
// character
switch (lexer->lookahead) {
case '`':
// A backtick could mark the beginning or ending of a code span or a fenced
// code block.
// A backtick could mark the beginning or ending of a code span or a
// fenced code block.
return parse_backtick(s, lexer, valid_symbols);
case '$':
return parse_dollar(s,lexer, valid_symbols);
return parse_dollar(s, lexer, valid_symbols);
case '*':
// A star could either mark the beginning or ending of emphasis, a list item or
// thematic break.
// This code is similar to the code for '_' and '+'.
return parse_star(s,lexer, valid_symbols);
// A star could either mark the beginning or ending of emphasis, a
// list item or thematic break. This code is similar to the code for
// '_' and '+'.
return parse_star(s, lexer, valid_symbols);
case '_':
return parse_underscore(s, lexer, valid_symbols);
case '~':
Expand All @@ -350,27 +364,20 @@ void *tree_sitter_markdown_inline_external_scanner_create() {
}

// External scanner entry point for scanning: interprets `payload` as the
// `Scanner` state and forwards to `scan`, returning its result.
bool tree_sitter_markdown_inline_external_scanner_scan(
    void *payload, TSLexer *lexer, const bool *valid_symbols) {
    Scanner *scanner = (Scanner *)payload;
    return scan(scanner, lexer, valid_symbols);
}

// External scanner entry point for serialization: interprets `payload` as the
// `Scanner` state and forwards to `serialize`, returning the value it
// reports for `buffer`.
unsigned tree_sitter_markdown_inline_external_scanner_serialize(void *payload,
                                                                char *buffer) {
    Scanner *scanner = (Scanner *)payload;
    return serialize(scanner, buffer);
}

// External scanner entry point for deserialization: interprets `payload` as
// the `Scanner` state and forwards `buffer` / `length` to `deserialize`.
void tree_sitter_markdown_inline_external_scanner_deserialize(void *payload,
                                                              char *buffer,
                                                              unsigned length) {
    Scanner *scanner = (Scanner *)payload;
    deserialize(scanner, buffer, length);
}
Expand Down
Loading

0 comments on commit 3b910b6

Please sign in to comment.