Skip to content

Commit

Permalink
refactor: fix some implicit casts, bug in push_block, & make global v…
Browse files Browse the repository at this point in the history
…ariables const/static
  • Loading branch information
amaanq committed Jul 5, 2023
1 parent 936cc84 commit 83d56df
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 120 deletions.
87 changes: 42 additions & 45 deletions tree-sitter-markdown-inline/src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,20 @@ typedef enum {
} TokenType;

// Determines if a character is punctuation as defined by the markdown spec.
//
// Returns true when `chr` falls in one of the four ASCII ranges
// '!'..'/', ':'..'@', '['..'`' or '{'..'~'. Every other value, including
// anything outside the ASCII range, yields false.
//
// NOTE(review): callers in this file pass `(char)lexer->lookahead`; if
// `lookahead` is wider than `char`, that cast truncates before this check
// runs -- confirm that non-ASCII input cannot alias into these ranges.
static bool is_punctuation(char chr) {
    return
        (chr >= '!' && chr <= '/') ||
        (chr >= ':' && chr <= '@') ||
        (chr >= '[' && chr <= '`') ||
        (chr >= '{' && chr <= '~');
}

// Determines if a character is ascii whitespace as defined by the markdown spec.
//
// Returns true for space, horizontal tab, line feed and carriage return only.
static bool is_whitespace(char c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

// State bitflags used with `Scanner.state`

// Mask selecting the two low bits of the state (0x3). TODO: document what these bits encode.
const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
static const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
// Current delimiter run is opening
const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;
static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;

// Convenience function to emit the error token. This is done to stop invalid parse branches.
// Specifically:
Expand Down Expand Up @@ -67,12 +62,12 @@ typedef struct {

// Write the whole state of a Scanner to a byte buffer.
//
// Stores, in this order, `state`, `code_span_delimiter_length`,
// `latex_span_delimiter_length` and `num_emphasis_delimiters_left`, one
// buffer element each, and returns the number of elements written.
// `buffer` must have room for at least that many bytes; the order here has to
// match the order in which `deserialize` reads the fields back.
static unsigned serialize(Scanner *s, char *buffer) {
    // Use an `unsigned` counter so the return needs no narrowing conversion.
    unsigned size = 0;
    buffer[size++] = (char)s->state;
    buffer[size++] = (char)s->code_span_delimiter_length;
    buffer[size++] = (char)s->latex_span_delimiter_length;
    buffer[size++] = (char)s->num_emphasis_delimiters_left;
    return size;
}

// Read the whole state of a Scanner from a byte buffer
Expand All @@ -83,17 +78,17 @@ static void deserialize(Scanner *s, const char *buffer, unsigned length) {
s->latex_span_delimiter_length = 0;
s->num_emphasis_delimiters_left = 0;
if (length > 0) {
size_t i = 0;
s->state = buffer[i++];
s->code_span_delimiter_length = buffer[i++];
s->latex_span_delimiter_length = buffer[i++];
s->num_emphasis_delimiters_left = buffer[i++];
size_t size = 0;
s->state = (uint8_t)buffer[size++];
s->code_span_delimiter_length = (uint8_t)buffer[size++];
s->latex_span_delimiter_length = (uint8_t)buffer[size++];
s->num_emphasis_delimiters_left = (uint8_t)buffer[size++];
}
}

static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, const bool *valid_symbols,
const char delimiter, const TokenType open_token, const TokenType close_token) {
size_t level = 0;
uint8_t level = 0;
while (lexer->lookahead == delimiter) {
lexer->advance(lexer, false);
level++;
Expand All @@ -103,7 +98,8 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
*delimiter_length = 0;
lexer->result_symbol = close_token;
return true;
} else if (valid_symbols[open_token]) {
}
if (valid_symbols[open_token]) {
// Parse ahead to check if there is a closing delimiter
size_t close_level = 0;
while (!lexer->eof(lexer)) {
Expand All @@ -113,17 +109,17 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
if (close_level == level) {
// Found a matching delimiter
break;
} else {
close_level = 0;
}
close_level = 0;
}
lexer->advance(lexer, false);
}
if (close_level == level) {
*delimiter_length = level;
lexer->result_symbol = open_token;
return true;
} else if (valid_symbols[UNCLOSED_SPAN]) {
}
if (valid_symbols[UNCLOSED_SPAN]) {
lexer->result_symbol = UNCLOSED_SPAN;
return true;
}
Expand Down Expand Up @@ -153,15 +149,16 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
lexer->result_symbol = EMPHASIS_OPEN_STAR;
s->num_emphasis_delimiters_left--;
return true;
} else if (valid_symbols[EMPHASIS_CLOSE_STAR]) {
}
if (valid_symbols[EMPHASIS_CLOSE_STAR]) {
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
s->num_emphasis_delimiters_left--;
return true;
}
}
lexer->mark_end(lexer);
// Otherwise count the number of stars
size_t star_count = 1;
uint8_t star_count = 1;
while (lexer->lookahead == '*') {
star_count++;
lexer->advance(lexer, false);
Expand All @@ -174,7 +171,7 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
// Look ahead to the next symbol (after the last star) to find out if it is whitespace,
// punctuation or other.
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation(lexer->lookahead);
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js for these
// tokens for how this is done.
if (
Expand All @@ -189,7 +186,8 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
return true;
} else if (
}
if (
!next_symbol_whitespace && (
!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
Expand All @@ -216,15 +214,16 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
lexer->result_symbol = STRIKETHROUGH_OPEN;
s->num_emphasis_delimiters_left--;
return true;
} else if (valid_symbols[STRIKETHROUGH_CLOSE]) {
}
if (valid_symbols[STRIKETHROUGH_CLOSE]) {
lexer->result_symbol = STRIKETHROUGH_CLOSE;
s->num_emphasis_delimiters_left--;
return true;
}
}
lexer->mark_end(lexer);
// Otherwise count the number of tildes
size_t star_count = 1;
uint8_t star_count = 1;
while (lexer->lookahead == '~') {
star_count++;
lexer->advance(lexer, false);
Expand All @@ -237,7 +236,7 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
// Look ahead to the next symbol (after the last tilde) to find out if it is whitespace,
// punctuation or other.
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation(lexer->lookahead);
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js for these
// tokens for how this is done.
if (
Expand All @@ -252,7 +251,8 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = STRIKETHROUGH_CLOSE;
return true;
} else if (
}
if (
!next_symbol_whitespace && (
!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
Expand All @@ -278,15 +278,16 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
s->num_emphasis_delimiters_left--;
return true;
} else if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
}
if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
s->num_emphasis_delimiters_left--;
return true;
}
}
lexer->mark_end(lexer);
// Otherwise count the number of underscores
size_t underscore_count = 1;
uint8_t underscore_count = 1;
while (lexer->lookahead == '_') {
underscore_count++;
lexer->advance(lexer, false);
Expand All @@ -295,7 +296,7 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] || valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
s->num_emphasis_delimiters_left = underscore_count - 1;
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation(lexer->lookahead);
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
bool right_flanking = !valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] || next_symbol_punctuation || next_symbol_whitespace);
bool left_flanking = !next_symbol_whitespace &&
Expand All @@ -304,7 +305,8 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
return true;
} else if (left_flanking && (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
}
if (left_flanking && (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
return true;
Expand All @@ -326,22 +328,17 @@ static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
// A backtick could mark the beginning or ending of a code span or a fenced
// code block.
return parse_backtick(s, lexer, valid_symbols);
break;
case '$':
return parse_dollar(s,lexer, valid_symbols);
break;
case '*':
// A star could either mark the beginning or ending of emphasis, a list item or
// thematic break.
// This code is similar to the code for '_' and '+'.
return parse_star(s,lexer, valid_symbols);
break;
case '_':
return parse_underscore(s, lexer, valid_symbols);
break;
case '~':
return parse_tilde(s, lexer, valid_symbols);
break;
}
return false;
}
Expand Down
Loading

0 comments on commit 83d56df

Please sign in to comment.