Skip to content

Commit

Permalink
Quarto: new parser
Browse files Browse the repository at this point in the history
This parser is based on the RMarkdown parser.

The way of extracting chunk labels is enhanced for Quarto.

A. recognizing unexecuted blocks like

       ```{{python}}
       ...
       ```
   The Quarto parser makes an anonymous tag for the code block.

B. recognizing "#| label: " in code blocks like

       ```{python}
       #| label: optimization-techniques
       ...
       ```
   The Quarto parser extracts "optimization-techniques" as a tag.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
  • Loading branch information
masatake committed Mar 25, 2023
1 parent 81b1d82 commit 94d623f
Show file tree
Hide file tree
Showing 13 changed files with 260 additions and 0 deletions.
1 change: 1 addition & 0 deletions Tmain/list-subparsers-all.d/stdout-expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ OpenAPI Yaml base <> sub {bidirectional}
PlistXML XML base <> sub {bidirectional}
PythonLoggingConfig Iniconf base <> sub {bidirectional}
QtMoc C++ base <> sub {bidirectional}
Quarto Markdown base <= sub {dedicated}
R6Class R base <> sub {bidirectional}
RMarkdown Markdown base <= sub {dedicated}
RSpec Ruby base => sub {shared}
Expand Down
3 changes: 3 additions & 0 deletions Units/parser-quarto.r/simple.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--sort=no
--extras=+{guest}
--fields=+{end}{line}{language}
2 changes: 2 additions & 0 deletions Units/parser-quarto.r/simple.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
defun input.qmd /^#| label: defun$/;" l line:2 language:Quarto end:6
f input.qmd /^def f():$/;" f line:4 language:Python end:5
7 changes: 7 additions & 0 deletions Units/parser-quarto.r/simple.d/input.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
```{python}
#| label: defun
#| fig-cap: "Define a function"
def f():
pass
```

3 changes: 3 additions & 0 deletions Units/parser-quarto.r/unexecuted-block.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--sort=no
--extras=+{guest}
--fields=+{end}{line}{language}
4 changes: 4 additions & 0 deletions Units/parser-quarto.r/unexecuted-block.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__anon53b260bc0100 input.qmd /^```{python}$/;" l line:1 language:Quarto end:4
__anon53b260bc0200 input.qmd /^```{{python}}$/;" l line:6 language:Quarto end:9
f input.qmd /^def f():$/;" f line:2 language:Python end:3
g input.qmd /^def g():$/;" f line:7 language:Python end:8
9 changes: 9 additions & 0 deletions Units/parser-quarto.r/unexecuted-block.d/input.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
```{python}
def f():
pass
```

```{{python}}
def g():
pass
```
1 change: 1 addition & 0 deletions docs/news.rst
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ The following parsers have been added:
* PythonLoggingConfig
* QemuHX *optlib*
* QtMoc
* Quarto *Markdown based subparser*
* R
* R6Class *R based subparser*
* Rake *Ruby based subparser*
Expand Down
1 change: 1 addition & 0 deletions main/parsers_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
PythonLoggingConfigParser, \
QemuHXParser, \
QtMocParser, \
QuartoParser, \
RMarkdownParser, \
RParser, \
RakeParser, \
Expand Down
224 changes: 224 additions & 0 deletions parsers/quarto.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/*
*
* Copyright (c) 2023, Masatake YAMATO
*
* This source code is released for free distribution under the terms of the
* GNU General Public License version 2 or (at your option) any later version.
*
* This module contains functions for generating tags for Quarto files.
* https://quarto.org/docs/guide/
* https://quarto.org/docs/reference/
* https://www.jaysong.net/RBook/quarto.html#sec-quarto-howtouse <Japanese>
*
*/

/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include "markdown.h"

#include "entry.h"
#include "parse.h"
#include "read.h"

#include <ctype.h>
#include <string.h>

/*
* DATA DEFINITIONS
*/
/* Kinds of tags made by this parser. The values are passed as the kind
 * index when a tag entry is initialized. */
typedef enum {
	K_CHUNK_LABEL,	/* label of a code chunk; first enumerator, hence 0 */
} quartoKind;

/* Kind table installed as the parser's kindTable in QuartoParser ().
 * Its only entry sits at index K_CHUNK_LABEL (0). */
static kindDefinition QuartoKinds[] = {
/* 'l' is the kind letter; "chunklabel" and "chunk labels" are presumably
 * the long kind name and its description -- the field layout comes from
 * kindDefinition, which is declared outside this file (TODO confirm). */
{ true, 'l', "chunklabel", "chunk labels"},
};

/* State of the Quarto subparser: the Markdown subparser interface plus the
 * chunk label being tracked for the code block currently being read. */
struct sQuartoSubparser {
/* Kept as the first member: the callbacks in this file cast the
 * markdownSubparser pointer they receive to struct sQuartoSubparser. */
markdownSubparser markdown;
/* Value returned by makeQuartoTag () for the most recent chunk label,
 * or CORK_NIL when no label is being tracked. */
int lastChunkLabel;
};

/*
* FUNCTION DEFINITIONS
*/

/* Parser entry point (installed as def->parser in QuartoParser ()).
 * It does no scanning itself; it only schedules the base parser to run.
 * Chunk label tags are made from the subparser callbacks in this file.
 * NOTE(review): the argument 0 presumably selects the first entry of the
 * dependency table (Markdown) -- confirm against scheduleRunningBaseparser. */
static void findQuartoTags (void)
{
scheduleRunningBaseparser (0);
}

/* Advance the pointer CP past any run of spaces and tabs.
 * CP must be a modifiable pointer lvalue; it is evaluated more than once.
 * The do { } while (0) wrapper makes "skip_space (p);" exactly one
 * statement, so it is safe in an unbraced if/else. */
#define skip_space(CP) \
	do { \
		while (*(CP) == ' ' || *(CP) == '\t') \
			(CP)++; \
	} while (0)

/*
 * Make a tag entry named NAME with kind KINDINDEX.
 * When ANONYMOUS is true, the entry is marked with XTAG_ANONYMOUS before
 * it is handed to makeTagEntry ().
 * Returns whatever makeTagEntry () returns; callers in this file keep it
 * and later pass it to getEntryInCorkQueue ().
 */
static int makeQuartoTag (vString *name, int kindIndex, bool anonymous)
{
	tagEntryInfo tag;

	initTagEntry (&tag, vStringValue (name), kindIndex);

	if (anonymous)
	{
		markTagExtraBit (&tag, XTAG_ANONYMOUS);
	}

	const int corkIndex = makeTagEntry (&tag);
	return corkIndex;
}

/*
 * Extract the language name from the marker that follows a code fence and
 * make a chunk label tag for the block.
 *
 * LANGMARKER forms handled here:
 *
 *     {LANG}            anonymous chunk
 *     {LANG, ...}       anonymous chunk
 *     {LANG LABEL}      chunk labelled LABEL
 *     {LANG LABEL, ...} chunk labelled LABEL
 *     {{LANG}}          unexecuted block; treated like {LANG}
 *
 * LANG ends at the first space, tab, ',' or '}'. LABEL consists of
 * alphanumeric characters and '-'.
 *
 * On success the language name is appended to LANGNAME, the value returned
 * by makeQuartoTag () is stored in the subparser's lastChunkLabel, and true
 * is returned.
 *
 * Returns false, leaving LANGNAME untouched, when LANGMARKER does not
 * start with '{', when no terminator follows the language name, or when
 * the language name is empty.
 *
 * Note: when the text after "LANG " is followed by something other than
 * ',' or '}', no tag is made and lastChunkLabel is left as it was, but the
 * function still returns true.
 */
static bool extractLanguageForCodeBlock (markdownSubparser *s,
										 const char *langMarker,
										 vString *langName)
{
	struct sQuartoSubparser *quarto = (struct sQuartoSubparser *)s;
	const char *cp = langMarker;

	if (*cp != '{')
		return false;
	cp++;

	/* An unexecuted block doubles the braces: ```{{python}}.
	 * Skipping the second '{' is all that is needed; the scan below stops
	 * at the first '}' just as it does for the single-brace form. */
	if (*cp == '{')
		cp++;

	const char *end = strpbrk (cp, " \t,}");
	if (end == NULL || end == cp)
		return false;

	vStringNCatS (langName, cp, end - cp);
	cp = end;

	/* No room for a label: "{LANG}" or "{LANG, ...". */
	if (*cp == ',' || *cp == '}')
	{
		vString *name = anonGenerateNew ("__anon", K_CHUNK_LABEL);
		quarto->lastChunkLabel = makeQuartoTag (name,
												K_CHUNK_LABEL,
												true);
		vStringDelete (name);
		return true;
	}

	skip_space (cp);

	vString *chunk_label = vStringNew ();
	bool anonymous = false;
	while (isalnum ((unsigned char)*cp) || *cp == '-')
		vStringPut (chunk_label, *cp++);

	/* Nothing usable as a label was found; fall back to an anonymous name. */
	if (vStringLength (chunk_label) == 0)
	{
		anonGenerate (chunk_label, "__anon", K_CHUNK_LABEL);
		anonymous = true;
	}

	skip_space (cp);
	if (*cp == ',' || *cp == '}')
		quarto->lastChunkLabel = makeQuartoTag (chunk_label,
												K_CHUNK_LABEL,
												anonymous);

	vStringDelete (chunk_label);
	return true;
}

/*
 * Called with each line of a code block. Recognizes a line of the form
 *
 *     #| label: NAME
 *
 * and makes a chunk label tag for NAME (the rest of the line with trailing
 * whitespace stripped). Lines of any other form are ignored.
 */
static void notifyCodeBlockLine (markdownSubparser *s,
								 const unsigned char *line)
{
	static const char optionPrefix[] = "#| ";
	static const char labelKey[] = "label:";
	struct sQuartoSubparser *quarto = (struct sQuartoSubparser *)s;
	const char *cursor = (const char *)line;

	if (strncmp (cursor, optionPrefix, sizeof (optionPrefix) - 1) != 0)
		return;
	cursor += sizeof (optionPrefix) - 1;
	while (*cursor == ' ' || *cursor == '\t')
		cursor++;

	if (strncmp (cursor, labelKey, sizeof (labelKey) - 1) != 0)
		return;
	cursor += sizeof (labelKey) - 1;
	while (*cursor == ' ' || *cursor == '\t')
		cursor++;

	if (*cursor == '\0')
		return;

	vString *label = vStringNewInit (cursor);
	vStringStripTrailing (label);
	if (vStringIsEmpty (label))
	{
		vStringDelete (label);
		return;
	}

	/* An anonymous tag may already have been made for this chunk when its
	 * opening fence was read. The explicit "#| label: ..." supersedes it,
	 * so turn the anonymous one into a placeholder. */
	if (quarto->lastChunkLabel != CORK_NIL)
	{
		tagEntryInfo *pending = getEntryInCorkQueue (quarto->lastChunkLabel);
		if (pending && isTagExtraBitMarked (pending, XTAG_ANONYMOUS))
			markTagAsPlaceholder (pending, true);
	}

	quarto->lastChunkLabel = makeQuartoTag (label, K_CHUNK_LABEL, false);
	vStringDelete (label);
}

/*
 * Called when a code block ends. If a chunk label is being tracked, record
 * the current input line number as its end line, then stop tracking it.
 */
static void notifyEndOfCodeBlock (markdownSubparser *s)
{
	struct sQuartoSubparser *quarto = (struct sQuartoSubparser *)s;

	if (quarto->lastChunkLabel != CORK_NIL)
	{
		tagEntryInfo *labelTag = getEntryInCorkQueue (quarto->lastChunkLabel);

		if (labelTag != NULL)
			labelTag->extensionFields.endLine = getInputLineNumber ();

		quarto->lastChunkLabel = CORK_NIL;
	}
}

/* Installed as the subparser's inputStart callback: clears the tracked
 * chunk label so that no label is pending at that point. */
static void inputStart (subparser *s)
{
	((struct sQuartoSubparser *)s)->lastChunkLabel = CORK_NIL;
}

/*
 * Build the parser definition for Quarto.
 *
 * The parser is registered for the "qmd" extension and declares itself a
 * subparser of "Markdown" (direction SUBPARSER_SUB_RUNS_BASE), wiring the
 * callbacks defined in this file into the Markdown subparser interface.
 */
extern parserDefinition* QuartoParser (void)
{
	static struct sQuartoSubparser quartoSubparser = {
		.markdown = {
			.subparser = {
				.direction = SUBPARSER_SUB_RUNS_BASE,
				.inputStart = inputStart,
			},
			.extractLanguageForCodeBlock = extractLanguageForCodeBlock,
			.notifyCodeBlockLine = notifyCodeBlockLine,
			.notifyEndOfCodeBlock = notifyEndOfCodeBlock,
		},
	};
	static parserDependency dependencies [] = {
		{ DEPTYPE_SUBPARSER, "Markdown", &quartoSubparser },
	};
	static const char *const extensions [] = { "qmd", NULL };

	parserDefinition* const def = parserNew ("Quarto");

	def->parser = findQuartoTags;
	def->extensions = extensions;
	def->kindTable = QuartoKinds;
	def->kindCount = ARRAY_SIZE (QuartoKinds);
	def->dependencies = dependencies;
	def->dependencyCount = ARRAY_SIZE (dependencies);

	return def;
}
1 change: 1 addition & 0 deletions source.mak
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ PARSER_SRCS = \
parsers/protobuf.c \
parsers/python.c \
parsers/pythonloggingconfig.c \
parsers/quarto.c \
parsers/r-r6class.c \
parsers/r-s4class.c \
parsers/r.c \
Expand Down
1 change: 1 addition & 0 deletions win32/ctags_vs2013.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@
<ClCompile Include="..\parsers\protobuf.c" />
<ClCompile Include="..\parsers\python.c" />
<ClCompile Include="..\parsers\pythonloggingconfig.c" />
<ClCompile Include="..\parsers\quarto.c" />
<ClCompile Include="..\parsers\r-r6class.c" />
<ClCompile Include="..\parsers\r-s4class.c" />
<ClCompile Include="..\parsers\r.c" />
Expand Down
3 changes: 3 additions & 0 deletions win32/ctags_vs2013.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@
<ClCompile Include="..\parsers\pythonloggingconfig.c">
<Filter>Source Files\parsers</Filter>
</ClCompile>
<ClCompile Include="..\parsers\quarto.c">
<Filter>Source Files\parsers</Filter>
</ClCompile>
<ClCompile Include="..\parsers\r-r6class.c">
<Filter>Source Files\parsers</Filter>
</ClCompile>
Expand Down

0 comments on commit 94d623f

Please sign in to comment.