Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mention links extension #171

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions md2html/md2html.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ static const CMDLINE_OPTION cmdline_options[] = {
{ 0, "funderline", '_', 0 },
{ 0, "fverbatim-entities", 'E', 0 },
{ 0, "fwiki-links", 'K', 0 },
{ 0, "fmention-links", 'M', 0 },


{ 0, "fno-html-blocks", 'F', 0 },
{ 0, "fno-html-spans", 'G', 0 },
Expand Down Expand Up @@ -269,6 +271,7 @@ usage(void)
" --ftasklists Enable task lists\n"
" --funderline Enable underline spans\n"
" --fwiki-links Enable wiki links\n"
" --fmention-links Enable mention links\n"
"\n"
"Markdown suppression options:\n"
" --fno-html-blocks\n"
Expand Down Expand Up @@ -335,6 +338,7 @@ cmdline_callback(int opt, char const* value, void* data)
case 'K': parser_flags |= MD_FLAG_WIKILINKS; break;
case 'X': parser_flags |= MD_FLAG_TASKLISTS; break;
case '_': parser_flags |= MD_FLAG_UNDERLINE; break;
case 'M': parser_flags |= MD_FLAG_MENTIONS; break;

default:
fprintf(stderr, "Illegal option: %s\n", value);
Expand Down
4 changes: 4 additions & 0 deletions scripts/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ echo
echo "Underline extension:"
$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/underline.txt" -p "$PROGRAM --funderline"

echo
echo "Mention links extension:"
$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/mention-links.txt" -p "$PROGRAM --fmention-links"

echo
echo "Pathological input:"
$PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"
9 changes: 9 additions & 0 deletions src/md4c-html.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,13 @@ render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
}
}

static void
render_mention_link(MD_HTML* r, const MD_SPAN_MENTION_DETAIL* det)
{
RENDER_VERBATIM(r, "<x-mention data-target=\"");
render_entity(r, det->text, det->size, render_html_escaped);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should likely call render_verbatim() instead. No entity translation should take place here, I guess.

RENDER_VERBATIM(r, "\">");
}

static void
render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
Expand Down Expand Up @@ -466,6 +473,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
case MD_SPAN_LATEXMATH: RENDER_VERBATIM(r, "<x-equation>"); break;
case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "<x-equation type=\"display\">"); break;
case MD_SPAN_WIKILINK: render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
case MD_SPAN_MENTION: render_mention_link(r, (MD_SPAN_MENTION_DETAIL*) detail); break;
}

return 0;
Expand Down Expand Up @@ -495,6 +503,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
case MD_SPAN_LATEXMATH: /*fall through*/
case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "</x-equation>"); break;
case MD_SPAN_WIKILINK: RENDER_VERBATIM(r, "</x-wikilink>"); break;
case MD_SPAN_MENTION: RENDER_VERBATIM(r, "</x-mention>"); break;
}

return 0;
Expand Down
33 changes: 31 additions & 2 deletions src/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -2709,7 +2709,7 @@ md_build_mark_char_map(MD_CTX* ctx)
if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
ctx->mark_char_map['$'] = 1;

if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
if ((ctx->parser.flags & MD_FLAG_MENTIONS) || (ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS))
ctx->mark_char_map['@'] = 1;

if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
Expand Down Expand Up @@ -3190,9 +3190,26 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
continue;
}

/* A potential mention link. */
/* A potential permissive e-mail autolink. */
if(ch == _T('@')) {
if(line->beg + 1 <= off && ISALNUM(off-1) &&
if( (ctx->parser.flags & MD_FLAG_MENTIONS) && (line->beg == off || (CH(off-1) == _T(' '))) )
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why only a space? I think any whitespace char would be ok here.

Maybe even some listed (but likely not all) punctuation chars could validly precede or follow it, respectively. I think things like the following can be common, e.g. in the process of any collaborative document writing.

# The Most Important Secret (draft)

The Answer to the Ultimate Question of Life, the Universe, and Everything is 41.
(@zaphod_beeblebrox: Please verify whether it shouldn't be 42)

Or

@alice, @bob and @charlie are sending some packets to @daniel.

{
OFF index = off + 1;
if (index == line->end || CH(index) == ' ') {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

off++;
continue;
}
while (index <= line->end)
{
if (!(ISALNUM(index) || (CH(index) == '_')))
break;
index++;
}
PUSH_MARK('@', off, index, MD_MARK_RESOLVED);
off = index;
}
else if(line->beg + 1 <= off && ISALNUM(off-1) &&
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition should now likely check explicitly whether MD_FLAG_PERMISSIVEEMAILAUTOLINKS is enabled. That was not needed previously because @ was added into the mark_char_map[] in md_build_mark_char_map() if and only if that extension was enabled.

off + 3 < line->end && ISALNUM(off+1))
{
PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
Expand Down Expand Up @@ -4291,6 +4308,7 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
MD_FALLTHROUGH();

case '@': /* Permissive e-mail autolink. */
/* Mention link */
case ':': /* Permissive URL autolink. */
case '.': /* Permissive WWW autolink. */
{
Expand All @@ -4299,6 +4317,17 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
const CHAR* dest = STR(opener->end);
SZ dest_size = closer->beg - opener->end;

MD_SPAN_MENTION_DETAIL det;
if (CH(mark->beg) == '@')
{
det.text = (char *) ctx->text + mark->beg + 1;
det.size = mark->end - mark->beg - 1;
MD_ENTER_SPAN(MD_SPAN_MENTION, &det);
MD_TEXT(text_type, STR(mark->beg), mark->end - mark->beg);
MD_LEAVE_SPAN(MD_SPAN_MENTION, &det);
break;
}

Comment on lines +4320 to +4330
Copy link
Owner

@mity mity Jan 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you know at this point whether it's a mention link or a permissive e-mail auto-link? Both extensions may be enabled at the same time.

/* For permissive auto-links we do not know closer mark
* position at the time of md_collect_marks(), therefore
* it can be out-of-order in ctx->marks[].
Expand Down
11 changes: 10 additions & 1 deletion src/md4c.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ typedef enum MD_SPANTYPE {

/* <u>...</u>
* Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
MD_SPAN_U
MD_SPAN_U,

MD_SPAN_MENTION
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer renaming it to MD_SPAN_MENTIONLINK for the sake of consistency, e.g. with MD_SPAN_WIKILINK etc.

} MD_SPANTYPE;

/* Text is the actual textual contents of span. */
Expand Down Expand Up @@ -297,6 +299,12 @@ typedef struct MD_SPAN_WIKILINK {
MD_ATTRIBUTE target;
} MD_SPAN_WIKILINK_DETAIL;

/* Detailed info for MD_SPAN_MENTION. */
typedef struct MD_SPAN_MENTION {
unsigned char size;
MD_CHAR* text;
Comment on lines +304 to +305
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand it's supposed to contain only a verbatim text (the username) with no possible nested formatting options, yet I wonder whether rather using MD_ATTRIBUTE target (and initializing it to a single substr_type of type MD_TEXT_NORMAL) would not be better, also for the sake of consistency with other detail structures.

} MD_SPAN_MENTION_DETAIL;

/* Flags specifying extensions/deviations from CommonMark specification.
*
* By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
Expand All @@ -316,6 +324,7 @@ typedef struct MD_SPAN_WIKILINK {
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
#define MD_FLAG_MENTIONS 0x8000 /* Enable mention links extension. */
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly please rename to MD_FLAG_MENTIONLINKS.


#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
Expand Down
29 changes: 29 additions & 0 deletions test/mention-links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Mention links

With the flag `MD_FLAG_MENTIONS`, MD4C enables an extension for recognizing mention-style (@) links.

A mention link ends at the first non-alphanumeric character. The only exception is the underscore, which is allowed inside a mention.

```````````````````````````````` example
someone said @me said such and such.
.
<p>someone said <x-mention data-target="me">@me</x-mention> said such and such.</p>
````````````````````````````````

```````````````````````````````` example
@me said such and such.
.
<p><x-mention data-target="me">@me</x-mention> said such and such.</p>
````````````````````````````````

```````````````````````````````` example
An empty at character is not recognized as a mention:

@

@
.
<p>An empty at character is not recognized as a mention:</p>
<p>@</p>
<p>@</p>
````````````````````````````````