From 994b2e6127850bfa85547a202048f29b6fc26913 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Tue, 30 Aug 2022 09:36:16 -0500 Subject: [PATCH] feat: add async option (#2474) * fix: return values from walkTokens * docs: add async docs * test: add async test * docs: add nav to async * Update docs/USING_PRO.md Co-authored-by: Steven * test: expect promise * Update docs/USING_ADVANCED.md Co-authored-by: Steven Co-authored-by: Steven --- docs/USING_ADVANCED.md | 1 + docs/USING_PRO.md | 72 ++++++++++++++ docs/_document.html | 1 + src/Tokenizer.js | 51 +++++----- src/defaults.js | 1 + src/marked.js | 45 ++++++--- test/bench.js | 196 ++++++++++++++------------------------- test/unit/marked-spec.js | 19 ++++ 8 files changed, 215 insertions(+), 171 deletions(-) diff --git a/docs/USING_ADVANCED.md b/docs/USING_ADVANCED.md index e2adba9d43..f49d320b4d 100644 --- a/docs/USING_ADVANCED.md +++ b/docs/USING_ADVANCED.md @@ -44,6 +44,7 @@ console.log(marked.parse(markdownString)); |Member |Type |Default |Since |Notes | |:-----------|:---------|:--------|:--------|:-------------| +|async |`boolean` |`false` |4.1.0 |If true, `walkTokens` functions can be async and `marked.parse` will return a promise that resolves when all walk tokens functions resolve.| |baseUrl |`string` |`null` |0.3.9 |A prefix url for any relative link. | |breaks |`boolean` |`false` |v0.2.7 |If true, add `
` on a single line break (copies GitHub behavior on comments, but not on rendered markdown files). Requires `gfm` be `true`.| |gfm |`boolean` |`true` |v0.2.1 |If true, use approved [GitHub Flavored Markdown (GFM) specification](https://github.github.com/gfm/).| diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index c621f505fd..e162bdf437 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -438,6 +438,78 @@ console.log(marked.parse('A Description List:\n' *** +

Async Marked : async

+ +Marked will return a promise if the `async` option is true. The `async` option will tell marked to await any `walkTokens` functions before parsing the tokens and returning an HTML string. + +Simple Example: + +```js +const walkTokens = async (token) => { + if (token.type === 'link') { + try { + await fetch(token.href); + } catch (ex) { + token.title = 'invalid'; + } + } +}; + +marked.use({ walkTokens, async: true }); + +const markdown = ` +[valid link](https://example.com) + +[invalid link](https://invalidurl.com) +`; + +const html = await marked.parse(markdown); +``` + +Custom Extension Example: + +```js +const importUrl = { + extensions: [{ + name: 'importUrl', + level: 'block', + start(src) { return src.indexOf('\n:'); }, + tokenizer(src) { + const rule = /^:(https?:\/\/.+?):/; + const match = rule.exec(src); + if (match) { + return { + type: 'importUrl', + raw: match[0], + url: match[1], + html: '' // will be replaced in walkTokens + }; + } + }, + renderer(token) { + return token.html; + } + }], + async: true, // needed to tell marked to return a promise + async walkTokens(token) { + if (token.type === 'importUrl') { + const res = await fetch(token.url); + token.html = await res.text(); + } + } +}; + +marked.use(importUrl); + +const markdown = ` +# example.com + +:https://example.com: +`; + +const html = await marked.parse(markdown); +``` +

The Lexer

The lexer takes a markdown string and calls the tokenizer functions. diff --git a/docs/_document.html b/docs/_document.html index 3527ebc1e9..c3dea889cc 100644 --- a/docs/_document.html +++ b/docs/_document.html @@ -51,6 +51,7 @@

Marked Documentation

  • Tokenizer
  • Walk Tokens
  • Custom Extensions
  • +
  • Async Marked
  • Lexer
  • Parser
  • diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 214669c290..76f26e67fb 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -19,7 +19,7 @@ function outputLink(cap, link, raw, lexer) { href, title, text, - tokens: lexer.inlineTokens(text, []) + tokens: lexer.inlineTokens(text) }; lexer.state.inLink = false; return token; @@ -125,15 +125,13 @@ export class Tokenizer { } } - const token = { + return { type: 'heading', raw: cap[0], depth: cap[1].length, text, - tokens: [] + tokens: this.lexer.inline(text) }; - this.lexer.inline(token.text, token.tokens); - return token; } } @@ -355,10 +353,10 @@ export class Tokenizer { text: cap[0] }; if (this.options.sanitize) { + const text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]); token.type = 'paragraph'; - token.text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]); - token.tokens = []; - this.lexer.inline(token.text, token.tokens); + token.text = text; + token.tokens = this.lexer.inline(text); } return token; } @@ -416,8 +414,7 @@ export class Tokenizer { // header child tokens l = item.header.length; for (j = 0; j < l; j++) { - item.header[j].tokens = []; - this.lexer.inline(item.header[j].text, item.header[j].tokens); + item.header[j].tokens = this.lexer.inline(item.header[j].text); } // cell child tokens @@ -425,8 +422,7 @@ export class Tokenizer { for (j = 0; j < l; j++) { row = item.rows[j]; for (k = 0; k < row.length; k++) { - row[k].tokens = []; - this.lexer.inline(row[k].text, row[k].tokens); + row[k].tokens = this.lexer.inline(row[k].text); } } @@ -438,45 +434,40 @@ export class Tokenizer { lheading(src) { const cap = this.rules.block.lheading.exec(src); if (cap) { - const token = { + return { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 
1 : 2, text: cap[1], - tokens: [] + tokens: this.lexer.inline(cap[1]) }; - this.lexer.inline(token.text, token.tokens); - return token; } } paragraph(src) { const cap = this.rules.block.paragraph.exec(src); if (cap) { - const token = { + const text = cap[1].charAt(cap[1].length - 1) === '\n' + ? cap[1].slice(0, -1) + : cap[1]; + return { type: 'paragraph', raw: cap[0], - text: cap[1].charAt(cap[1].length - 1) === '\n' - ? cap[1].slice(0, -1) - : cap[1], - tokens: [] + text, + tokens: this.lexer.inline(text) }; - this.lexer.inline(token.text, token.tokens); - return token; } } text(src) { const cap = this.rules.block.text.exec(src); if (cap) { - const token = { + return { type: 'text', raw: cap[0], text: cap[0], - tokens: [] + tokens: this.lexer.inline(cap[0]) }; - this.lexer.inline(token.text, token.tokens); - return token; } } @@ -645,7 +636,7 @@ export class Tokenizer { type: 'em', raw: src.slice(0, lLength + match.index + rLength + 1), text, - tokens: this.lexer.inlineTokens(text, []) + tokens: this.lexer.inlineTokens(text) }; } @@ -655,7 +646,7 @@ export class Tokenizer { type: 'strong', raw: src.slice(0, lLength + match.index + rLength + 1), text, - tokens: this.lexer.inlineTokens(text, []) + tokens: this.lexer.inlineTokens(text) }; } } @@ -696,7 +687,7 @@ export class Tokenizer { type: 'del', raw: cap[0], text: cap[2], - tokens: this.lexer.inlineTokens(cap[2], []) + tokens: this.lexer.inlineTokens(cap[2]) }; } } diff --git a/src/defaults.js b/src/defaults.js index 3a68802cdf..e295ad2ebc 100644 --- a/src/defaults.js +++ b/src/defaults.js @@ -1,5 +1,6 @@ export function getDefaults() { return { + async: false, baseUrl: null, breaks: false, extensions: null, diff --git a/src/marked.js b/src/marked.js index 10f543336c..feb500397f 100644 --- a/src/marked.js +++ b/src/marked.js @@ -105,13 +105,7 @@ export function marked(src, opt, callback) { return; } - try { - const tokens = Lexer.lex(src, opt); - if (opt.walkTokens) { - marked.walkTokens(tokens, opt.walkTokens); 
    -    }
    -    return Parser.parse(tokens, opt);
    -  } catch (e) {
    +  function onError(e) {
         e.message += '\nPlease report this to https://github.com/markedjs/marked.';
         if (opt.silent) {
           return '<p>An error occurred:</p><pre>'
    @@ -120,6 +114,23 @@ export function marked(src, opt, callback) {
         }
         throw e;
       }
    +
    +  try {
    +    const tokens = Lexer.lex(src, opt);
    +    if (opt.walkTokens) {
    +      if (opt.async) { // async: wait for every walkTokens result before parsing
    +        return Promise.all(marked.walkTokens(tokens, opt.walkTokens))
    +          .then(() => {
    +            return Parser.parse(tokens, opt);
    +          })
    +          .catch(onError);
    +      }
    +      marked.walkTokens(tokens, opt.walkTokens);
    +    }
    +    return Parser.parse(tokens, opt);
    +  } catch (e) {
    +    return onError(e); // onError returns the error HTML when opt.silent is set; propagate it like the async path does
    +  }
     }
     
     /**
    @@ -236,10 +247,12 @@ marked.use = function(...args) {
         if (pack.walkTokens) {
           const walkTokens = marked.defaults.walkTokens;
           opts.walkTokens = function(token) {
    -        pack.walkTokens.call(this, token);
    +        let values = [];
    +        values.push(pack.walkTokens.call(this, token));
             if (walkTokens) {
    -          walkTokens.call(this, token);
    +          values = values.concat(walkTokens.call(this, token));
             }
    +        return values;
           };
         }
     
    @@ -256,35 +269,37 @@ marked.use = function(...args) {
      */
     
     marked.walkTokens = function(tokens, callback) {
    +  let values = [];
       for (const token of tokens) {
    -    callback.call(marked, token);
    +    values = values.concat(callback.call(marked, token));
         switch (token.type) {
           case 'table': {
             for (const cell of token.header) {
    -          marked.walkTokens(cell.tokens, callback);
    +          values = values.concat(marked.walkTokens(cell.tokens, callback));
             }
             for (const row of token.rows) {
               for (const cell of row) {
    -            marked.walkTokens(cell.tokens, callback);
    +            values = values.concat(marked.walkTokens(cell.tokens, callback));
               }
             }
             break;
           }
           case 'list': {
    -        marked.walkTokens(token.items, callback);
    +        values = values.concat(marked.walkTokens(token.items, callback));
             break;
           }
           default: {
             if (marked.defaults.extensions && marked.defaults.extensions.childTokens && marked.defaults.extensions.childTokens[token.type]) { // Walk any extensions
               marked.defaults.extensions.childTokens[token.type].forEach(function(childTokens) {
    -            marked.walkTokens(token[childTokens], callback);
    +            values = values.concat(marked.walkTokens(token[childTokens], callback));
               });
             } else if (token.tokens) {
    -          marked.walkTokens(token.tokens, callback);
    +          values = values.concat(marked.walkTokens(token.tokens, callback));
             }
           }
         }
       }
    +  return values;
     };
     
     /**
    diff --git a/test/bench.js b/test/bench.js
    index 7afd24f0e6..7b3d9e71b2 100644
    --- a/test/bench.js
    +++ b/test/bench.js
    @@ -3,6 +3,7 @@ import { fileURLToPath } from 'url';
     import { isEqual } from './helpers/html-differ.js';
     import { loadFiles } from './helpers/load.js';
     
    +import { marked as cjsMarked } from '../lib/marked.cjs';
     import { marked as esmMarked } from '../lib/marked.esm.js';
     
     const __dirname = dirname(fileURLToPath(import.meta.url));
    @@ -30,9 +31,10 @@ export function load() {
     export async function runBench(options) {
       options = options || {};
       const specs = load();
    +  const tests = {};
     
       // Non-GFM, Non-pedantic
    -  marked.setOptions({
    +  cjsMarked.setOptions({
         gfm: false,
         breaks: false,
         pedantic: false,
    @@ -40,9 +42,9 @@ export async function runBench(options) {
         smartLists: false
       });
       if (options.marked) {
    -    marked.setOptions(options.marked);
    +    cjsMarked.setOptions(options.marked);
       }
    -  await bench('cjs marked', specs, marked.parse);
    +  tests['cjs marked'] = cjsMarked.parse;
     
       esmMarked.setOptions({
         gfm: false,
    @@ -54,113 +56,76 @@ export async function runBench(options) {
       if (options.marked) {
         esmMarked.setOptions(options.marked);
       }
    -  await bench('esm marked', specs, esmMarked.parse);
    +  tests['esm marked'] = esmMarked.parse;
     
    -  // GFM
    -  marked.setOptions({
    -    gfm: true,
    -    breaks: false,
    -    pedantic: false,
    -    sanitize: false,
    -    smartLists: false
    -  });
    -  if (options.marked) {
    -    marked.setOptions(options.marked);
    -  }
    -  await bench('cjs marked (gfm)', specs, marked.parse);
    -
    -  esmMarked.setOptions({
    -    gfm: true,
    -    breaks: false,
    -    pedantic: false,
    -    sanitize: false,
    -    smartLists: false
    -  });
    -  if (options.marked) {
    -    esmMarked.setOptions(options.marked);
    -  }
    -  await bench('esm marked (gfm)', specs, esmMarked.parse);
    -
    -  // Pedantic
    -  marked.setOptions({
    -    gfm: false,
    -    breaks: false,
    -    pedantic: true,
    -    sanitize: false,
    -    smartLists: false
    -  });
    -  if (options.marked) {
    -    marked.setOptions(options.marked);
    -  }
    -  await bench('cjs marked (pedantic)', specs, marked.parse);
    -
    -  esmMarked.setOptions({
    -    gfm: false,
    -    breaks: false,
    -    pedantic: true,
    -    sanitize: false,
    -    smartLists: false
    -  });
    -  if (options.marked) {
    -    esmMarked.setOptions(options.marked);
    -  }
    -  await bench('esm marked (pedantic)', specs, esmMarked.parse);
    +  // esmMarked.setOptions({
    +  //   gfm: true,
    +  //   breaks: false,
    +  //   pedantic: false,
    +  //   sanitize: false,
    +  //   smartLists: false
    +  // });
    +  // if (options.marked) {
    +  //   esmMarked.setOptions(options.marked);
    +  // }
    +  // tests['esm marked (gfm)'] = esmMarked.parse;
     
       try {
    -    await bench('commonmark', specs, (await (async() => {
    +    tests.commonmark = (await (async() => {
           const { Parser, HtmlRenderer } = await import('commonmark');
           const parser = new Parser();
           const writer = new HtmlRenderer();
           return function(text) {
             return writer.render(parser.parse(text));
           };
    -    })()));
    +    })());
       } catch (e) {
         console.error('Could not bench commonmark. (Error: %s)', e.message);
       }
     
       try {
    -    await bench('markdown-it', specs, (await (async() => {
    +    tests['markdown-it'] = (await (async() => {
           const MarkdownIt = (await import('markdown-it')).default;
           const md = new MarkdownIt();
           return md.render.bind(md);
    -    })()));
    +    })());
       } catch (e) {
         console.error('Could not bench markdown-it. (Error: %s)', e.message);
       }
    +
    +  await bench(tests, specs);
     }
     
    -export async function bench(name, specs, engine) {
    -  const before = process.hrtime();
    -  for (let i = 0; i < 1e3; i++) {
    -    for (const spec of specs) {
    -      await engine(spec.markdown);
    +export async function bench(tests, specs) {
    +  const stats = {};
    +  for (const name in tests) {
    +    stats[name] = {
    +      elapsed: 0n,
    +      correct: 0
    +    };
    +  }
    +
    +  console.log();
    +  for (let i = 0; i < specs.length; i++) {
    +    const spec = specs[i];
    +    process.stdout.write(`${(i * 100 / specs.length).toFixed(1).padStart(5)}% ${i.toString().padStart(specs.length.toString().length)} of ${specs.length}\r`);
    +    for (const name in tests) {
    +      const test = tests[name];
    +      const before = process.hrtime.bigint();
    +      for (let n = 0; n < 1e3; n++) {
    +        await test(spec.markdown);
    +      }
    +      const after = process.hrtime.bigint();
    +      stats[name].elapsed += after - before;
    +      stats[name].correct += (await isEqual(spec.html, await test(spec.markdown)) ? 1 : 0);
         }
       }
    -  const elapsed = process.hrtime(before);
    -  const ms = prettyElapsedTime(elapsed).toFixed();
     
    -  let correct = 0;
    -  for (const spec of specs) {
    -    if (await isEqual(spec.html, await engine(spec.markdown))) {
    -      correct++;
    -    }
    +  for (const name in tests) {
    +    const ms = prettyElapsedTime(stats[name].elapsed);
    +    const percent = (stats[name].correct / specs.length * 100).toFixed(2);
    +    console.log(`${name} completed in ${ms}ms and passed ${percent}%`);
       }
    -  const percent = (correct / specs.length * 100).toFixed(2);
    -
    -  console.log('%s completed in %sms and passed %s%', name, ms, percent);
    -}
    -
    -/**
    - * A simple one-time benchmark
    - */
    -export async function time(options) {
    -  options = options || {};
    -  const specs = load();
    -  if (options.marked) {
    -    marked.setOptions(options.marked);
    -  }
    -  await bench('marked', specs, marked);
     }
     
     /**
    @@ -204,35 +169,23 @@ function parseArg(argv) {
     
       while (argv.length) {
         const arg = getarg();
    -    switch (arg) {
    -      case '-t':
    -      case '--time':
    -        options.time = true;
    -        break;
    -      case '-m':
    -      case '--minified':
    -        options.minified = true;
    -        break;
    -      default:
    -        if (arg.indexOf('--') === 0) {
    -          const opt = camelize(arg.replace(/^--(no-)?/, ''));
    -          if (!defaults.hasOwnProperty(opt)) {
    -            continue;
    -          }
    -          options.marked = options.marked || {};
    -          if (arg.indexOf('--no-') === 0) {
    -            options.marked[opt] = typeof defaults[opt] !== 'boolean'
    -              ? null
    -              : false;
    -          } else {
    -            options.marked[opt] = typeof defaults[opt] !== 'boolean'
    -              ? argv.shift()
    -              : true;
    -          }
    -        } else {
    -          orphans.push(arg);
    -        }
    -        break;
    +    if (arg.indexOf('--') === 0) {
    +      const opt = camelize(arg.replace(/^--(no-)?/, ''));
    +      if (!defaults.hasOwnProperty(opt)) {
    +        continue;
    +      }
    +      options.marked = options.marked || {};
    +      if (arg.indexOf('--no-') === 0) {
    +        options.marked[opt] = typeof defaults[opt] !== 'boolean'
    +          ? null
    +          : false;
    +      } else {
    +        options.marked[opt] = typeof defaults[opt] !== 'boolean'
    +          ? argv.shift()
    +          : true;
    +      }
    +    } else {
    +      orphans.push(arg);
         }
       }
     
    @@ -257,28 +210,19 @@ function camelize(text) {
      * Main
      */
     export default async function main(argv) {
    -  marked = (await import('../lib/marked.cjs')).marked;
    +  marked = cjsMarked;
     
       const opt = parseArg(argv);
     
    -  if (opt.minified) {
    -    marked = (await import('../marked.min.js')).marked;
    -  }
    -
    -  if (opt.time) {
    -    await time(opt);
    -  } else {
    -    await runBench(opt);
    -  }
    +  await runBench(opt);
     }
     
     /**
      * returns time to millisecond granularity
    + * @param hrtimeElapsed {bigint}
      */
     function prettyElapsedTime(hrtimeElapsed) {
    -  const seconds = hrtimeElapsed[0];
    -  const frac = Math.round(hrtimeElapsed[1] / 1e3) / 1e3;
    -  return seconds * 1e3 + frac;
    +  return Number(hrtimeElapsed / 1_000_000n);
     }
     
     process.title = 'marked bench';
    diff --git a/test/unit/marked-spec.js b/test/unit/marked-spec.js
    index 0ad5ac6fec..d72d0fba72 100644
    --- a/test/unit/marked-spec.js
    +++ b/test/unit/marked-spec.js
    @@ -1058,4 +1058,23 @@ br
         });
         expect(marked('*text*').trim()).toBe('<p><em>text walked</em></p>');
       });
    +
    +  it('should wait for async `walkTokens` function', async() => {
    +    marked.use({
    +      async: true,
    +      async walkTokens(token) {
    +        if (token.type === 'em') {
    +          await new Promise((resolve) => {
    +            setTimeout(resolve, 100);
    +          });
    +          token.text += ' walked';
    +          token.tokens = this.Lexer.lexInline(token.text);
    +        }
    +      }
    +    });
    +    const promise = marked('*text*');
    +    expect(promise).toBeInstanceOf(Promise);
    +    const html = await promise;
    +    expect(html.trim()).toBe('<p><em>text walked</em></p>');
    +  });
     });