Skip to content

Commit

Permalink
Refactor codespan detection in markdown link provider (microsoft#139770)
Browse files Browse the repository at this point in the history
  • Loading branch information
WaqasAliAbbasi committed Jan 20, 2022
1 parent e5fa447 commit f15ede5
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import Token = require('markdown-it/lib/token');
import * as vscode from 'vscode';
import * as nls from 'vscode-nls';
import { OpenDocumentLinkCommand } from '../commands/openDocumentLink';
Expand Down Expand Up @@ -104,78 +103,69 @@ export function stripAngleBrackets(link: string) {
return link.replace(angleBracketLinkRe, '$1');
}

const codeSpanAndLinkPattern = /(?:(?<!`)(`+)(?!`)(?:.+?|.*?(?:(?:\r?\n).+?)*?)(?:\r?\n)?(?<!`)\1(?!`))|(?:(\[((!\[[^\]]*?\]\(\s*)([^\s\(\)]+?)\s*\)\]|(?:\\\]|[^\]])*\])\(\s*)(([^\s\(\)]|\([^\s\(\)]*?\))+)\s*(".*?")?\))/g;
const linkPattern = /(\[((!\[[^\]]*?\]\(\s*)([^\s\(\)]+?)\s*\)\]|(?:\\\]|[^\]])*\])\(\s*)(([^\s\(\)]|\([^\s\(\)]*?\))+)\s*(".*?")?\)/g;
const referenceLinkPattern = /(\[((?:\\\]|[^\]])+)\]\[\s*?)([^\s\]]*?)\]/g;
const definitionPattern = /^([\t ]*\[(?!\^)((?:\\\]|[^\]])+)\]:\s*)([^<]\S*|<[^>]+>)/gm;

/**
* Asserts if number is inside at least one interval in the array of intervals.
*
* @param intervals An array of [a,b) sorted by `a` in ascending order.
*/
const isNumberInIntervals = (intervals: [number, number][], start: number, end: number, target: number): Boolean => {
if (start > end) {
return false;
}
const mid = start + Math.floor((end - start) / 2);
const pair = intervals[mid];
if (target >= pair[0] && target < pair[1]) {
return true;
}
if (target >= pair[1]) {
return isNumberInIntervals(intervals, mid + 1, end, target);
}
return isNumberInIntervals(intervals, start, mid - 1, target);
const inlineCodePattern = /(?:(?<!`)(`+)(?!`)(?:.+?|.*?(?:(?:\r?\n).+?)*?)(?:\r?\n)?(?<!`)\1(?!`))/g;

/**
 * Every code region found in a markdown document, grouped by how the region
 * is addressed: whole-line intervals for blocks, ranges for inline spans.
 */
type CodeInDocument = {
	/** Code blocks and fences, each stored as a half-open line interval `[line_start, line_end)`. */
	multiline: Array<[number, number]>;
	/** Inline code spans, each stored as a {@link vscode.Range}. */
	inline: Array<vscode.Range>;
};

/**
 * Collects the source-map line pairs of every indented code block and fenced
 * code block among the given tokens.
 *
 * @param tokens Tokens to scan.
 * @returns The `map` pair of each `code_block` / `fence` token that has one,
 * in token order. Tokens of other types, and code tokens without a `map`,
 * are skipped. An empty input yields an empty array.
 */
const extractLineIntervalsOfCodeblocksAndFences = (tokens: Token[]): [number, number][] => {
	const intervals: [number, number][] = [];
	for (const token of tokens) {
		const isCode = token.type === 'code_block' || token.type === 'fence';
		if (isCode && token.map) {
			// Append in place; re-spreading the accumulator for every match
			// would copy all earlier intervals each time.
			intervals.push(token.map);
		}
	}
	return intervals;
};
/**
 * Locates all code in a markdown document: multiline code (code blocks and
 * fences, taken from the engine's tokens) and inline code spans (matched with
 * `inlineCodePattern` against the document text).
 *
 * @param document Document to scan.
 * @param engine Engine whose `parse` result supplies the tokens.
 * @returns The multiline intervals and inline ranges that were found.
 */
async function findCode(document: vscode.TextDocument, engine: MarkdownEngine): Promise<CodeInDocument> {
const tokens = await engine.parse(document);
// Keep only code_block / fence tokens that carry a `map`, then take that map as the interval.
const multiline = tokens.filter(t => (t.type === 'code_block' || t.type === 'fence') && !!t.map).map(t => t.map) as [number, number][];

// NOTE(review): the next line appears to be residue from the other side of the
// rendered diff (the same declaration recurs further down) — confirm it is not
// part of this function.
export default class LinkProvider implements vscode.DocumentLinkProvider {
const text = document.getText();
// Turn every regex match into a Range covering the whole matched span.
const inline = [...text.matchAll(inlineCodePattern)].map(match => {
// Fall back to offset 0 when the match carries no index.
const start = match.index || 0;
return new vscode.Range(document.positionAt(start), document.positionAt(start + match[0].length));
});

return { multiline, inline };
}

private _codeLineIntervals: [number, number][] = [];
/**
 * Tells whether a link falls inside any code region of the document.
 *
 * @param code Code regions collected from the document.
 * @param link Link whose range is tested.
 * @returns `true` when the link's start line lies in one of the multiline
 * intervals (start inclusive, end exclusive), or when some inline span's
 * `intersection` with the link's range is truthy; `false` otherwise.
 */
function isLinkInsideCode(code: CodeInDocument, link: vscode.DocumentLink) {
	// Multiline code is checked first, by line number only.
	for (const [firstLine, endLine] of code.multiline) {
		const linkLine = link.range.start.line;
		if (firstLine <= linkLine && linkLine < endLine) {
			return true;
		}
	}
	// Inline spans are checked by range overlap.
	for (const span of code.inline) {
		if (span.intersection(link.range)) {
			return true;
		}
	}
	return false;
}

export default class LinkProvider implements vscode.DocumentLinkProvider {
constructor(
private readonly engine: MarkdownEngine
) { }

private isLineInsideIndentedOrFencedCode(line: number): Boolean {
return isNumberInIntervals(this._codeLineIntervals, 0, this._codeLineIntervals.length - 1, line);
}

public async provideDocumentLinks(
document: vscode.TextDocument,
_token: vscode.CancellationToken
): Promise<vscode.DocumentLink[]> {
const text = document.getText();
const tokens = await this.engine.parse(document);
this._codeLineIntervals = extractLineIntervalsOfCodeblocksAndFences(tokens);
return [
...this.providerInlineLinks(text, document),
...(await this.providerInlineLinks(text, document)),
...this.provideReferenceLinks(text, document)
];
}

private providerInlineLinks(
private async providerInlineLinks(
text: string,
document: vscode.TextDocument,
): vscode.DocumentLink[] {
): Promise<vscode.DocumentLink[]> {
const results: vscode.DocumentLink[] = [];
for (const match of text.matchAll(codeSpanAndLinkPattern)) {
if (match[1]) {
continue;
}
const matchImage = match[5] && extractDocumentLink(document, match[4].length + 1, match[5], match.index);
if (matchImage && !this.isLineInsideIndentedOrFencedCode(matchImage.range.start.line)) {
const codeInDocument = await findCode(document, this.engine);
for (const match of text.matchAll(linkPattern)) {
const matchImage = match[4] && extractDocumentLink(document, match[3].length + 1, match[4], match.index);
if (matchImage && !isLinkInsideCode(codeInDocument, matchImage)) {
results.push(matchImage);
}
const matchLink = extractDocumentLink(document, match[2].length, match[6], match.index);
if (matchLink && !this.isLineInsideIndentedOrFencedCode(matchLink.range.start.line)) {
const matchLink = extractDocumentLink(document, match[1].length, match[5], match.index);
if (matchLink && !isLinkInsideCode(codeInDocument, matchLink)) {
results.push(matchLink);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import * as vscode from 'vscode';
import LinkProvider from '../features/documentLinkProvider';
import { createNewMarkdownEngine } from './engine';
import { InMemoryDocument } from './inMemoryDocument';
import { noopToken } from './util';
import { joinLines, noopToken } from './util';


const testFile = vscode.Uri.joinPath(vscode.workspace.workspaceFolders![0].uri, 'x.md');
Expand Down Expand Up @@ -151,28 +151,73 @@ suite('markdown.DocumentLinkProvider', () => {
assertRangeEqual(link2.range, new vscode.Range(1, 6, 1, 8));
});

test('Should not consider links in fenced, indented and inline code', async () => {
const links = await getLinksForFile(['```',
'[ignore](https://1.com)',
test('Should not consider links in code fenced with backticks', async () => {
const text = joinLines(
'```',
'[b](https://example.com)',
'```');
const links = await getLinksForFile(text);
assert.strictEqual(links.length, 0);
});

test('Should not consider links in code fenced with tilda', async () => {
const text = joinLines(
'~~~',
'[ignore](https://2.com)',
'~~~',
' [ignore](https://3.com)',
'[b](https://example.com)',
'~~~');
const links = await getLinksForFile(text);
assert.strictEqual(links.length, 0);
});

test('Should not consider links in indented code', async () => {
	// An indented code block needs at least four leading spaces; with fewer,
	// the line is an ordinary paragraph and its link would be reported.
	const links = await getLinksForFile('    [b](https://example.com)');
	assert.strictEqual(links.length, 0);
});

test('Should not consider links in inline code span', async () => {
	// The entire link sits between a pair of single backticks.
	const source = '`[b](https://example.com)`';
	const found = await getLinksForFile(source);
	assert.strictEqual(found.length, 0);
});

test('Should not consider links with code span inside', async () => {
	// The backtick pair opens inside the link text and closes inside the URL,
	// so the code span overlaps the link.
	const source = '[li`nk](https://example.com`)';
	const found = await getLinksForFile(source);
	assert.strictEqual(found.length, 0);
});

test('Should not consider links in multiline inline code span', async () => {
const text = joinLines(
'`` ',
'[ignore](https://4.com) ',
'``',
'[b](https://example.com)',
'``');
const links = await getLinksForFile(text);
assert.strictEqual(links.length, 0);
});

test('Should not consider links in multiline inline code span between text', async () => {
	const text = joinLines(
		'[b](https://1.com) `[b](https://2.com)',
		'` [b](https://3.com)');
	const links = await getLinksForFile(text);
	// Only the links outside the backtick span (1.com and 3.com) are expected.
	assert.deepStrictEqual(links.map(l => l.target?.authority), ['1.com', '3.com']);
});

test('Should not consider links in multiline inline code span with new line after the first backtick', async () => {
	// The opening backtick is alone on the first line; the span closes at the
	// end of the second line, after the link.
	const source = joinLines(
		'`',
		'[b](https://example.com)`');
	const found = await getLinksForFile(source);
	assert.strictEqual(found.length, 0);
});

test('Should not miss links in invalid multiline inline code span', async () => {
const text = joinLines(
'`` ',
'',
'[link](https://5.com)',
'[b](https://example.com)',
'',
'``',
'`[ignore](https://6.com)`',
'[link](https://7.com) `[b](https://8.com)',
'` [link](https://9.com)',
'`',
'[ignore](https://10.com)`'].join('\n'));
assert.deepStrictEqual(links.map(l => l.target?.authority), ['5.com', '7.com', '9.com']);
'``');
const links = await getLinksForFile(text);
assert.strictEqual(links.length, 1);
});
});

Expand Down

0 comments on commit f15ede5

Please sign in to comment.