
Commit 1f88deb

fix: Move all regexps to rules (#3519)
1 parent 58d66e5 commit 1f88deb

File tree

5 files changed, +148 -97 lines changed

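The commit replaces inline regex literals across the lexer, renderer, and tokenizer with named entries on a shared `other` collection exported from src/rules.ts. That file is not included in the excerpt shown here, so the snippet below is only a rough sketch of what the static part of the collection presumably looks like: the property names are taken from the new call sites in this diff, and the patterns are taken from the literals those call sites replace.

// Sketch only: the real definitions live in src/rules.ts, which is not shown in this
// excerpt. Names come from the new call sites in this commit; patterns come from the
// regex literals they replace.
export const other = {
  carriageReturn: /\r\n|\r/g,           // Lexer.lex: normalize \r\n and \r to \n
  tabCharGlobal: /\t/g,                 // tab expansion (pedantic mode, list handling)
  spaceLine: /^ +$/gm,                  // strip space-only lines (pedantic mode)
  notSpaceStart: /^\S*/,                // Renderer.code: leading language string
  endingNewline: /\n$/,                 // Renderer.code: drop one trailing newline
  endingHash: /#$/,                     // heading: detect trailing #s
  nonSpaceChar: /[^ ]/,                 // first non-space character
  blankLine: /^[ \t]*$/,                // blank-line check inside list items
  multipleSpaceGlobal: /\s+/g,          // collapse whitespace in defs and reflinks
  unicodeAlphaNumeric: /[\p{L}\p{N}]/u, // emStrong: letter/number boundary check
} as const;

The indent- and bullet-dependent patterns (listItemRegex, nextBulletRegex, and friends) are sketched separately after the src/Tokenizer.ts diff.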

src/Lexer.ts

+4 -3

@@ -1,6 +1,6 @@
 import { _Tokenizer } from './Tokenizer.ts';
 import { _defaults } from './defaults.ts';
-import { block, inline } from './rules.ts';
+import { other, block, inline } from './rules.ts';
 import type { Token, TokensList, Tokens } from './Tokens.ts';
 import type { MarkedOptions, TokenizerExtension } from './MarkedOptions.ts';

@@ -36,6 +36,7 @@ export class _Lexer {
     };

     const rules = {
+      other,
       block: block.normal,
       inline: inline.normal,
     };
@@ -85,7 +86,7 @@ export class _Lexer {
    */
   lex(src: string) {
     src = src
-      .replace(/\r\n|\r/g, '\n');
+      .replace(other.carriageReturn, '\n');

     this.blockTokens(src, this.tokens);

@@ -105,7 +106,7 @@ export class _Lexer {
   blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
   blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
     if (this.options.pedantic) {
-      src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
+      src = src.replace(other.tabCharGlobal, ' ').replace(other.spaceLine, '');
     }

     let token: Tokens.Generic | undefined;

src/Renderer.ts

+3 -2

@@ -3,6 +3,7 @@ import {
   cleanUrl,
   escape,
 } from './helpers.ts';
+import { other } from './rules.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
 import type { Tokens } from './Tokens.ts';
 import type { _Parser } from './Parser.ts';
@@ -22,9 +23,9 @@ export class _Renderer {
   }

   code({ text, lang, escaped }: Tokens.Code): string {
-    const langString = (lang || '').match(/^\S*/)?.[0];
+    const langString = (lang || '').match(other.notSpaceStart)?.[0];

-    const code = text.replace(/\n$/, '') + '\n';
+    const code = text.replace(other.endingNewline, '') + '\n';

     if (!langString) {
       return '<pre><code>'
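In the src/Tokenizer.ts changes that follow, the module-level helpers outputLink and indentCodeCompensation gain a rules: Rules parameter so they can reach the same shared patterns without importing them directly. The Rules type itself comes from src/rules.ts and is not part of this excerpt; the sketch below is only the shape implied by the Lexer's rules bundle above ({ other, block, inline }) and by the this.rules.other.* call sites below, not the actual definition.

// Hedged sketch of the Rules bundle, inferred from this diff rather than copied from
// src/rules.ts; the real block/inline rule sets are richer than plain RegExp records.
interface Rules {
  other: typeof other;            // shared regexps, e.g. other.carriageReturn
  block: Record<string, RegExp>;  // block-level grammar (block.normal, ...)
  inline: Record<string, RegExp>; // inline-level grammar (inline.normal, ...)
}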

src/Tokenizer.ts

+53 -53

@@ -9,10 +9,10 @@ import type { _Lexer } from './Lexer.ts';
 import type { Links, Tokens, Token } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';

-function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image {
+function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer, rules: Rules): Tokens.Link | Tokens.Image {
   const href = link.href;
   const title = link.title || null;
-  const text = cap[1].replace(/\\([\[\]])/g, '$1');
+  const text = cap[1].replace(rules.other.outputLinkReplace, '$1');

   if (cap[0].charAt(0) !== '!') {
     lexer.state.inLink = true;
@@ -36,8 +36,8 @@ function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, ra
   };
 }

-function indentCodeCompensation(raw: string, text: string) {
-  const matchIndentToCode = raw.match(/^(\s+)(?:```)/);
+function indentCodeCompensation(raw: string, text: string, rules: Rules) {
+  const matchIndentToCode = raw.match(rules.other.indentCodeCompensation);

   if (matchIndentToCode === null) {
     return text;
@@ -48,7 +48,7 @@ function indentCodeCompensation(raw: string, text: string) {
   return text
     .split('\n')
     .map(node => {
-      const matchIndentInNode = node.match(/^\s+/);
+      const matchIndentInNode = node.match(rules.other.beginningSpace);
       if (matchIndentInNode === null) {
         return node;
       }
@@ -89,7 +89,7 @@ export class _Tokenizer {
   code(src: string): Tokens.Code | undefined {
     const cap = this.rules.block.code.exec(src);
     if (cap) {
-      const text = cap[0].replace(/^(?: {1,4}| {0,3}\t)/gm, '');
+      const text = cap[0].replace(this.rules.other.codeRemoveIndent, '');
       return {
         type: 'code',
         raw: cap[0],
@@ -105,7 +105,7 @@ export class _Tokenizer {
     const cap = this.rules.block.fences.exec(src);
     if (cap) {
       const raw = cap[0];
-      const text = indentCodeCompensation(raw, cap[3] || '');
+      const text = indentCodeCompensation(raw, cap[3] || '', this.rules);

       return {
         type: 'code',
@@ -122,11 +122,11 @@ export class _Tokenizer {
       let text = cap[2].trim();

       // remove trailing #s
-      if (/#$/.test(text)) {
+      if (this.rules.other.endingHash.test(text)) {
        const trimmed = rtrim(text, '#');
        if (this.options.pedantic) {
          text = trimmed.trim();
-        } else if (!trimmed || / $/.test(trimmed)) {
+        } else if (!trimmed || this.rules.other.endingSpaceChar.test(trimmed)) {
          // CommonMark requires space before trailing #s
          text = trimmed.trim();
        }
@@ -167,7 +167,7 @@ export class _Tokenizer {
     let i;
     for (i = 0; i < lines.length; i++) {
       // get lines up to a continuation
-      if (/^ {0,3}>/.test(lines[i])) {
+      if (this.rules.other.blockquoteStart.test(lines[i])) {
         currentLines.push(lines[i]);
         inBlockquote = true;
       } else if (!inBlockquote) {
@@ -181,8 +181,8 @@ export class _Tokenizer {
       const currentRaw = currentLines.join('\n');
       const currentText = currentRaw
         // precede setext continuation with 4 spaces so it isn't a setext
-        .replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1')
-        .replace(/^ {0,3}>[ \t]?/gm, '');
+        .replace(this.rules.other.blockquoteSetextReplace, '\n $1')
+        .replace(this.rules.other.blockquoteSetextReplace2, '');
       raw = raw ? `${raw}\n${currentRaw}` : currentRaw;
       text = text ? `${text}\n${currentText}` : currentText;

@@ -258,7 +258,7 @@ export class _Tokenizer {
      }

      // Get next list item
-      const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`);
+      const itemRegex = this.rules.other.listItemRegex(bull);
      let endsWithBlankLine = false;
      // Check if current bullet point can start a new List Item
      while (src) {
@@ -276,7 +276,7 @@ export class _Tokenizer {
        raw = cap[0];
        src = src.substring(raw.length);

-        let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
+        let line = cap[2].split('\n', 1)[0].replace(this.rules.other.listReplaceTabs, (t: string) => ' '.repeat(3 * t.length));
        let nextLine = src.split('\n', 1)[0];
        let blankLine = !line.trim();

@@ -287,24 +287,24 @@ export class _Tokenizer {
        } else if (blankLine) {
          indent = cap[1].length + 1;
        } else {
-          indent = cap[2].search(/[^ ]/); // Find first non-space char
+          indent = cap[2].search(this.rules.other.nonSpaceChar); // Find first non-space char
          indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
          itemContents = line.slice(indent);
          indent += cap[1].length;
        }

-        if (blankLine && /^[ \t]*$/.test(nextLine)) { // Items begin with at most one blank line
+        if (blankLine && this.rules.other.blankLine.test(nextLine)) { // Items begin with at most one blank line
          raw += nextLine + '\n';
          src = src.substring(nextLine.length + 1);
          endEarly = true;
        }

        if (!endEarly) {
-          const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`);
-          const hrRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`);
-          const fencesBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`);
-          const headingBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`);
-          const htmlBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}<(?:[a-z].*>|!--)`, 'i');
+          const nextBulletRegex = this.rules.other.nextBulletRegex(indent);
+          const hrRegex = this.rules.other.hrRegex(indent);
+          const fencesBeginRegex = this.rules.other.fencesBeginRegex(indent);
+          const headingBeginRegex = this.rules.other.headingBeginRegex(indent);
+          const htmlBeginRegex = this.rules.other.htmlBeginRegex(indent);

          // Check if following lines should be included in List Item
          while (src) {
@@ -314,10 +314,10 @@ export class _Tokenizer {

            // Re-align to follow commonmark nesting rules
            if (this.options.pedantic) {
-              nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
+              nextLine = nextLine.replace(this.rules.other.listReplaceNesting, ' ');
              nextLineWithoutTabs = nextLine;
            } else {
-              nextLineWithoutTabs = nextLine.replace(/\t/g, ' ');
+              nextLineWithoutTabs = nextLine.replace(this.rules.other.tabCharGlobal, ' ');
            }

            // End list item if found code fences
@@ -345,7 +345,7 @@ export class _Tokenizer {
              break;
            }

-            if (nextLineWithoutTabs.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible
+            if (nextLineWithoutTabs.search(this.rules.other.nonSpaceChar) >= indent || !nextLine.trim()) { // Dedent if possible
              itemContents += '\n' + nextLineWithoutTabs.slice(indent);
            } else {
              // not enough indentation
@@ -354,7 +354,7 @@ export class _Tokenizer {
            }

            // paragraph continuation unless last line was a different block level element
-            if (line.replace(/\t/g, ' ').search(/[^ ]/) >= 4) { // indented code block
+            if (line.replace(this.rules.other.tabCharGlobal, ' ').search(this.rules.other.nonSpaceChar) >= 4) { // indented code block
              break;
            }
            if (fencesBeginRegex.test(line)) {
@@ -384,7 +384,7 @@ export class _Tokenizer {
        // If the previous item ended with a blank line, the list is loose
        if (endsWithBlankLine) {
          list.loose = true;
-        } else if (/\n[ \t]*\n[ \t]*$/.test(raw)) {
+        } else if (this.rules.other.doubleBlankLine.test(raw)) {
          endsWithBlankLine = true;
        }
      }
@@ -393,10 +393,10 @@ export class _Tokenizer {
      let ischecked: boolean | undefined;
      // Check for task list items
      if (this.options.gfm) {
-        istask = /^\[[ xX]\] /.exec(itemContents);
+        istask = this.rules.other.listIsTask.exec(itemContents);
        if (istask) {
          ischecked = istask[0] !== '[ ] ';
-          itemContents = itemContents.replace(/^\[[ xX]\] +/, '');
+          itemContents = itemContents.replace(this.rules.other.listReplaceTask, '');
        }
      }

@@ -426,7 +426,7 @@ export class _Tokenizer {
      if (!list.loose) {
        // Check if list should be loose
        const spacers = list.items[i].tokens.filter(t => t.type === 'space');
-        const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => /\n.*\n/.test(t.raw));
+        const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => this.rules.other.anyLine.test(t.raw));

        list.loose = hasMultipleLineBreaks;
      }
@@ -460,8 +460,8 @@ export class _Tokenizer {
   def(src: string): Tokens.Def | undefined {
     const cap = this.rules.block.def.exec(src);
     if (cap) {
-      const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
-      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
+      const tag = cap[1].toLowerCase().replace(this.rules.other.multipleSpaceGlobal, ' ');
+      const href = cap[2] ? cap[2].replace(this.rules.other.hrefBrackets, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
      return {
        type: 'def',
@@ -479,14 +479,14 @@ export class _Tokenizer {
      return;
    }

-    if (!/[:|]/.test(cap[2])) {
+    if (!this.rules.other.tableDelimiter.test(cap[2])) {
      // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
      return;
    }

    const headers = splitCells(cap[1]);
-    const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
-    const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];
+    const aligns = cap[2].replace(this.rules.other.tableAlignChars, '').split('|');
+    const rows = cap[3] && cap[3].trim() ? cap[3].replace(this.rules.other.tableRowBlankLine, '').split('\n') : [];

    const item: Tokens.Table = {
      type: 'table',
@@ -502,11 +502,11 @@ export class _Tokenizer {
    }

    for (const align of aligns) {
-      if (/^ *-+: *$/.test(align)) {
+      if (this.rules.other.tableAlignRight.test(align)) {
        item.align.push('right');
-      } else if (/^ *:-+: *$/.test(align)) {
+      } else if (this.rules.other.tableAlignCenter.test(align)) {
        item.align.push('center');
-      } else if (/^ *:-+ *$/.test(align)) {
+      } else if (this.rules.other.tableAlignLeft.test(align)) {
        item.align.push('left');
      } else {
        item.align.push(null);
@@ -590,14 +590,14 @@ export class _Tokenizer {
   tag(src: string): Tokens.Tag | undefined {
     const cap = this.rules.inline.tag.exec(src);
     if (cap) {
-      if (!this.lexer.state.inLink && /^<a /i.test(cap[0])) {
+      if (!this.lexer.state.inLink && this.rules.other.startATag.test(cap[0])) {
        this.lexer.state.inLink = true;
-      } else if (this.lexer.state.inLink && /^<\/a>/i.test(cap[0])) {
+      } else if (this.lexer.state.inLink && this.rules.other.endATag.test(cap[0])) {
        this.lexer.state.inLink = false;
      }
-      if (!this.lexer.state.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
+      if (!this.lexer.state.inRawBlock && this.rules.other.startPreScriptTag.test(cap[0])) {
        this.lexer.state.inRawBlock = true;
-      } else if (this.lexer.state.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
+      } else if (this.lexer.state.inRawBlock && this.rules.other.endPreScriptTag.test(cap[0])) {
        this.lexer.state.inRawBlock = false;
      }

@@ -616,9 +616,9 @@ export class _Tokenizer {
     const cap = this.rules.inline.link.exec(src);
     if (cap) {
       const trimmedUrl = cap[2].trim();
-      if (!this.options.pedantic && /^</.test(trimmedUrl)) {
+      if (!this.options.pedantic && this.rules.other.startAngleBracket.test(trimmedUrl)) {
        // commonmark requires matching angle brackets
-        if (!(/>$/.test(trimmedUrl))) {
+        if (!(this.rules.other.endAngleBracket.test(trimmedUrl))) {
          return;
        }

@@ -642,7 +642,7 @@ export class _Tokenizer {
      let title = '';
      if (this.options.pedantic) {
        // split pedantic href and title
-        const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
+        const link = this.rules.other.pedanticHrefTitle.exec(href);

        if (link) {
          href = link[1];
@@ -653,8 +653,8 @@ export class _Tokenizer {
      }

      href = href.trim();
-      if (/^</.test(href)) {
-        if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
+      if (this.rules.other.startAngleBracket.test(href)) {
+        if (this.options.pedantic && !(this.rules.other.endAngleBracket.test(trimmedUrl))) {
          // pedantic allows starting angle bracket without ending angle bracket
          href = href.slice(1);
        } else {
@@ -664,15 +664,15 @@ export class _Tokenizer {
      return outputLink(cap, {
        href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
        title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title,
-      }, cap[0], this.lexer);
+      }, cap[0], this.lexer, this.rules);
    }
  }

  reflink(src: string, links: Links): Tokens.Link | Tokens.Image | Tokens.Text | undefined {
    let cap;
    if ((cap = this.rules.inline.reflink.exec(src))
      || (cap = this.rules.inline.nolink.exec(src))) {
-      const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
+      const linkString = (cap[2] || cap[1]).replace(this.rules.other.multipleSpaceGlobal, ' ');
      const link = links[linkString.toLowerCase()];
      if (!link) {
        const text = cap[0].charAt(0);
@@ -682,7 +682,7 @@ export class _Tokenizer {
          text,
        };
      }
-      return outputLink(cap, link, cap[0], this.lexer);
+      return outputLink(cap, link, cap[0], this.lexer, this.rules);
    }
  }

@@ -691,7 +691,7 @@ export class _Tokenizer {
    if (!match) return;

    // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
-    if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return;
+    if (match[3] && prevChar.match(this.rules.other.unicodeAlphaNumeric)) return;

    const nextChar = match[1] || match[2] || '';

@@ -759,9 +759,9 @@ export class _Tokenizer {
   codespan(src: string): Tokens.Codespan | undefined {
     const cap = this.rules.inline.code.exec(src);
     if (cap) {
-      let text = cap[2].replace(/\n/g, ' ');
-      const hasNonSpaceChars = /[^ ]/.test(text);
-      const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
+      let text = cap[2].replace(this.rules.other.newLineCharGlobal, ' ');
+      const hasNonSpaceChars = this.rules.other.nonSpaceChar.test(text);
+      const hasSpaceCharsOnBothEnds = this.rules.other.startingSpaceChar.test(text) && this.rules.other.endingSpaceChar.test(text);
      if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
        text = text.substring(1, text.length - 1);
      }
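Several of the replaced patterns depend on a runtime value (the current list bullet or the computed indent), so they cannot be plain literals on the `other` object. The new listItemRegex(bull), nextBulletRegex(indent), hrRegex(indent), fencesBeginRegex(indent), headingBeginRegex(indent), and htmlBeginRegex(indent) call sites therefore imply small factory functions. The sketch below copies their bodies from the new RegExp(...) expressions removed in the list() hunks above; the grouping name otherFactories is only for the sketch, and the actual implementations in src/rules.ts may differ (for example by caching compiled patterns).

// Sketch only: presumed factory entries, with regex sources taken verbatim from the
// removed new RegExp(...) lines in list() above. In src/rules.ts these presumably sit
// on the same `other` collection sketched earlier.
const otherFactories = {
  listItemRegex: (bull: string) =>
    new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`),
  nextBulletRegex: (indent: number) =>
    new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),
  hrRegex: (indent: number) =>
    new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),
  fencesBeginRegex: (indent: number) =>
    new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`),
  headingBeginRegex: (indent: number) =>
    new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`),
  htmlBeginRegex: (indent: number) =>
    new RegExp(`^ {0,${Math.min(3, indent - 1)}}<(?:[a-z].*>|!--)`, 'i'),
};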
