Skip to content

Commit b93e36c

Browse files
Introduce grammar patch from the simplify branch (#4019)
- Add utils authored by @LeaVerou: `isObject()`, `isPlainObject()`, `deepMerge()`, `deepClone()`, `betterAssign()`, `insert()`, and `insertAfter()`. Update `insertBefore()` accordingly.
- Update `extend()` to support new special properties (`$insertBefore`, `$insertAfter`, `$insert`, `$delete`, and `$merge`) that allow defining grammars more declaratively, and add a new `grammarPatch()` util (all authored by @LeaVerou). See #3927 for details.
- Adjust TS types and config to support the new API.
- Fix TS errors.

No breaking changes are introduced.
1 parent b67b4e0 commit b93e36c

File tree

16 files changed

+390
-147
lines changed

16 files changed

+390
-147
lines changed

src/core/registry.js

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { kebabToCamelCase } from '../shared/util.js';
2-
import { cloneGrammar } from '../util/extend.js';
2+
import { extend } from '../util/extend.js';
3+
import { grammarPatch } from '../util/grammar-patch.js';
34
import { forEach, toArray } from '../util/iterables.js';
4-
import { extend } from '../util/language-util.js';
5-
import { defineLazyProperty } from '../util/objects.js';
5+
import { deepClone, defineLazyProperty } from '../util/objects.js';
66

77
/**
88
* TODO: docs
@@ -221,7 +221,7 @@ export class Registry {
221221

222222
const base = entry?.proto.base;
223223
// We need this so that any code modifying the base grammar doesn't affect other instances
224-
const baseGrammar = base && cloneGrammar(required(base.id), base.id);
224+
const baseGrammar = base && deepClone(required(base.id));
225225

226226
const requiredLanguages = toArray(
227227
/** @type {LanguageProto | LanguageProto[] | undefined} */ (entry?.proto.require)
@@ -240,7 +240,7 @@ export class Registry {
240240
else {
241241
const options = {
242242
getOptionalLanguage: id => this.getLanguage(id),
243-
extend: (id, ref) => extend(required(id), id, ref),
243+
extend: (id, ref) => extend(required(id), ref),
244244
...(baseGrammar && { base: baseGrammar }),
245245
...(requiredLanguages.length && { languages }),
246246
};
@@ -249,10 +249,10 @@ export class Registry {
249249
}
250250

251251
if (baseGrammar) {
252-
evaluatedGrammar = extend(baseGrammar, base.id, evaluatedGrammar);
252+
evaluatedGrammar = extend(baseGrammar, evaluatedGrammar);
253253
}
254254

255-
return (entry.evaluatedGrammar = evaluatedGrammar);
255+
return (entry.evaluatedGrammar = grammarPatch(evaluatedGrammar));
256256
}
257257
}
258258

src/core/tokenize/tokenize.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,21 @@ export function tokenize (text, grammar) {
4242
const tokenList = new LinkedList();
4343
tokenList.addAfter(tokenList.head, text);
4444

45-
_matchGrammar.call(prism, text, tokenList, grammar, tokenList.head, 0);
45+
_matchGrammar.call(
46+
prism,
47+
text,
48+
tokenList,
49+
/** @type {GrammarTokens} */ (grammar),
50+
tokenList.head,
51+
0
52+
);
4653

4754
return tokenList.toArray();
4855
}
4956

5057
/**
5158
* @typedef {import('../../types.d.ts').TokenStream} TokenStream
5259
* @typedef {import('../../types.d.ts').Grammar} Grammar
60+
* @typedef {import('../../types.d.ts').GrammarTokens} GrammarTokens
5361
* @typedef {import('../prism.js').Prism} Prism
5462
*/

src/languages/c.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ export default {
7373
/* OpenCL host API */
7474
const extensions = getOptionalLanguage('opencl-extensions');
7575
if (extensions) {
76-
insertBefore(base, 'keyword', extensions);
76+
insertBefore(base, 'keyword', /** @type {GrammarTokens} */ (extensions));
7777
delete base['type-opencl-host-cpp'];
7878
}
7979

@@ -104,4 +104,5 @@ export default {
104104

105105
/**
106106
* @typedef {import('../types.d.ts').GrammarToken} GrammarToken
107+
* @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens
107108
*/

src/languages/cpp.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,11 @@ export default {
103103
/* OpenCL host API */
104104
const extensions = getOptionalLanguage('opencl-extensions');
105105
if (extensions) {
106-
insertBefore(cpp, 'keyword', extensions);
106+
insertBefore(
107+
cpp,
108+
'keyword',
109+
/** @type {import('../types.d.ts').GrammarTokens} */ (extensions)
110+
);
107111
}
108112

109113
const baseInside = { ...cpp };

src/languages/css.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,11 @@ export default {
8989

9090
const extras = getOptionalLanguage('css-extras');
9191
if (extras) {
92-
insertBefore(css, 'function', extras);
92+
insertBefore(
93+
css,
94+
'function',
95+
/** @type {import('../types.d.ts').GrammarTokens} */ (extras)
96+
);
9397
}
9498

9599
return css;

src/languages/hlsl.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ export default {
1212
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords
1313
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-reserved-words
1414
'class-name': [
15-
...toArray(base['class-name']),
15+
...toArray(
16+
/** @type {import('../types.d.ts').GrammarTokens} */ (base)['class-name']
17+
),
1618
/\b(?:AppendStructuredBuffer|BlendState|Buffer|ByteAddressBuffer|CompileShader|ComputeShader|ConsumeStructuredBuffer|DepthStencilState|DepthStencilView|DomainShader|GeometryShader|Hullshader|InputPatch|LineStream|OutputPatch|PixelShader|PointStream|RWBuffer|RWByteAddressBuffer|RWStructuredBuffer|RWTexture(?:1D|1DArray|2D|2DArray|3D)|RasterizerState|RenderTargetView|SamplerComparisonState|SamplerState|StructuredBuffer|Texture(?:1D|1DArray|2D|2DArray|2DMS|2DMSArray|3D|Cube|CubeArray)|TriangleStream|VertexShader)\b/,
1719
],
1820
'keyword': [

src/languages/php.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ export default {
352352

353353
const extras = getOptionalLanguage('php-extras');
354354
if (extras) {
355-
insertBefore(php, 'variable', extras);
355+
insertBefore(php, 'variable', /** @type {GrammarTokens} */ (extras));
356356
}
357357

358358
const embedded = embeddedIn('markup');
@@ -376,4 +376,5 @@ export default {
376376

377377
/**
378378
* @typedef {import('../types.d.ts').Grammar} Grammar
379+
* @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens
379380
*/

src/languages/typescript.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ export default {
88
require: javascript,
99
alias: 'ts',
1010
grammar ({ extend }) {
11-
/** @type {import('../types.d.ts').Grammar} */
12-
const typeInside = {};
11+
const typeInside = /** @type {import('../types.d.ts').Grammar} */ ({});
1312

1413
const typescript = extend('javascript', {
1514
'class-name': {
@@ -24,7 +23,7 @@ export default {
2423
});
2524

2625
typescript.keyword = [
27-
...toArray(typescript.keyword),
26+
...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (typescript).keyword),
2827

2928
// The keywords TypeScript adds to JavaScript
3029
/\b(?:abstract|declare|is|keyof|out|readonly|require|satisfies)\b/,

src/types.d.ts

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,35 @@ export type GrammarSpecial = {
198198
$tokenize?: (code: string, grammar: Grammar, Prism: Prism) => TokenStream;
199199
};
200200

201-
export type Grammar = GrammarTokens & GrammarSpecial;
201+
/**
202+
* Tokens within $insert
203+
*/
204+
export type InsertableToken = (RegExpLike | GrammarToken | (RegExpLike | GrammarToken)[]) & {
205+
$before?: TokenName | TokenName[];
206+
$after?: TokenName | TokenName[];
207+
};
208+
209+
/**
210+
* A grammar that is defined as its delta from another grammar.
211+
*/
212+
export type GrammarPatch = {
213+
$insert?: Partial<Record<TokenName, InsertableToken>>;
214+
$insertBefore?: Partial<Record<TokenName, GrammarTokens>>;
215+
$insertAfter?: Partial<Record<TokenName, GrammarTokens>>;
216+
$delete?: TokenName[];
217+
$merge?: Partial<
218+
Record<TokenName, Partial<Omit<GrammarToken, 'pattern'>> & { pattern?: RegExpLike }>
219+
>;
220+
};
221+
222+
export interface Grammar extends GrammarSpecial, GrammarPatch {
223+
[token: string]:
224+
| RegExpLike
225+
| GrammarToken
226+
| (RegExpLike | GrammarToken)[]
227+
| GrammarSpecial[keyof GrammarSpecial]
228+
| GrammarPatch[keyof GrammarPatch];
229+
}
202230

203231
export interface PlainObject {
204232
[key: string]: unknown;

src/util/extend.js

Lines changed: 44 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { betterAssign, deepClone } from './objects.js';
2+
13
/**
24
* Creates a deep copy of the language with the given id and appends the given tokens.
35
*
@@ -13,9 +15,8 @@
1315
* Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
1416
* Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
1517
*
16-
* @param {Grammar} grammar The grammar of the language to extend.
17-
* @param {string} id The id of the language to extend.
18-
* @param {Grammar} reDef The new tokens to append.
18+
* @param {Grammar} base The grammar of the language to extend.
19+
* @param {Grammar} grammar The new tokens to append.
1920
* @returns {Grammar} The new language created.
2021
* @example
2122
* Prism.languages['css-with-colors'] = Prism.languages.extend('css', {
@@ -26,120 +27,67 @@
2627
* 'color': /\b(?:red|green|blue)\b/
2728
* });
2829
*/
29-
export function extend (grammar, id, reDef) {
30-
const lang = cloneGrammar(grammar, id);
30+
export function extend (base, grammar) {
31+
const lang = deepClone(base);
32+
33+
for (const key in grammar) {
34+
if (typeof key !== 'string' || key.startsWith('$')) {
35+
// ignore special keys
36+
continue;
37+
}
3138

32-
for (const key in reDef) {
33-
lang[key] = reDef[key];
39+
lang[key] = grammar[key];
3440
}
3541

36-
return lang;
37-
}
42+
if (grammar.$insertBefore) {
43+
lang.$insertBefore = betterAssign(lang.$insertBefore ?? {}, grammar.$insertBefore);
44+
}
3845

39-
/**
40-
* @param {Grammar} grammar
41-
* @param {string} id
42-
* @returns {Grammar}
43-
*/
44-
export function cloneGrammar (grammar, id) {
45-
/** @type {Grammar} */
46-
const result = {};
46+
if (grammar.$insertAfter) {
47+
lang.$insertAfter = betterAssign(lang.$insertAfter ?? {}, grammar.$insertAfter);
48+
}
4749

48-
/** @type {Map<Grammar, Grammar>} */
49-
const visited = new Map();
50+
if (grammar.$insert) {
51+
// Syntactic sugar for $insertBefore/$insertAfter
52+
for (const tokenName in grammar.$insert) {
53+
const def = grammar.$insert[tokenName];
54+
const { $before, $after, ...token } = def;
55+
const relToken = $before || $after;
56+
const all = $before ? '$insertBefore' : '$insertAfter';
57+
lang[all] ??= {};
5058

51-
/**
52-
* @param {GrammarToken | RegExpLike} value
53-
*/
54-
function cloneToken (value) {
55-
if (!value.pattern) {
56-
return value;
57-
}
58-
else {
59-
/** @type {GrammarToken} */
60-
const copy = { pattern: value.pattern };
61-
if (value.lookbehind) {
62-
copy.lookbehind = value.lookbehind;
59+
if (Array.isArray(relToken)) {
60+
// Insert in multiple places
61+
for (const t of relToken) {
62+
lang[all][t][tokenName] = token;
63+
}
6364
}
64-
if (value.greedy) {
65-
copy.greedy = value.greedy;
65+
else if (relToken) {
66+
(lang[all][relToken] ??= {})[tokenName] = token;
6667
}
67-
if (value.alias) {
68-
copy.alias = Array.isArray(value.alias) ? [...value.alias] : value.alias;
68+
else {
69+
lang[tokenName] = token;
6970
}
70-
if (value.inside) {
71-
copy.inside = cloneRef(value.inside);
72-
}
73-
return copy;
7471
}
7572
}
7673

77-
/**
78-
* @param {GrammarTokens['string']} value
79-
*/
80-
function cloneTokens (value) {
81-
if (!value) {
82-
return undefined;
83-
}
84-
else if (Array.isArray(value)) {
85-
return value.map(cloneToken);
74+
if (grammar.$delete) {
75+
if (lang.$delete) {
76+
// base also had $delete
77+
lang.$delete.push(...grammar.$delete);
8678
}
8779
else {
88-
return cloneToken(value);
80+
lang.$delete = [...grammar.$delete];
8981
}
9082
}
9183

92-
/**
93-
* @param {string | Grammar} ref
94-
*/
95-
function cloneRef (ref) {
96-
if (ref === id) {
97-
// self ref
98-
return result;
99-
}
100-
else if (typeof ref === 'string') {
101-
return ref;
102-
}
103-
else {
104-
return clone(ref);
105-
}
84+
if (grammar.$merge) {
85+
lang.$merge = betterAssign(lang.$merge ?? {}, grammar.$merge);
10686
}
10787

108-
/**
109-
* @param {Grammar} value
110-
*/
111-
function clone (value) {
112-
let mapped = visited.get(value);
113-
if (mapped === undefined) {
114-
mapped = value === grammar ? result : {};
115-
visited.set(value, mapped);
116-
117-
// tokens
118-
for (const [key, tokens] of Object.entries(value)) {
119-
mapped[key] = cloneTokens(/** @type {GrammarToken[]} */ (tokens));
120-
}
121-
122-
// rest
123-
const r = value.$rest;
124-
if (r != null) {
125-
mapped.$rest = cloneRef(r);
126-
}
127-
128-
// tokenize
129-
const t = value.$tokenize;
130-
if (t) {
131-
mapped.$tokenize = t;
132-
}
133-
}
134-
return mapped;
135-
}
136-
137-
return clone(grammar);
88+
return lang;
13889
}
13990

14091
/**
14192
* @typedef {import('../types.d.ts').Grammar} Grammar
142-
* @typedef {import('../types.d.ts').GrammarToken} GrammarToken
143-
* @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens
144-
* @typedef {import('../types.d.ts').RegExpLike} RegExpLike
14593
*/

0 commit comments

Comments
 (0)