Skip to content

Commit edc5392

Browse files
hzy and Copilot authored
feat: postprocess llms.txt with AGENTS.md prefixed (#512)
* feat: postprocess `llms.txt` with `AGENTS.md` prefixed Change-Id: I17e16b776f4f330ed4c1579ba66a985bc4985d1f * Apply suggestions from code review Co-authored-by: Copilot <[email protected]> Signed-off-by: Zhiyuan Hong <[email protected]> --------- Signed-off-by: Zhiyuan Hong <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 60f3a7a commit edc5392

File tree

10 files changed

+728
-0
lines changed

10 files changed

+728
-0
lines changed

docs/public/AGENTS.md

Lines changed: 225 additions & 0 deletions
Large diffs are not rendered by default.

docs/public/zh/AGENTS.md

Lines changed: 225 additions & 0 deletions
Large diffs are not rendered by default.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"devDependencies": {
5656
"react-render-to-markdown": "0.2.2",
5757
"@lynx-js/lynx-compat-data": "workspace:*",
58+
"@lynx-js/rspress-plugin-llms-postprocess": "workspace:*",
5859
"@lynx-js/react": "^0.107.1",
5960
"@lynx-js/testing-environment": "npm:@lynx-js/[email protected]",
6061
"@mdn/minimalist": "^2.0.4",
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"name": "@lynx-js/rspress-plugin-llms-postprocess",
3+
"version": "0.1.0",
4+
"type": "module",
5+
"module": "./src/index.ts",
6+
"exports": {
7+
".": {
8+
"types": "./src/index.ts",
9+
"import": "./src/index.ts",
10+
"default": "./src/index.ts"
11+
}
12+
},
13+
"dependencies": {
14+
"mdast-util-from-markdown": "2.0.2",
15+
"mdast-util-to-markdown": "2.1.2"
16+
},
17+
"devDependencies": {
18+
"@types/mdast": "4.0.4"
19+
}
20+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { postprocessLLMs } from '../src/postprocess.ts';
2+
import fs from 'node:fs/promises';
3+
import path from 'node:path';
4+
5+
/**
 * Debug helper that runs `postprocessLLMs` against an existing `doc_build`
 * output without touching the generated files.
 *
 * For each locale root it reads `llms.txt` and `AGENTS.md`, copies
 * `llms.txt` to `llms.txt.bak`, and writes every post-processed file next to
 * its real target with a `.2` suffix so the result can be inspected.
 */
async function main() {
  // Imported locally so this block does not depend on a new top-level import.
  // `fileURLToPath` decodes percent-escapes and handles Windows drive letters,
  // which `new URL(import.meta.url).pathname` does not.
  const { fileURLToPath } = await import('node:url');
  const scriptDir = path.dirname(fileURLToPath(import.meta.url));

  const roots = [
    path.resolve(scriptDir, '../../../doc_build'),
    path.resolve(scriptDir, '../../../doc_build/zh'),
  ];

  for (const root of roots) {
    const llmsTxtPath = path.join(root, 'llms.txt');
    const llmsTxt = await fs.readFile(llmsTxtPath, 'utf-8');
    const agentsMD = await fs.readFile(path.join(root, 'AGENTS.md'), 'utf-8');

    // do backup
    await fs.copyFile(llmsTxtPath, path.join(root, 'llms.txt.bak'));

    const result = postprocessLLMs(llmsTxt, agentsMD);

    for (const [filePath, content] of Object.entries(result)) {
      // Written beside the real target (suffix `.2`), never over it.
      const outputPath = path.join(root, filePath) + '.2';
      console.log('Writing:', outputPath);
      // Result keys may point into sub-directories (e.g. `/api/llms.txt`).
      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, content);
    }
  }
}

main().catch((error: unknown) => {
  // Report the failure explicitly instead of leaving a floating promise.
  console.error(error);
  process.exitCode = 1;
});
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import type { RspressPlugin } from '@rspress/core';
2+
import path from 'node:path';
3+
import fs from 'node:fs/promises';
4+
5+
import { postprocessLLMs } from './postprocess';
6+
7+
/**
 * Creates the Rspress plugin that rewrites the generated `llms.txt` files
 * once the build has finished.
 *
 * When the `llms` option is enabled, the `afterBuild` hook reads `llms.txt`
 * and `AGENTS.md` from the output directory and from its `zh`
 * sub-directory, runs `postprocessLLMs` on them, and writes every returned
 * file back under the same root.
 *
 * @returns The plugin object to put into the Rspress `plugins` array.
 */
function pluginLLMsPostprocess(): RspressPlugin {
  return {
    name: 'rspress-plugin-llms-postprocess',
    async afterBuild({ llms, ...config }) {
      if (!llms) {
        return;
      }

      const { outDir = 'doc_build' } = config;
      // `path.resolve` keeps an absolute `outDir` intact, whereas
      // `path.join(cwd, outDir)` would nest it under the working directory.
      const outRoot = path.resolve(process.cwd(), outDir);
      const roots = [outRoot, path.join(outRoot, 'zh')];

      for (const root of roots) {
        const [llmsTxt, agentsMD] = await Promise.all([
          fs.readFile(path.join(root, 'llms.txt'), 'utf-8'),
          fs.readFile(path.join(root, 'AGENTS.md'), 'utf-8'),
        ]);

        const result = postprocessLLMs(llmsTxt, agentsMD);

        for (const [filePath, content] of Object.entries(result)) {
          const target = path.join(root, filePath);
          // Result keys may point into sub-directories (e.g. `/api/llms.txt`);
          // make sure the directory exists before writing.
          await fs.mkdir(path.dirname(target), { recursive: true });
          await fs.writeFile(target, content);
        }
      }
    },
  };
}
37+
38+
export { pluginLLMsPostprocess };
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import { fromMarkdown } from 'mdast-util-from-markdown';
2+
import { toMarkdown } from 'mdast-util-to-markdown';
3+
import type {
4+
Parent,
5+
Node,
6+
Heading,
7+
RootContent,
8+
Root,
9+
RootContentMap,
10+
} from 'mdast';
11+
12+
interface H2Segment {
13+
title: string;
14+
heading: Heading;
15+
content: RootContent[];
16+
}
17+
18+
/**
 * Splits the children of `root` into sections that each start at a level-2
 * heading.
 *
 * Nodes that appear before the first level-2 heading are not part of any
 * section and are dropped.
 *
 * @param root - Parent node whose direct children are scanned.
 * @returns The sections in document order.
 */
function segmentByH2(root: Parent): Array<H2Segment> {
  const sections: H2Segment[] = [];
  let open: H2Segment | undefined;

  for (const child of root.children) {
    if (child.type === 'heading' && child.depth === 2) {
      // A new level-2 heading closes the previous section and opens the next.
      open = {
        title: toMarkdown({
          type: 'root',
          children: child.children,
        }).trim(),
        heading: child, // need this to generate markdown back later
        content: [],
      };
      sections.push(open);
      continue;
    }

    if (open) {
      open.content.push(child);
    }
  }

  return sections;
}
53+
54+
/**
 * Walks the tree rooted at `node` depth-first and calls `cb` for every node
 * whose `type` equals `type`.
 *
 * The walk does not descend into a node that matched, so matching nodes
 * nested inside another matching node are not visited.
 *
 * @param type - Node type to look for.
 * @param node - Node to start from.
 * @param cb - Called with each matching node and the parent it was reached
 *   through (`undefined` when the starting node itself matches and no
 *   `parent` was supplied).
 * @param parent - Parent of `node`, forwarded to `cb` on a match.
 */
function forEachType<T extends keyof RootContentMap>(
  type: T,
  node: Node,
  cb: (nodeWithType: RootContentMap[T], parent?: Parent) => void,
  parent?: Parent,
) {
  if (node.type === type) {
    cb(node as RootContentMap[T], parent);
    return;
  }

  if (!('children' in node) || !Array.isArray(node.children)) {
    return;
  }

  // The children array is read once here, so a callback that reassigns
  // `parent.children` does not disturb the ongoing iteration.
  const container = node as Parent;
  for (const descendant of container.children) {
    forEachType(type, descendant, cb, container);
  }
}
68+
69+
/**
 * Builds the final `llms.txt` contents from the generated `llms.txt` and the
 * hand-written `AGENTS.md`.
 *
 * - A level-2 section titled `API` or `APIs` is moved out into its own
 *   `/api/llms.txt` document.
 * - All remaining level-2 sections are demoted to level 3, have their links
 *   made absolute and de-duplicated against `AGENTS.md`, and are appended to
 *   `AGENTS.md` as an appendix to form the new `/llms.txt`.
 *
 * @param markdown - Content of the generated `llms.txt`.
 * @param agentsMD - Content of `AGENTS.md`; becomes the prefix of `/llms.txt`.
 * @returns Map from output path (`/llms.txt`, plus `/api/llms.txt` when an
 *   API section exists) to the markdown to write there.
 */
function postprocessLLMs(
  markdown: string,
  agentsMD: string,
): Record<string, string> {
  const siteOrigin = 'https://lynxjs.org';
  const llmsTxtByPath: Record<string, string> = {};

  // Route the API section to its own file; everything else goes to the appendix.
  const appendixSegments: H2Segment[] = [];
  for (const segment of segmentByH2(fromMarkdown(markdown))) {
    if (segment.title === 'API' || segment.title === 'APIs') {
      llmsTxtByPath['/api/llms.txt'] = toMarkdown({
        type: 'root',
        children: [
          ...fromMarkdown(`\
# Lynx APIs

Below is a full list of available APIs of Lynx:
`).children,
          segment.heading,
          ...segment.content,
        ],
      });
    } else {
      appendixSegments.push(segment);
    }
  }

  // Links already mentioned in AGENTS.md do not need to be repeated.
  const allLynxJsLinks: Set<string> = new Set();
  forEachType('link', fromMarkdown(agentsMD), (link) => {
    allLynxJsLinks.add(link.url);
  });

  const appendixSectionAst = {
    type: 'root',
    children: appendixSegments.flatMap((segment) => [
      {
        ...segment.heading,
        depth: 3,
      },
      ...segment.content,
    ]),
  } satisfies Root;

  // filter unwanted links
  forEachType('link', appendixSectionAst, (link, parent) => {
    // Only site-root-relative paths get the origin prepended; prefixing an
    // already-absolute (or protocol-relative) URL would corrupt it.
    if (link.url.startsWith('/') && !link.url.startsWith('//')) {
      link.url = `${siteOrigin}${link.url}`;
    }

    if (
      parent &&
      (allLynxJsLinks.has(link.url) ||
        // This is a trade-off:
        // We are hiding `/guide/devtool/*` and `/guide/performance/*` from AI to avoid noise
        /\/guide\/(?:devtool|performance)/.test(link.url))
    ) {
      parent.children = parent.children.filter((child) => child !== link);
    }
  });

  // filter "empty" list item
  forEachType('listItem', appendixSectionAst, (listItem, parent) => {
    if (!parent) {
      return;
    }

    // An item whose content serializes to nothing (e.g. its only link was
    // removed above) is dropped from the list.
    const listItemMD = toMarkdown({
      type: 'root',
      children: listItem.children,
    });

    if (listItemMD.trim() === '') {
      parent.children = parent.children.filter((child) => child !== listItem);
    }
  });

  const appendixSection = `\
---

## 98. Appendix: Links

You may find more information about Lynx and related resources in the links below:

${toMarkdown(appendixSectionAst)}

## 99. Appendix: Lynx APIs

If you need the full list of all APIs of Lynx, please refer to the [Lynx APIs](https://lynxjs.org/next/api/llms.txt).
`;

  // Keep a blank line before `---`: placed directly under a paragraph line it
  // would be parsed as a setext heading underline instead of a thematic break.
  llmsTxtByPath['/llms.txt'] = `${agentsMD.trimEnd()}\n\n${appendixSection}`;

  return llmsTxtByPath;
}
165+
166+
export { postprocessLLMs };

pnpm-lock.yaml

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pnpm-workspace.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
packages:
22
- 'packages/*'
3+
- 'plugins/*'

rspress.config.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import {
2020
SHARED_DOC_FILES,
2121
SHARED_SIDEBAR_PATHS,
2222
} from './shared-route-config.js';
23+
import { pluginLLMsPostprocess } from '@lynx-js/rspress-plugin-llms-postprocess';
2324

2425
const PUBLISH_URL = 'https://lynxjs.org/';
2526

@@ -34,6 +35,7 @@ export default defineConfig({
3435
'**/guide/embed-lynx-to-native/*',
3536
],
3637
},
38+
// outDir: 'doc_build',
3739
title: 'Lynx',
3840
description:
3941
'Empower the web community and invite more to build cross-platform apps',
@@ -202,6 +204,7 @@ export default defineConfig({
202204
pluginAlgolia({
203205
verificationContent: '6AD08DFB25B7234D',
204206
}),
207+
pluginLLMsPostprocess(),
205208
],
206209
markdown: {
207210
defaultWrapCode: false,

0 commit comments

Comments
 (0)