Skip to content

Commit 1b2f3d5

Browse files
committed
feat: align MCP corpus rank with MiniSearch; LRU cache for search HTTP
- Add src/corpus-minisearch.ts and wire local-search rankDocuments (SEARCH_RANKER)
- search sidecar: response cache keyed by corpus mtime, optional KNOWLEDGE_CACHE_REVISION
- Export corpusMaxMtime for cache keys; meta.cache_hit on cache lookup
- PRD 1.7 / .env.example

Made-with: Cursor
1 parent 39e3e20 commit 1b2f3d5

File tree

9 files changed

+279
-7
lines changed

9 files changed

+279
-7
lines changed

.env.example

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ QWEN_CONTAINER_RUNTIME=docker
2020
QWEN_CONTAINER_CORPUS_PATH=/corpus
2121
QWEN_CONTAINER_MAX_STDOUT_BYTES=524288
2222
QWEN_CONTAINER_MAX_STDERR_BYTES=65536
23+
# Ranker used for knowledge search in both the MCP local-search tool and the search sidecar. Default: minisearch; set to legacy to fall back to the old token heuristic.
24+
SEARCH_RANKER=minisearch
25+
# search container: LRU cache of full JSON answers (same query+locale+corpus mtime). 0 = disabled.
26+
SEARCH_RESPONSE_CACHE_MAX=64
27+
# Bust cache after deploy without filesystem mtime change (optional string).
28+
KNOWLEDGE_CACHE_REVISION=
2329
# Public URL clients use (scheme + host, no trailing slash). With Caddy on 80/443 use http://your-host or https://your-host
2430
PUBLIC_ORIGIN=https://spawn-dock.w3voice.net
2531
# docker-compose.prod.yml mounts ./data/state → /app/.spawndock

docker/search/http-server.mjs

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@ import { spawn } from "node:child_process";
88
import { readFileSync, existsSync } from "node:fs";
99
import { fileURLToPath } from "node:url";
1010
import { dirname, join } from "node:path";
11-
import { rankKnowledgeForQuery } from "./knowledge-rank.mjs";
11+
import { corpusMaxMtime, rankKnowledgeForQuery } from "./knowledge-rank.mjs";
1212

1313
const __dirname = dirname(fileURLToPath(import.meta.url));
1414
const PORT = parseInt(process.env.SEARCH_HTTP_PORT || process.env.QWEN_HTTP_PORT || "8790", 10);
1515
const TIMEOUT_MS = parseInt(process.env.QWEN_TIMEOUT_MS || process.env.SEARCH_TIMEOUT_MS || "120000", 10);
1616
const MAX_STDOUT = parseInt(process.env.QWEN_SEARCH_MAX_STDOUT || process.env.SEARCH_MAX_STDOUT || "524288", 10);
1717
const LISTEN = process.env.SEARCH_HTTP_BIND || process.env.QWEN_HTTP_BIND || "0.0.0.0";
1818
const API_TOKEN = (process.env.API_TOKEN || "").trim();
19+
const SEARCH_RESPONSE_CACHE_MAX = parseInt(process.env.SEARCH_RESPONSE_CACHE_MAX || "64", 10);
20+
const KNOWLEDGE_CACHE_REVISION = (process.env.KNOWLEDGE_CACHE_REVISION || "").trim();
1921

2022
const DEFAULT_TIERS = {
2123
free: { requests_per_minute: 1, requests_per_day: 10 },
@@ -104,6 +106,42 @@ function readOpenapiYaml() {
104106
return readFileSync(openapiPath(), "utf8");
105107
}
106108

109+
/**
 * LRU-ish response cache. A JS Map preserves insertion order, so recency is
 * tracked by re-inserting an entry on every cache hit (see searchCacheGet).
 */
const searchResponseCache = new Map();

/** Canonical cache form of a query: inner whitespace collapsed, trimmed, lowercased. */
function normalizeQueryForCache(q) {
  const collapsed = q.replace(/\s+/g, " ");
  return collapsed.trim().toLowerCase();
}
115+
116+
/**
 * Cache key for a search response: deploy revision, corpus max mtime,
 * normalized locale, and normalized query, joined with "|".
 * A missing knowledgeRoot contributes a fixed mtime of 0.
 */
function searchCacheKey(knowledgeRoot, locale, query) {
  const corpusStamp = knowledgeRoot ? corpusMaxMtime(knowledgeRoot) : 0;
  const normalizedLocale = (locale ?? "").trim().toLowerCase();
  const parts = [
    KNOWLEDGE_CACHE_REVISION,
    corpusStamp,
    normalizedLocale,
    normalizeQueryForCache(query),
  ];
  return parts.join("|");
}
121+
122+
/**
 * Look up a cached response. On a hit the entry is deleted and re-inserted so
 * it moves to the most-recently-used position (end of the Map's order).
 * Returns undefined on a miss.
 */
function searchCacheGet(key) {
  const hit = searchResponseCache.get(key);
  if (hit === undefined) {
    return undefined;
  }
  // Re-insert to refresh recency.
  searchResponseCache.delete(key);
  searchResponseCache.set(key, hit);
  return hit;
}
130+
131+
/**
 * Store a response in the cache, evicting least-recently-used entries beyond
 * SEARCH_RESPONSE_CACHE_MAX. A non-positive max disables caching entirely.
 */
function searchCacheSet(key, value) {
  if (SEARCH_RESPONSE_CACHE_MAX <= 0) {
    return;
  }
  // Delete-then-set so an existing key moves to the most-recent position.
  searchResponseCache.delete(key);
  searchResponseCache.set(key, value);
  // Map iteration order is insertion order, so the first keys are the oldest.
  for (const oldest of searchResponseCache.keys()) {
    if (searchResponseCache.size <= SEARCH_RESPONSE_CACHE_MAX) {
      break;
    }
    searchResponseCache.delete(oldest);
  }
}
144+
107145
function clientFacingIp(req) {
108146
const xff = req.headers["x-forwarded-for"];
109147
if (typeof xff === "string" && xff.trim().length > 0) {
@@ -279,6 +317,15 @@ function normalizeSearchBody(rawText) {
279317

280318
async function runSearchQuery(query, locale) {
281319
const knowledgeRoot = resolveKnowledgeRoot();
320+
const cacheKey = searchCacheKey(knowledgeRoot, locale, query);
321+
const cached = searchCacheGet(cacheKey);
322+
if (cached) {
323+
return {
324+
...cached,
325+
meta: { ...cached.meta, cache_hit: true },
326+
};
327+
}
328+
282329
let matches = [];
283330
if (knowledgeRoot) {
284331
try {
@@ -296,7 +343,9 @@ async function runSearchQuery(query, locale) {
296343
}
297344
const meta = {};
298345
if (locale) meta.locale_requested = locale;
299-
return { answer: normalized.answer, sources, meta };
346+
const result = { answer: normalized.answer, sources, meta };
347+
searchCacheSet(cacheKey, result);
348+
return result;
300349
}
301350

302351
function sendJson(res, status, body) {

docker/search/knowledge-rank.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ function walkKnowledgeTree(dir) {
169169
return files;
170170
}
171171

172-
function corpusMaxMtime(rootDir) {
172+
export function corpusMaxMtime(rootDir) {
173173
let max = 0;
174174
try {
175175
for (const p of walkKnowledgeTree(rootDir)) {

docs/PRD-public-knowledge-search-service.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ Caddy по-прежнему монтирует весь продукт под п
170170
|------|-----|----------|
171171
| `answer` | string | Основной текст ответа |
172172
| `sources` | array | **Опционально.** Унифицированный список источников из корпуса (например `{ "path": "guides/foo.md", "title": "..." }`) — структура задаётся в OpenAPI `components.schemas` |
173-
| `meta` | object | **Опционально.** Служебные не секретные поля (например `request_id`) |
173+
| `meta` | object | **Опционально.** Служебные не секретные поля (например `locale_requested`, `cache_hit` при попадании в LRU-кэш **`search`**) |
174174

175175
Нормативные детали полей **`sources`** / **`meta`** — только в OpenAPI; клиент опирается на спецификацию, без знания внутренней реализации.
176176

@@ -274,6 +274,8 @@ rate_limit_tiers:
274274
| Учёт **`locale`** в промпте | **Done** | Явные инструкции `ru` / `en` / авто по языку запроса. |
275275
| Поле **`sources`** | **Done** | Из JSON ответа модели; если пусто — fallback из ранжированных источников. |
276276
| Диагностика сбоев Qwen (**stdout/stderr** в **502**) | **Done** | Усечённые потоки в `message` для оператора. |
277+
| Совпадение ранжирования MCP и **search** | **Done** | Оба пути: MiniSearch по секциям + legacy fallback; `SEARCH_RANKER=legacy` для отката. |
278+
| Кэш готового ответа (Qwen) | **Done** | LRU в `http-server.mjs`; см. `SEARCH_RESPONSE_CACHE_MAX`. |
277279

278280
**Будущие улучшения (см. §12, NR-RET):** по желанию заменить или дополнить эвристику поиском уровня **BM25 / FTS** (как в локальных MCP-инструментах), **RRF**, нормализация запросов — без изменения путей **`/knowledge/api/v1/*`**.
279281

@@ -288,7 +290,9 @@ rate_limit_tiers:
288290
| `SEARCH_HTTP_PORT` / `QWEN_HTTP_PORT` | Порт HTTP listener (по умолчанию **8790**). |
289291
| `SEARCH_HTTP_BIND` / `QWEN_HTTP_BIND` | Bind address (по умолчанию **0.0.0.0**). |
290292
| `KNOWLEDGE_ROOT` | Корень Markdown-корпуса (**рекомендуется `/corpus`** в проде). |
291-
| `SEARCH_RANKER` | `minisearch` (по умолчанию) или `legacy` — только эвристика по токенам. |
293+
| `SEARCH_RANKER` | `minisearch` (по умолчанию) или `legacy` — только эвристика по токенам (MCP `local-search` и sidecar). |
294+
| `SEARCH_RESPONSE_CACHE_MAX` | LRU-кэш готовых JSON-ответов **search** (ключ: ревизия + mtime корпуса + locale + query). **`0`** — выкл. |
295+
| `KNOWLEDGE_CACHE_REVISION` | Произвольная строка для инвалидации кэша без смены файлов на диске. |
292296
| `SEARCH_RATE_LIMIT_TIERS` | JSON override лимитов **free** / **basic** (см. §5.6). |
293297
| `API_TOKEN` | Общий секрет для **Bearer** и tier **basic** на **`search`**. |
294298
| `PROD_QWEN_OAUTH_CREDS` / `QWEN_OAUTH_CREDS_B64` | Base64 **oauth_creds** для Qwen CLI в контейнере; после смены секрета — **пересобрать/перезапустить** **`search`**. |
@@ -364,7 +368,7 @@ rate_limit_tiers:
364368
| ID | Требование | Приоритет |
365369
|----|------------|-----------|
366370
| **NR-RET-1** | ~~Оценить BM25~~**частично done** (MiniSearch + секции). Далее: **FTS5 / RRF / trigram** при необходимости; контракт API без изменений. | P2 |
367-
| **NR-RET-2** | Опциональный **кэш** ответов по `(query нормализованный, locale, версия корпуса)` при неизменном корпусе — снижение стоимости Qwen и latency. | P3 |
371+
| **NR-RET-2** | **Частично done:** in-memory LRU в **`search`** (`SEARCH_RESPONSE_CACHE_MAX`, mtime корпуса + `KNOWLEDGE_CACHE_REVISION`). Далее: shared store при нескольких репликах. | P3 |
368372
| **NR-OBS-1** | Метрики (**accepted/429/latency/502**) и точки интеграции с мониторингом хоста. | P2 |
369373
| **NR-HA-1** | При **>1 реплики** `search` — вынести дневные/минутные счётчики rate limit из in-memory (**Redis** и аналоги); см. §5.6.3. | P2 |
370374
| **NR-TEST-1** | CI: e2e контейнер **`search`** + health + search с моком Qwen или dry-run режимом. | P3 |
@@ -389,4 +393,4 @@ rate_limit_tiers:
389393

390394
---
391395

392-
*Document version: 1.6 — 2026-03-25 — §5.8 MiniSearch ranker; §6 `SEARCH_RANKER`; NR-RET-1 частично закрыт; vitest exclude `docker/search`.*
396+
*Document version: 1.7 — 2026-03-25 — MCP `local-search` + MiniSearch; кэш ответов `search`; §6 cache env; meta `cache_hit`.*

package-lock.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"cors": "^2.8.6",
2727
"express": "^5.2.1",
2828
"express-rate-limit": "^8.3.1",
29+
"minisearch": "^7.1.0",
2930
"ws": "^8.19.0",
3031
"zod": "^4.3.6"
3132
},
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import { describe, expect, it } from "vitest";
2+
import { rankCorpusWithMiniSearch, splitIntoSections, tokenizeRankTerms } from "../corpus-minisearch.js";
3+
4+
describe("corpus-minisearch", () => {
5+
it("splitIntoSections respects headings", () => {
6+
const parts = splitIntoSections("a.md", "# One\nx\n\n## Two\ny");
7+
expect(parts.map((p) => p.section)).toEqual(["One", "Two"]);
8+
expect(parts[0].content).toContain("x");
9+
});
10+
11+
it("tokenizeRankTerms keeps Cyrillic tokens", () => {
12+
expect(tokenizeRankTerms("как сделать TMA")).toEqual(["как", "сделать", "tma"]);
13+
});
14+
15+
it("rankCorpusWithMiniSearch surfaces the best matching section", () => {
16+
const ranked = rankCorpusWithMiniSearch("telegram WebApp mini app", [
17+
{
18+
file: "guides/x.md",
19+
content: "# Noise\nNothing here.\n\n# Telegram\nUse WebApp for Telegram Mini App.",
20+
},
21+
]);
22+
expect(ranked.length).toBeGreaterThan(0);
23+
expect(ranked[0].section).toBe("Telegram");
24+
expect(ranked[0].file).toBe("guides/x.md");
25+
});
26+
});

src/corpus-minisearch.ts

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/**
2+
* MiniSearch (BM25) ranking over Markdown sections — same strategy as docker/search/knowledge-rank.mjs.
3+
*/
4+
import MiniSearch from "minisearch";
5+
6+
export interface CorpusDocument {
7+
file: string;
8+
content: string;
9+
}
10+
11+
export interface RankedCorpusChunk {
12+
file: string;
13+
score: number;
14+
section: string;
15+
snippet: string;
16+
}
17+
18+
const MAX_RESULTS = 5;
19+
const MIN_TOKEN_LENGTH = 2;
20+
const STOP_WORDS = new Set([
21+
"a",
22+
"an",
23+
"and",
24+
"are",
25+
"for",
26+
"how",
27+
"is",
28+
"into",
29+
"that",
30+
"the",
31+
"this",
32+
"what",
33+
"with",
34+
]);
35+
36+
function unicodeTokenize(text: string): string[] {
37+
return text
38+
.toLowerCase()
39+
.split(/[^\p{L}\p{N}]+/u)
40+
.filter((t) => t.length > 0);
41+
}
42+
43+
/** Exported for tests and snippet logic aligned with legacy ranker. */
44+
export function tokenizeRankTerms(query: string): string[] {
45+
return unicodeTokenize(query).filter(
46+
(token) => token.length >= MIN_TOKEN_LENGTH && !STOP_WORDS.has(token),
47+
);
48+
}
49+
50+
export function splitIntoSections(
51+
relPath: string,
52+
content: string,
53+
): ReadonlyArray<{ file: string; section: string; content: string }> {
54+
const lines = content.split(/\r?\n/);
55+
const out: { file: string; section: string; content: string }[] = [];
56+
let sectionTitle = "Overview";
57+
const buf: string[] = [];
58+
const flush = () => {
59+
const text = buf.join("\n").trim();
60+
if (text.length > 0) {
61+
out.push({ file: relPath, section: sectionTitle, content: text });
62+
}
63+
buf.length = 0;
64+
};
65+
for (const line of lines) {
66+
const m = /^#{1,6}\s+(.+)$/.exec(line);
67+
if (m) {
68+
flush();
69+
sectionTitle = m[1].trim();
70+
continue;
71+
}
72+
buf.push(line);
73+
}
74+
flush();
75+
return out;
76+
}
77+
78+
function extractSnippet(content: string, matchIndex: number): string {
79+
const windowStart = Math.max(0, matchIndex - 120);
80+
const windowEnd = Math.min(content.length, matchIndex + 220);
81+
const rawSnippet = content
82+
.slice(windowStart, windowEnd)
83+
.replace(/\s+/g, " ")
84+
.trim();
85+
if (rawSnippet.length <= 220) {
86+
return rawSnippet;
87+
}
88+
return `${rawSnippet.slice(0, 217)}...`;
89+
}
90+
91+
function snippetFromContent(content: string, query: string): string {
92+
const terms = tokenizeRankTerms(query);
93+
if (terms.length === 0) {
94+
const fb = unicodeTokenize(query).filter((t) => t.length >= 1);
95+
for (const t of fb) {
96+
const i = content.toLowerCase().indexOf(t);
97+
if (i !== -1) {
98+
return extractSnippet(content, i);
99+
}
100+
}
101+
return extractSnippet(content, 0);
102+
}
103+
const lower = content.toLowerCase();
104+
let best = -1;
105+
for (const t of terms) {
106+
const i = lower.indexOf(t);
107+
if (i !== -1 && (best === -1 || i < best)) {
108+
best = i;
109+
}
110+
}
111+
const idx = best === -1 ? 0 : best;
112+
return extractSnippet(content, idx);
113+
}
114+
115+
/**
116+
* Rank corpus slices with MiniSearch (per-heading chunks). Rebuilds index each call — fine for MCP corpus sizes.
117+
*/
118+
export function rankCorpusWithMiniSearch(
119+
query: string,
120+
documents: ReadonlyArray<CorpusDocument>,
121+
): ReadonlyArray<RankedCorpusChunk> {
122+
const rows: { id: number; file: string; section: string; content: string }[] = [];
123+
let id = 0;
124+
for (const doc of documents) {
125+
for (const sec of splitIntoSections(doc.file, doc.content)) {
126+
rows.push({ id: id++, file: sec.file, section: sec.section, content: sec.content });
127+
}
128+
}
129+
if (rows.length === 0) {
130+
return [];
131+
}
132+
133+
const mini = new MiniSearch({
134+
fields: ["content", "section", "file"],
135+
storeFields: ["file", "section", "content"],
136+
idField: "id",
137+
tokenize: (string) => unicodeTokenize(string).filter((t) => t.length >= 1),
138+
});
139+
mini.addAll(rows);
140+
141+
const hits = mini.search(query, {
142+
prefix: true,
143+
fuzzy: 0.12,
144+
boost: { section: 2.2, file: 1.65, content: 1 },
145+
});
146+
147+
const byId = new Map(rows.map((r) => [r.id, r]));
148+
const out: RankedCorpusChunk[] = [];
149+
for (const h of hits.slice(0, MAX_RESULTS)) {
150+
const hid = h.id as number;
151+
const stored = byId.get(hid);
152+
if (!stored) {
153+
continue;
154+
}
155+
out.push({
156+
file: stored.file,
157+
score: h.score,
158+
section: stored.section,
159+
snippet: snippetFromContent(stored.content, query),
160+
});
161+
}
162+
return out;
163+
}

0 commit comments

Comments
 (0)