From be9fb8bacb2ad94ad5d59801af80fe90d7072376 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 12:05:34 +0800 Subject: [PATCH 01/10] delete unnecessary changes --- api/docs-json-api.md | 68 ++++++ package.json | 4 + scripts/build-docs-api-index.js | 22 ++ scripts/docs-api-lib.js | 368 ++++++++++++++++++++++++++++++++ scripts/docs-api-server.js | 134 ++++++++++++ 5 files changed, 596 insertions(+) create mode 100644 api/docs-json-api.md create mode 100644 scripts/build-docs-api-index.js create mode 100644 scripts/docs-api-lib.js create mode 100644 scripts/docs-api-server.js diff --git a/api/docs-json-api.md b/api/docs-json-api.md new file mode 100644 index 0000000000000..bf8833653a16e --- /dev/null +++ b/api/docs-json-api.md @@ -0,0 +1,68 @@ +--- +title: Docs JSON API (Experimental) +summary: Provide a structured JSON API for TiDB docs with topic and feature filters. +--- + +# Docs JSON API (Experimental) + +This API layer exposes structured metadata for markdown docs. + +## Why + +- Query docs by feature token (for example, `tidb_max_dist_task_nodes`) +- Query docs by topic/category +- Return structured schema instead of raw markdown only + +## Data schema + +Each doc record includes: + +- `id` +- `path` +- `title` +- `summary` +- `product` +- `topics` +- `features` +- `headings` +- `frontMatter` +- `frontMatterRaw` +- `updatedAt` + +## Build index + +```bash +npm run docs-api:build +``` + +Default output file: `tmp/docs-api-index.json` + +## Run API server + +```bash +npm run docs-api:serve +``` + +Default host and port: `127.0.0.1:3000` + +## Endpoints + +- `GET /healthz` +- `GET /schema` +- `GET /topics` +- `GET /features` +- `GET /features?prefix=tidb_` +- `GET /docs` +- `GET /docs?feature=tidb_max_dist_task_nodes` +- `GET /docs?topic=tidb-cloud` +- `GET /docs?q=resource control` +- `GET /docs?feature=tidb_max_dist_task_nodes&limit=10&offset=0` +- `GET /reload` (reload in-memory index) + +## Environment variables + +- `DOCS_API_HOST` (default 
`127.0.0.1`) +- `DOCS_API_PORT` (default `3000`) +- `DOCS_API_ROOT` (default current working directory) +- `DOCS_API_INDEX_FILE` (optional prebuilt JSON index path) + diff --git a/package.json b/package.json index 4e5b303151bdf..685301216ec88 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,10 @@ "main": "index.js", "license": "MIT", "type": "module", + "scripts": { + "docs-api:build": "node scripts/build-docs-api-index.js", + "docs-api:serve": "node scripts/docs-api-server.js" + }, "dependencies": { "axios": "^1.4.0", "glob": "^8.0.3", diff --git a/scripts/build-docs-api-index.js b/scripts/build-docs-api-index.js new file mode 100644 index 0000000000000..0bc4e498d51ee --- /dev/null +++ b/scripts/build-docs-api-index.js @@ -0,0 +1,22 @@ +import * as fs from "fs"; +import path from "path"; +import { buildDocsIndex } from "./docs-api-lib.js"; + +const args = process.argv.slice(2); +const outputArg = args[0] || "tmp/docs-api-index.json"; +const rootArg = args[1] || process.cwd(); + +const outputPath = path.resolve(outputArg); +const outputDir = path.dirname(outputPath); + +if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); +} + +const index = buildDocsIndex(rootArg); +fs.writeFileSync(outputPath, JSON.stringify(index, null, 2), "utf8"); + +console.log( + `Docs API index generated: ${outputPath} (${index.totalDocs} docs, ${index.features.length} features)` +); + diff --git a/scripts/docs-api-lib.js b/scripts/docs-api-lib.js new file mode 100644 index 0000000000000..d5dadb1a8713b --- /dev/null +++ b/scripts/docs-api-lib.js @@ -0,0 +1,368 @@ +import * as fs from "fs"; +import path from "path"; + +const DOC_IGNORE_DIRS = new Set(["node_modules", ".git", "media", "tmp"]); + +const MAX_SUMMARY_LENGTH = 220; + +const toPosixPath = (filePath) => filePath.replaceAll("\\", "/"); + +const safeString = (value) => (typeof value === "string" ? 
value : ""); + +const slugify = (input = "") => + input + .toLowerCase() + .trim() + .replace(/[`~!@#$%^&*()+=[\]{}|\\:;"'<>,.?/]+/g, "") + .replace(/\s+/g, "-"); + +const parseScalar = (raw) => { + const value = raw.trim(); + if (value === "true") return true; + if (value === "false") return false; + if (/^-?\d+$/.test(value)) return Number.parseInt(value, 10); + if (/^-?\d+\.\d+$/.test(value)) return Number.parseFloat(value); + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + return value.slice(1, -1); + } + return value; +}; + +const parseSimpleYaml = (raw = "") => { + const result = {}; + let currentArrayKey = null; + + raw.split(/\r?\n/).forEach((line) => { + if (!line.trim() || line.trim().startsWith("#")) { + return; + } + + const kvMatch = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/); + if (kvMatch) { + const key = kvMatch[1]; + const value = kvMatch[2]; + if (!value.trim()) { + result[key] = []; + currentArrayKey = key; + } else { + result[key] = parseScalar(value); + currentArrayKey = null; + } + return; + } + + const listMatch = line.match(/^\s*-\s*(.*)$/); + if (listMatch && currentArrayKey) { + result[currentArrayKey].push(parseScalar(listMatch[1])); + return; + } + + currentArrayKey = null; + }); + + return result; +}; + +const extractFrontMatter = (content) => { + const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?/); + if (!match) { + return { + raw: "", + data: {}, + }; + } + return { + raw: match[1], + data: parseSimpleYaml(match[1]), + }; +}; + +const stripFrontMatter = (content) => + content.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, ""); + +const stripInlineMarkdown = (text) => + text + .replace(/`([^`]+)`/g, "$1") + .replace(/\[([^\]]+)\]\(([^)]+)\)/g, "$1") + .replace(/[*_~>#]/g, "") + .replace(/\s+/g, " ") + .trim(); + +const collectMarkdownFiles = (rootDir) => { + const results = []; + const walk = (dir) => { + const entries = fs.readdirSync(dir, { withFileTypes: true 
}); + for (const entry of entries) { + if (entry.isDirectory()) { + if (DOC_IGNORE_DIRS.has(entry.name)) continue; + walk(path.join(dir, entry.name)); + continue; + } + if (!entry.isFile()) continue; + if (!entry.name.endsWith(".md")) continue; + results.push(path.join(dir, entry.name)); + } + }; + walk(rootDir); + return results; +}; + +const parseHeadingsAndSummary = (content) => { + const lines = stripFrontMatter(content).split(/\r?\n/); + const headings = []; + let summary = ""; + let inCodeBlock = false; + let paragraphBuffer = []; + + const flushParagraph = () => { + if (summary || paragraphBuffer.length === 0) { + paragraphBuffer = []; + return; + } + const text = stripInlineMarkdown(paragraphBuffer.join(" ").trim()); + if (text) { + summary = truncate(text); + } + paragraphBuffer = []; + }; + + for (const rawLine of lines) { + const line = rawLine.trim(); + + if (line.startsWith("```")) { + inCodeBlock = !inCodeBlock; + continue; + } + if (inCodeBlock) continue; + + const headingMatch = line.match(/^(#{1,6})\s+(.*)$/); + if (headingMatch) { + flushParagraph(); + const level = headingMatch[1].length; + const text = stripInlineMarkdown(headingMatch[2]); + if (text) { + headings.push({ + level, + text, + slug: slugify(text), + }); + } + continue; + } + + if (!line) { + flushParagraph(); + continue; + } + + if ( + line.startsWith("- ") || + line.startsWith("* ") || + line.startsWith("> ") || + line.startsWith("|") || + /^\d+\.\s+/.test(line) + ) { + continue; + } + + paragraphBuffer.push(line); + } + + flushParagraph(); + return { headings, summary }; +}; + +const inferProduct = (docPath) => { + if (docPath.startsWith("tidb-cloud/")) return "tidb-cloud"; + if (docPath.startsWith("dm/")) return "dm"; + if (docPath.startsWith("br/")) return "br"; + if (docPath.startsWith("ticdc/")) return "ticdc"; + if (docPath.startsWith("tiflash/")) return "tiflash"; + if (docPath.startsWith("tiup/")) return "tiup"; + return "tidb"; +}; + +const extractFeatures = (content, 
frontMatterData) => { + const features = new Set(); + const varRegex = /\b[a-z]+(?:_[a-z0-9]+){2,}\b/g; + for (const match of content.matchAll(varRegex)) { + const token = match[0]; + if ( + token.startsWith("tidb_") || + token.startsWith("tikv_") || + token.startsWith("pd_") || + token.startsWith("tiflash_") + ) { + features.add(token); + } + } + + const fmFeatureKeys = ["feature", "features", "tag", "tags"]; + fmFeatureKeys.forEach((key) => { + const value = frontMatterData[key]; + if (Array.isArray(value)) { + value.forEach((item) => { + if (typeof item === "string" && item.trim()) { + features.add(item.trim()); + } + }); + return; + } + if (typeof value === "string" && value.trim()) { + features.add(value.trim()); + } + }); + + return [...features]; +}; + +const truncate = (text, limit = MAX_SUMMARY_LENGTH) => { + if (!text) return ""; + if (text.length <= limit) return text; + return `${text.slice(0, limit - 3)}...`; +}; + +const normalizeTopics = (docPath, frontMatterData) => { + const segments = docPath + .replace(/\.md$/, "") + .split("/") + .map((segment) => segment.trim()) + .filter(Boolean); + const topics = new Set(segments.slice(0, -1)); + + const fmTopicKeys = ["topic", "topics", "category", "categories"]; + fmTopicKeys.forEach((key) => { + const value = frontMatterData[key]; + if (Array.isArray(value)) { + value.forEach((item) => { + if (typeof item === "string" && item.trim()) topics.add(item.trim()); + }); + return; + } + if (typeof value === "string" && value.trim()) { + topics.add(value.trim()); + } + }); + + return [...topics]; +}; + +const parseMarkdownDoc = (rootDir, absPath) => { + const relativePath = toPosixPath(path.relative(rootDir, absPath)); + const raw = fs.readFileSync(absPath, "utf8"); + const { data: frontMatter, raw: frontMatterRaw } = extractFrontMatter(raw); + const { headings, summary } = parseHeadingsAndSummary(raw); + let title = safeString(frontMatter.title); + if (!title) { + const h1 = headings.find((item) => item.level === 
1); + if (h1) title = h1.text; + } + + if (!title) { + title = path.basename(relativePath, ".md"); + } + + const docStat = fs.statSync(absPath); + const features = extractFeatures(raw, frontMatter).sort(); + const topics = normalizeTopics(relativePath, frontMatter).sort(); + + return { + id: relativePath.replace(/\.md$/, ""), + path: relativePath, + title, + summary, + product: inferProduct(relativePath), + topics, + features, + headings, + frontMatter, + frontMatterRaw, + updatedAt: docStat.mtime.toISOString(), + }; +}; + +export const buildDocsIndex = (rootDir = process.cwd()) => { + const normalizedRoot = path.resolve(rootDir); + const mdFiles = collectMarkdownFiles(normalizedRoot); + + const docs = mdFiles + .map((absPath) => parseMarkdownDoc(normalizedRoot, absPath)) + .sort((a, b) => a.path.localeCompare(b.path)); + + const topicSet = new Set(); + const featureSet = new Set(); + docs.forEach((doc) => { + doc.topics.forEach((topic) => topicSet.add(topic)); + doc.features.forEach((feature) => featureSet.add(feature)); + }); + + return { + schemaVersion: "1.0.0", + generatedAt: new Date().toISOString(), + totalDocs: docs.length, + topics: [...topicSet].sort(), + features: [...featureSet].sort(), + docs, + }; +}; + +export const docsApiSchema = { + schemaVersion: "1.0.0", + endpoints: { + "/docs": { + method: "GET", + query: { + feature: "Exact feature token filter, case-insensitive.", + topic: "Topic/category filter, case-insensitive.", + q: "Keyword match in path/title/summary, case-insensitive.", + path: "Exact document path filter, case-insensitive.", + limit: "Page size. Default 20, max 100.", + offset: "Pagination offset. 
Default 0.", + }, + response: { + meta: { + total: "Matched document count before pagination.", + limit: "Applied page size.", + offset: "Applied offset.", + returned: "Number of docs in data.", + }, + data: "Array", + }, + }, + "/topics": { + method: "GET", + response: "Array", + }, + "/features": { + method: "GET", + query: { + prefix: "Optional prefix filter.", + }, + response: "Array", + }, + "/schema": { + method: "GET", + response: "This schema document.", + }, + "/healthz": { + method: "GET", + response: "{ ok: true }", + }, + }, + docRecord: { + id: "string", + path: "string", + title: "string", + summary: "string", + product: "string", + topics: "string[]", + features: "string[]", + headings: "Array<{level:number,text:string,slug:string}>", + frontMatter: "object", + frontMatterRaw: "string", + updatedAt: "ISO-8601 string", + }, +}; diff --git a/scripts/docs-api-server.js b/scripts/docs-api-server.js new file mode 100644 index 0000000000000..22de5225bb48f --- /dev/null +++ b/scripts/docs-api-server.js @@ -0,0 +1,134 @@ +import * as fs from "fs"; +import http from "http"; +import path from "path"; +import { buildDocsIndex, docsApiSchema } from "./docs-api-lib.js"; + +const PORT = Number.parseInt(process.env.DOCS_API_PORT || "3000", 10); +const HOST = process.env.DOCS_API_HOST || "127.0.0.1"; +const ROOT_DIR = path.resolve(process.env.DOCS_API_ROOT || process.cwd()); +const PREBUILT_INDEX = process.env.DOCS_API_INDEX_FILE; + +const loadIndex = () => { + if (PREBUILT_INDEX) { + const filePath = path.resolve(PREBUILT_INDEX); + if (fs.existsSync(filePath)) { + return JSON.parse(fs.readFileSync(filePath, "utf8")); + } + } + return buildDocsIndex(ROOT_DIR); +}; + +let docsIndex = loadIndex(); + +const toInt = (value, fallback) => { + const num = Number.parseInt(value, 10); + return Number.isNaN(num) ? 
fallback : num; +}; + +const containsCI = (text, keyword) => + text.toLowerCase().includes(keyword.toLowerCase()); + +const json = (res, statusCode, payload) => { + res.writeHead(statusCode, { "Content-Type": "application/json; charset=utf-8" }); + res.end(JSON.stringify(payload, null, 2)); +}; + +const filterDocs = (docs, query) => { + const feature = query.get("feature"); + const topic = query.get("topic"); + const keyword = query.get("q"); + const pathFilter = query.get("path"); + const limit = Math.min(Math.max(toInt(query.get("limit"), 20), 1), 100); + const offset = Math.max(toInt(query.get("offset"), 0), 0); + + let rows = docs; + + if (feature) { + rows = rows.filter((doc) => + doc.features.some((item) => item.toLowerCase() === feature.toLowerCase()) + ); + } + if (topic) { + rows = rows.filter((doc) => + doc.topics.some((item) => item.toLowerCase() === topic.toLowerCase()) + ); + } + if (pathFilter) { + rows = rows.filter((doc) => doc.path.toLowerCase() === pathFilter.toLowerCase()); + } + if (keyword) { + rows = rows.filter((doc) => { + return ( + containsCI(doc.path, keyword) || + containsCI(doc.title, keyword) || + containsCI(doc.summary, keyword) + ); + }); + } + + const total = rows.length; + const paged = rows.slice(offset, offset + limit); + + return { + meta: { + total, + limit, + offset, + returned: paged.length, + }, + data: paged, + }; +}; + +const server = http.createServer((req, res) => { + if (!req.url) { + return json(res, 400, { error: "Invalid request URL." }); + } + + const url = new URL(req.url, `http://${HOST}:${PORT}`); + const pathname = url.pathname; + + if (req.method !== "GET") { + return json(res, 405, { error: "Only GET is supported." 
}); + } + + if (pathname === "/healthz") { + return json(res, 200, { ok: true }); + } + if (pathname === "/schema") { + return json(res, 200, docsApiSchema); + } + if (pathname === "/topics") { + return json(res, 200, { data: docsIndex.topics }); + } + if (pathname === "/features") { + const prefix = url.searchParams.get("prefix"); + if (!prefix) { + return json(res, 200, { data: docsIndex.features }); + } + const filtered = docsIndex.features.filter((f) => + f.toLowerCase().startsWith(prefix.toLowerCase()) + ); + return json(res, 200, { data: filtered }); + } + if (pathname === "/reload") { + docsIndex = loadIndex(); + return json(res, 200, { + ok: true, + totalDocs: docsIndex.totalDocs, + generatedAt: docsIndex.generatedAt, + }); + } + if (pathname === "/docs") { + return json(res, 200, filterDocs(docsIndex.docs, url.searchParams)); + } + + return json(res, 404, { error: "Not found." }); +}); + +server.listen(PORT, HOST, () => { + console.log( + `Docs API server running at http://${HOST}:${PORT} (docs: ${docsIndex.totalDocs})` + ); +}); + From 9666f08c561f8949c9075375123ede4f30982c7d Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 14:18:44 +0800 Subject: [PATCH 02/10] Add DOCS_API_SOURCE_DIR and template vars Introduce DOCS_API_SOURCE_DIR and source-directory resolution so the docs API can prefer ../docs-staging if present. Update documentation to describe source priority. Propagate the resolved source dir into build-docs-api-index and docs-api-server (including startup/log messages). Add support for template variable replacement in markdown using variables.json (pattern {{{ .path }}}) with a safe parser and warning on JSON parse failure. Improve file collection to skip dot-directories, ignore api/docs-json-api.md, and normalize paths. Minor refactors: pass resolved sourceDir into buildDocsIndex, add resolveDefaultSourceDir helper, and include variables when parsing markdown. 
--- api/docs-json-api.md | 11 ++++++- scripts/build-docs-api-index.js | 11 ++++--- scripts/docs-api-lib.js | 55 ++++++++++++++++++++++++++++++--- scripts/docs-api-server.js | 15 ++++++--- 4 files changed, 76 insertions(+), 16 deletions(-) diff --git a/api/docs-json-api.md b/api/docs-json-api.md index bf8833653a16e..85941e9d73c91 100644 --- a/api/docs-json-api.md +++ b/api/docs-json-api.md @@ -63,6 +63,15 @@ Default host and port: `127.0.0.1:3000` - `DOCS_API_HOST` (default `127.0.0.1`) - `DOCS_API_PORT` (default `3000`) -- `DOCS_API_ROOT` (default current working directory) +- `DOCS_API_SOURCE_DIR` (default: if `../docs-staging` exists, use it; otherwise current working directory) - `DOCS_API_INDEX_FILE` (optional prebuilt JSON index path) +## Source priority + +The API loads markdown files from the source directory in this order: + +1. `DOCS_API_SOURCE_DIR` (if set) +2. `../docs-staging` (if exists) +3. current working directory + +Template variables in markdown such as `{{{ .starter }}}` are replaced using `variables.json` in the selected source directory. 
diff --git a/scripts/build-docs-api-index.js b/scripts/build-docs-api-index.js index 0bc4e498d51ee..813f06a704ab9 100644 --- a/scripts/build-docs-api-index.js +++ b/scripts/build-docs-api-index.js @@ -1,10 +1,11 @@ import * as fs from "fs"; import path from "path"; -import { buildDocsIndex } from "./docs-api-lib.js"; +import { buildDocsIndex, resolveDefaultSourceDir } from "./docs-api-lib.js"; const args = process.argv.slice(2); const outputArg = args[0] || "tmp/docs-api-index.json"; -const rootArg = args[1] || process.cwd(); +const rootArg = + args[1] || process.env.DOCS_API_SOURCE_DIR || resolveDefaultSourceDir(process.cwd()); const outputPath = path.resolve(outputArg); const outputDir = path.dirname(outputPath); @@ -13,10 +14,10 @@ if (!fs.existsSync(outputDir)) { fs.mkdirSync(outputDir, { recursive: true }); } -const index = buildDocsIndex(rootArg); +const sourceDir = path.resolve(rootArg); +const index = buildDocsIndex(sourceDir); fs.writeFileSync(outputPath, JSON.stringify(index, null, 2), "utf8"); console.log( - `Docs API index generated: ${outputPath} (${index.totalDocs} docs, ${index.features.length} features)` + `Docs API index generated: ${outputPath} (${index.totalDocs} docs, ${index.features.length} features) from source: ${sourceDir}` ); - diff --git a/scripts/docs-api-lib.js b/scripts/docs-api-lib.js index d5dadb1a8713b..b1e7377a2272f 100644 --- a/scripts/docs-api-lib.js +++ b/scripts/docs-api-lib.js @@ -2,6 +2,7 @@ import * as fs from "fs"; import path from "path"; const DOC_IGNORE_DIRS = new Set(["node_modules", ".git", "media", "tmp"]); +const DOC_IGNORE_FILES = new Set(["api/docs-json-api.md"]); const MAX_SUMMARY_LENGTH = 220; @@ -16,6 +17,25 @@ const slugify = (input = "") => .replace(/[`~!@#$%^&*()+=[\]{}|\\:;"'<>,.?/]+/g, "") .replace(/\s+/g, "-"); +const getValueByPath = (obj, keyPath) => { + return ( + keyPath + .split(".") + .reduce((acc, key) => (acc !== undefined && acc !== null ? acc[key] : ""), obj) ?? 
"" + ); +}; + +const replaceTemplateVariables = (content, variables = {}) => { + const variablePattern = /{{{\s*\.(.+?)\s*}}}/g; + return content.replace(variablePattern, (match, variablePath) => { + const value = getValueByPath(variables, variablePath.trim()); + if (value === undefined || value === null || value === "") { + return match; + } + return String(value); + }); +}; + const parseScalar = (raw) => { const value = raw.trim(); if (value === "true") return true; @@ -97,13 +117,16 @@ const collectMarkdownFiles = (rootDir) => { const entries = fs.readdirSync(dir, { withFileTypes: true }); for (const entry of entries) { if (entry.isDirectory()) { - if (DOC_IGNORE_DIRS.has(entry.name)) continue; + if (entry.name.startsWith(".") || DOC_IGNORE_DIRS.has(entry.name)) continue; walk(path.join(dir, entry.name)); continue; } if (!entry.isFile()) continue; if (!entry.name.endsWith(".md")) continue; - results.push(path.join(dir, entry.name)); + const absPath = path.join(dir, entry.name); + const relativePath = toPosixPath(path.relative(rootDir, absPath)); + if (DOC_IGNORE_FILES.has(relativePath)) continue; + results.push(absPath); } }; walk(rootDir); @@ -250,9 +273,10 @@ const normalizeTopics = (docPath, frontMatterData) => { return [...topics]; }; -const parseMarkdownDoc = (rootDir, absPath) => { +const parseMarkdownDoc = (rootDir, absPath, variables) => { const relativePath = toPosixPath(path.relative(rootDir, absPath)); - const raw = fs.readFileSync(absPath, "utf8"); + const originalRaw = fs.readFileSync(absPath, "utf8"); + const raw = replaceTemplateVariables(originalRaw, variables); const { data: frontMatter, raw: frontMatterRaw } = extractFrontMatter(raw); const { headings, summary } = parseHeadingsAndSummary(raw); let title = safeString(frontMatter.title); @@ -286,10 +310,22 @@ const parseMarkdownDoc = (rootDir, absPath) => { export const buildDocsIndex = (rootDir = process.cwd()) => { const normalizedRoot = path.resolve(rootDir); + const variablesPath = 
path.join(normalizedRoot, "variables.json"); + let variables = {}; + if (fs.existsSync(variablesPath)) { + try { + variables = JSON.parse(fs.readFileSync(variablesPath, "utf8")); + } catch (error) { + console.warn( + `Warning: failed to parse variables.json at ${variablesPath}, continuing without variable replacement.` + ); + } + } + const mdFiles = collectMarkdownFiles(normalizedRoot); const docs = mdFiles - .map((absPath) => parseMarkdownDoc(normalizedRoot, absPath)) + .map((absPath) => parseMarkdownDoc(normalizedRoot, absPath, variables)) .sort((a, b) => a.path.localeCompare(b.path)); const topicSet = new Set(); @@ -309,6 +345,15 @@ export const buildDocsIndex = (rootDir = process.cwd()) => { }; }; +export const resolveDefaultSourceDir = (baseDir = process.cwd()) => { + const normalizedBase = path.resolve(baseDir); + const siblingDocsStaging = path.resolve(normalizedBase, "..", "docs-staging"); + if (fs.existsSync(siblingDocsStaging) && fs.statSync(siblingDocsStaging).isDirectory()) { + return siblingDocsStaging; + } + return normalizedBase; +}; + export const docsApiSchema = { schemaVersion: "1.0.0", endpoints: { diff --git a/scripts/docs-api-server.js b/scripts/docs-api-server.js index 22de5225bb48f..5d45d1dbf4c50 100644 --- a/scripts/docs-api-server.js +++ b/scripts/docs-api-server.js @@ -1,11 +1,17 @@ import * as fs from "fs"; import http from "http"; import path from "path"; -import { buildDocsIndex, docsApiSchema } from "./docs-api-lib.js"; +import { + buildDocsIndex, + docsApiSchema, + resolveDefaultSourceDir, +} from "./docs-api-lib.js"; const PORT = Number.parseInt(process.env.DOCS_API_PORT || "3000", 10); const HOST = process.env.DOCS_API_HOST || "127.0.0.1"; -const ROOT_DIR = path.resolve(process.env.DOCS_API_ROOT || process.cwd()); +const SOURCE_DIR = path.resolve( + process.env.DOCS_API_SOURCE_DIR || resolveDefaultSourceDir(process.cwd()) +); const PREBUILT_INDEX = process.env.DOCS_API_INDEX_FILE; const loadIndex = () => { @@ -15,7 +21,7 @@ const 
loadIndex = () => { return JSON.parse(fs.readFileSync(filePath, "utf8")); } } - return buildDocsIndex(ROOT_DIR); + return buildDocsIndex(SOURCE_DIR); }; let docsIndex = loadIndex(); @@ -128,7 +134,6 @@ const server = http.createServer((req, res) => { server.listen(PORT, HOST, () => { console.log( - `Docs API server running at http://${HOST}:${PORT} (docs: ${docsIndex.totalDocs})` + `Docs API server running at http://${HOST}:${PORT} (docs: ${docsIndex.totalDocs}, source: ${SOURCE_DIR})` ); }); - From 809ab74b7aee08a77cce451620c379c7275e6c42 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 14:51:50 +0800 Subject: [PATCH 03/10] Add doc content endpoint and full-text search Expose on-demand markdown content and improve search/indexing. Adds markdownToSearchText and stores _searchText for full-text matching, plus loadTemplateVariables and loadDocContentByPath to safely load rendered markdown. Server changes add includeContent query support for /docs, a /docs/content endpoint for single-doc content retrieval, and a toPublicDoc serializer; also introduces isTruthy helper and updates docs API schema and docs-json-api.md to document performance behavior and new query options. These changes keep list responses lightweight while enabling full-content fetches when needed. --- api/docs-json-api.md | 11 +++++++ scripts/docs-api-lib.js | 47 ++++++++++++++++++++++++-- scripts/docs-api-server.js | 67 +++++++++++++++++++++++++++++++++++--- 3 files changed, 119 insertions(+), 6 deletions(-) diff --git a/api/docs-json-api.md b/api/docs-json-api.md index 85941e9d73c91..9a3077879f898 100644 --- a/api/docs-json-api.md +++ b/api/docs-json-api.md @@ -12,6 +12,7 @@ This API layer exposes structured metadata for markdown docs. 
- Query docs by feature token (for example, `tidb_max_dist_task_nodes`) - Query docs by topic/category - Return structured schema instead of raw markdown only +- Keep list APIs fast by default, and fetch full content on demand ## Data schema @@ -57,8 +58,18 @@ Default host and port: `127.0.0.1:3000` - `GET /docs?topic=tidb-cloud` - `GET /docs?q=resource control` - `GET /docs?feature=tidb_max_dist_task_nodes&limit=10&offset=0` +- `GET /docs?topic=tidb-cloud&includeContent=true` (returns markdown content in list response) +- `GET /docs/content?path=tidb-cloud/backup-and-restore.md` +- `GET /docs/content?id=tidb-cloud/backup-and-restore` - `GET /reload` (reload in-memory index) +## Search and performance behavior + +- `q` uses path, title, summary, and full-text matching. +- `/docs` does **not** return full markdown content by default. +- Use `/docs/content` to fetch full markdown content for a single document. +- If needed, set `includeContent=true` on `/docs` for small result sets. + ## Environment variables - `DOCS_API_HOST` (default `127.0.0.1`) diff --git a/scripts/docs-api-lib.js b/scripts/docs-api-lib.js index b1e7377a2272f..acd39f5d8b5ea 100644 --- a/scripts/docs-api-lib.js +++ b/scripts/docs-api-lib.js @@ -111,6 +111,18 @@ const stripInlineMarkdown = (text) => .replace(/\s+/g, " ") .trim(); +const markdownToSearchText = (content) => { + const withoutFrontMatter = stripFrontMatter(content); + return stripInlineMarkdown( + withoutFrontMatter + .replace(/```[\s\S]*?```/g, " ") + .replace(/<[^>]+>/g, " ") + .replace(/{{<[^>]+>}}/g, " ") + .replace(/\|/g, " ") + .replace(/\r?\n/g, " ") + ); +}; + const collectMarkdownFiles = (rootDir) => { const results = []; const walk = (dir) => { @@ -292,6 +304,7 @@ const parseMarkdownDoc = (rootDir, absPath, variables) => { const docStat = fs.statSync(absPath); const features = extractFeatures(raw, frontMatter).sort(); const topics = normalizeTopics(relativePath, frontMatter).sort(); + const searchText = 
markdownToSearchText(raw).toLowerCase(); return { id: relativePath.replace(/\.md$/, ""), @@ -305,10 +318,11 @@ const parseMarkdownDoc = (rootDir, absPath, variables) => { frontMatter, frontMatterRaw, updatedAt: docStat.mtime.toISOString(), + _searchText: searchText, }; }; -export const buildDocsIndex = (rootDir = process.cwd()) => { +export const loadTemplateVariables = (rootDir = process.cwd()) => { const normalizedRoot = path.resolve(rootDir); const variablesPath = path.join(normalizedRoot, "variables.json"); let variables = {}; @@ -321,6 +335,26 @@ export const buildDocsIndex = (rootDir = process.cwd()) => { ); } } + return variables; +}; + +export const loadDocContentByPath = (rootDir, docPath, variables) => { + const normalizedRoot = path.resolve(rootDir); + const normalizedDocPath = docPath.replaceAll("\\", "/").replace(/^\/+/, ""); + const absPath = path.join(normalizedRoot, normalizedDocPath); + if (!absPath.startsWith(normalizedRoot)) { + throw new Error("Invalid path."); + } + if (!fs.existsSync(absPath)) { + throw new Error("Document not found."); + } + const raw = fs.readFileSync(absPath, "utf8"); + return replaceTemplateVariables(raw, variables); +}; + +export const buildDocsIndex = (rootDir = process.cwd()) => { + const normalizedRoot = path.resolve(rootDir); + const variables = loadTemplateVariables(normalizedRoot); const mdFiles = collectMarkdownFiles(normalizedRoot); @@ -362,7 +396,8 @@ export const docsApiSchema = { query: { feature: "Exact feature token filter, case-insensitive.", topic: "Topic/category filter, case-insensitive.", - q: "Keyword match in path/title/summary, case-insensitive.", + q: "Keyword match in path/title/summary/full-text, case-insensitive.", + includeContent: "Whether to include markdown content in list results. Default false.", path: "Exact document path filter, case-insensitive.", limit: "Page size. Default 20, max 100.", offset: "Pagination offset. 
Default 0.", @@ -392,6 +427,14 @@ export const docsApiSchema = { method: "GET", response: "This schema document.", }, + "/docs/content": { + method: "GET", + query: { + path: "Exact document path, e.g. tidb-cloud/backup-and-restore.md", + id: "Document id, e.g. tidb-cloud/backup-and-restore", + }, + response: "Single DocRecord with markdown content.", + }, "/healthz": { method: "GET", response: "{ ok: true }", diff --git a/scripts/docs-api-server.js b/scripts/docs-api-server.js index 5d45d1dbf4c50..8e433a1e17b55 100644 --- a/scripts/docs-api-server.js +++ b/scripts/docs-api-server.js @@ -4,6 +4,8 @@ import path from "path"; import { buildDocsIndex, docsApiSchema, + loadDocContentByPath, + loadTemplateVariables, resolveDefaultSourceDir, } from "./docs-api-lib.js"; @@ -13,6 +15,7 @@ const SOURCE_DIR = path.resolve( process.env.DOCS_API_SOURCE_DIR || resolveDefaultSourceDir(process.cwd()) ); const PREBUILT_INDEX = process.env.DOCS_API_INDEX_FILE; +const TEMPLATE_VARIABLES = loadTemplateVariables(SOURCE_DIR); const loadIndex = () => { if (PREBUILT_INDEX) { @@ -34,16 +37,50 @@ const toInt = (value, fallback) => { const containsCI = (text, keyword) => text.toLowerCase().includes(keyword.toLowerCase()); +const isTruthy = (value) => { + if (!value) return false; + return ["1", "true", "yes", "on"].includes(value.toLowerCase()); +}; + const json = (res, statusCode, payload) => { res.writeHead(statusCode, { "Content-Type": "application/json; charset=utf-8" }); res.end(JSON.stringify(payload, null, 2)); }; +const toPublicDoc = (doc, options = {}) => { + const includeContent = options.includeContent === true; + const result = { + id: doc.id, + path: doc.path, + title: doc.title, + summary: doc.summary, + product: doc.product, + topics: doc.topics, + features: doc.features, + headings: doc.headings, + frontMatter: doc.frontMatter, + frontMatterRaw: doc.frontMatterRaw, + updatedAt: doc.updatedAt, + }; + if (includeContent) { + try { + result.content = 
loadDocContentByPath(SOURCE_DIR, doc.path, TEMPLATE_VARIABLES); + result.contentType = "text/markdown"; + } catch (error) { + result.content = ""; + result.contentType = "text/markdown"; + result.contentError = String(error.message || error); + } + } + return result; +}; + const filterDocs = (docs, query) => { const feature = query.get("feature"); const topic = query.get("topic"); const keyword = query.get("q"); const pathFilter = query.get("path"); + const includeContent = isTruthy(query.get("includeContent")); const limit = Math.min(Math.max(toInt(query.get("limit"), 20), 1), 100); const offset = Math.max(toInt(query.get("offset"), 0), 0); @@ -63,11 +100,13 @@ const filterDocs = (docs, query) => { rows = rows.filter((doc) => doc.path.toLowerCase() === pathFilter.toLowerCase()); } if (keyword) { + const loweredKeyword = keyword.toLowerCase(); rows = rows.filter((doc) => { return ( - containsCI(doc.path, keyword) || - containsCI(doc.title, keyword) || - containsCI(doc.summary, keyword) + containsCI(doc.path, loweredKeyword) || + containsCI(doc.title, loweredKeyword) || + containsCI(doc.summary, loweredKeyword) || + containsCI(doc._searchText || "", loweredKeyword) ); }); } @@ -81,8 +120,9 @@ const filterDocs = (docs, query) => { limit, offset, returned: paged.length, + includeContent, }, - data: paged, + data: paged.map((doc) => toPublicDoc(doc, { includeContent })), }; }; @@ -128,6 +168,25 @@ const server = http.createServer((req, res) => { if (pathname === "/docs") { return json(res, 200, filterDocs(docsIndex.docs, url.searchParams)); } + if (pathname === "/docs/content") { + const pathParam = url.searchParams.get("path"); + const idParam = url.searchParams.get("id"); + if (!pathParam && !idParam) { + return json(res, 400, { error: "Either path or id is required." 
}); + } + + const doc = docsIndex.docs.find((item) => { + if (pathParam && item.path.toLowerCase() === pathParam.toLowerCase()) return true; + if (idParam && item.id.toLowerCase() === idParam.toLowerCase()) return true; + return false; + }); + + if (!doc) { + return json(res, 404, { error: "Document not found." }); + } + + return json(res, 200, { data: toPublicDoc(doc, { includeContent: true }) }); + } return json(res, 404, { error: "Not found." }); }); From 07595db8b35ca4690180da7c9f2d06b5146289df Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 14:59:29 +0800 Subject: [PATCH 04/10] Add experimental Docs MCP server Introduce an experimental MCP server to expose TiDB docs over STDIO. Adds scripts/docs-mcp-server.js implementing MCP handlers (search_docs, get_doc_content, list_topics, list_features, reload_docs_index) and resource URIs (docs://schema, docs://index/meta, docs://doc/). Adds user-facing docs at api/docs-mcp-server.md with usage and example .mcp.json, and registers the new npm script "docs-mcp:serve" in package.json. The server builds and serves the docs index, loads template variables, and supports reloading the index at runtime. --- api/docs-mcp-server.md | 61 ++++++ package.json | 3 +- scripts/docs-mcp-server.js | 391 +++++++++++++++++++++++++++++++++++++ 3 files changed, 454 insertions(+), 1 deletion(-) create mode 100644 api/docs-mcp-server.md create mode 100644 scripts/docs-mcp-server.js diff --git a/api/docs-mcp-server.md b/api/docs-mcp-server.md new file mode 100644 index 0000000000000..37281f0b4c4f0 --- /dev/null +++ b/api/docs-mcp-server.md @@ -0,0 +1,61 @@ +--- +title: Docs MCP Server (Experimental) +summary: Expose TiDB docs as MCP tools and resources for AI clients such as Claude Code and Cursor. +--- + +# Docs MCP Server (Experimental) + +This server exposes TiDB docs through MCP over STDIO. 
+ +## Start server + +```bash +npm run docs-mcp:serve +``` + +Optional source override: + +```bash +DOCS_API_SOURCE_DIR=/workspaces/docs-staging npm run docs-mcp:serve +``` + +## MCP tools + +- `search_docs` +- `get_doc_content` +- `list_topics` +- `list_features` +- `reload_docs_index` + +## MCP resources + +- `docs://schema` +- `docs://index/meta` +- `docs://doc/` + +Example: + +- `docs://doc/tidb-cloud%2Fbackup-and-restore-serverless.md` + +## Claude Code example (`.mcp.json`) + +```json +{ + "mcpServers": { + "tidb-docs": { + "command": "node", + "args": ["scripts/docs-mcp-server.js"], + "env": { + "DOCS_API_SOURCE_DIR": "/workspaces/docs-staging" + } + } + } +} +``` + +## Design notes + +- `search_docs` does full-text filtering but returns lightweight metadata by default. +- `get_doc_content` fetches full markdown only when needed. +- Template variables like `{{{ .starter }}}` are replaced using `variables.json` from source. + diff --git a/package.json b/package.json index 685301216ec88..22f151c8362ef 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,8 @@ "type": "module", "scripts": { "docs-api:build": "node scripts/build-docs-api-index.js", - "docs-api:serve": "node scripts/docs-api-server.js" + "docs-api:serve": "node scripts/docs-api-server.js", + "docs-mcp:serve": "node scripts/docs-mcp-server.js" }, "dependencies": { "axios": "^1.4.0", diff --git a/scripts/docs-mcp-server.js b/scripts/docs-mcp-server.js new file mode 100644 index 0000000000000..bf7802a5df969 --- /dev/null +++ b/scripts/docs-mcp-server.js @@ -0,0 +1,391 @@ +import { + buildDocsIndex, + docsApiSchema, + loadDocContentByPath, + loadTemplateVariables, + resolveDefaultSourceDir, +} from "./docs-api-lib.js"; + +const SERVER_NAME = "tidb-docs-mcp"; +const SERVER_VERSION = "0.1.0"; +const PROTOCOL_VERSION = "2024-11-05"; + +const SOURCE_DIR = + process.env.DOCS_API_SOURCE_DIR || resolveDefaultSourceDir(process.cwd()); + +const templateVariables = loadTemplateVariables(SOURCE_DIR); +let 
docsIndex = buildDocsIndex(SOURCE_DIR); + +const toInt = (value, fallback) => { + const num = Number.parseInt(value, 10); + return Number.isNaN(num) ? fallback : num; +}; + +const containsCI = (text, keyword) => + String(text || "") + .toLowerCase() + .includes(String(keyword || "").toLowerCase()); + +const stripPrivateFields = (doc, includeContent = false) => { + const result = { + id: doc.id, + path: doc.path, + title: doc.title, + summary: doc.summary, + product: doc.product, + topics: doc.topics, + features: doc.features, + headings: doc.headings, + frontMatter: doc.frontMatter, + frontMatterRaw: doc.frontMatterRaw, + updatedAt: doc.updatedAt, + }; + if (includeContent) { + result.content = loadDocContentByPath(SOURCE_DIR, doc.path, templateVariables); + result.contentType = "text/markdown"; + } + return result; +}; + +const searchDocs = (args = {}) => { + const feature = args.feature; + const topic = args.topic; + const keyword = args.q; + const pathFilter = args.path; + const includeContent = args.includeContent === true; + const limit = Math.min(Math.max(toInt(args.limit, 20), 1), 100); + const offset = Math.max(toInt(args.offset, 0), 0); + + let rows = docsIndex.docs; + + if (feature) { + rows = rows.filter((doc) => + doc.features.some((item) => item.toLowerCase() === String(feature).toLowerCase()) + ); + } + if (topic) { + rows = rows.filter((doc) => + doc.topics.some((item) => item.toLowerCase() === String(topic).toLowerCase()) + ); + } + if (pathFilter) { + rows = rows.filter((doc) => doc.path.toLowerCase() === String(pathFilter).toLowerCase()); + } + if (keyword) { + rows = rows.filter((doc) => { + return ( + containsCI(doc.path, keyword) || + containsCI(doc.title, keyword) || + containsCI(doc.summary, keyword) || + containsCI(doc._searchText || "", keyword) + ); + }); + } + + const total = rows.length; + const data = rows + .slice(offset, offset + limit) + .map((doc) => stripPrivateFields(doc, includeContent)); + + return { + meta: { + total, + limit, 
+ offset, + returned: data.length, + includeContent, + sourceDir: SOURCE_DIR, + }, + data, + }; +}; + +const getDocByPathOrId = (args = {}) => { + const docPath = args.path; + const docId = args.id; + if (!docPath && !docId) { + throw new Error("Either path or id is required."); + } + const doc = docsIndex.docs.find((item) => { + if (docPath && item.path.toLowerCase() === String(docPath).toLowerCase()) return true; + if (docId && item.id.toLowerCase() === String(docId).toLowerCase()) return true; + return false; + }); + if (!doc) throw new Error("Document not found."); + return stripPrivateFields(doc, true); +}; + +const listFeatures = (args = {}) => { + const prefix = String(args.prefix || ""); + if (!prefix) return docsIndex.features; + return docsIndex.features.filter((item) => item.toLowerCase().startsWith(prefix.toLowerCase())); +}; + +const reloadIndex = () => { + docsIndex = buildDocsIndex(SOURCE_DIR); + return { + ok: true, + totalDocs: docsIndex.totalDocs, + generatedAt: docsIndex.generatedAt, + sourceDir: SOURCE_DIR, + }; +}; + +const getResourceByUri = (uri) => { + if (uri === "docs://schema") { + return { + uri, + mimeType: "application/json", + text: JSON.stringify(docsApiSchema, null, 2), + }; + } + if (uri === "docs://index/meta") { + return { + uri, + mimeType: "application/json", + text: JSON.stringify( + { + schemaVersion: docsIndex.schemaVersion, + generatedAt: docsIndex.generatedAt, + totalDocs: docsIndex.totalDocs, + totalTopics: docsIndex.topics.length, + totalFeatures: docsIndex.features.length, + sourceDir: SOURCE_DIR, + }, + null, + 2 + ), + }; + } + if (uri.startsWith("docs://doc/")) { + const rawPath = decodeURIComponent(uri.replace("docs://doc/", "")); + const content = loadDocContentByPath(SOURCE_DIR, rawPath, templateVariables); + return { + uri, + mimeType: "text/markdown", + text: content, + }; + } + throw new Error(`Unsupported resource URI: ${uri}`); +}; + +const TOOL_DEFS = [ + { + name: "search_docs", + description: "Search TiDB 
docs by feature/topic/path/full-text. Returns lightweight records by default.", + inputSchema: { + type: "object", + properties: { + feature: { type: "string" }, + topic: { type: "string" }, + q: { type: "string" }, + path: { type: "string" }, + limit: { type: "integer", minimum: 1, maximum: 100 }, + offset: { type: "integer", minimum: 0 }, + includeContent: { type: "boolean", default: false }, + }, + additionalProperties: false, + }, + }, + { + name: "get_doc_content", + description: "Get full markdown content by document path or id.", + inputSchema: { + type: "object", + properties: { + path: { type: "string" }, + id: { type: "string" }, + }, + additionalProperties: false, + }, + }, + { + name: "list_topics", + description: "List all available topics/categories in the docs index.", + inputSchema: { + type: "object", + properties: {}, + additionalProperties: false, + }, + }, + { + name: "list_features", + description: "List all recognized feature tokens, optionally filtered by prefix.", + inputSchema: { + type: "object", + properties: { + prefix: { type: "string" }, + }, + additionalProperties: false, + }, + }, + { + name: "reload_docs_index", + description: "Reload docs index from disk (use after docs update).", + inputSchema: { + type: "object", + properties: {}, + additionalProperties: false, + }, + }, +]; + +const buildResourceList = () => [ + { + uri: "docs://schema", + name: "Docs API Schema", + description: "Schema and endpoint model for docs capabilities.", + mimeType: "application/json", + }, + { + uri: "docs://index/meta", + name: "Docs Index Meta", + description: "Index metadata such as counts and generated timestamp.", + mimeType: "application/json", + }, + ...docsIndex.docs.map((doc) => ({ + uri: `docs://doc/${encodeURIComponent(doc.path)}`, + name: doc.title, + description: doc.path, + mimeType: "text/markdown", + })), +]; + +const textResult = (payload) => { + return { + content: [ + { + type: "text", + text: JSON.stringify(payload, null, 2), + }, + 
], + }; +}; + +const handlers = { + initialize: (params) => ({ + protocolVersion: PROTOCOL_VERSION, + serverInfo: { + name: SERVER_NAME, + version: SERVER_VERSION, + }, + capabilities: { + tools: {}, + resources: {}, + }, + instructions: + "Use search_docs for discovery and get_doc_content for full markdown. Prefer lightweight responses unless full content is required.", + clientInfo: params?.clientInfo || null, + }), + "notifications/initialized": () => null, + "tools/list": () => ({ + tools: TOOL_DEFS, + }), + "tools/call": (params) => { + const name = params?.name; + const args = params?.arguments || {}; + if (name === "search_docs") return textResult(searchDocs(args)); + if (name === "get_doc_content") return textResult({ data: getDocByPathOrId(args) }); + if (name === "list_topics") return textResult({ data: docsIndex.topics }); + if (name === "list_features") return textResult({ data: listFeatures(args) }); + if (name === "reload_docs_index") return textResult(reloadIndex()); + throw new Error(`Unknown tool: ${name}`); + }, + "resources/list": () => ({ + resources: buildResourceList(), + }), + "resources/read": (params) => ({ + contents: [getResourceByUri(params?.uri)], + }), + "ping": () => ({}), +}; + +let inputBuffer = Buffer.alloc(0); + +const writeMessage = (message) => { + const json = JSON.stringify(message); + const header = `Content-Length: ${Buffer.byteLength(json, "utf8")}\r\n\r\n`; + process.stdout.write(header + json); +}; + +const writeError = (id, code, message) => { + writeMessage({ + jsonrpc: "2.0", + id: id ?? 
null, + error: { + code, + message, + }, + }); +}; + +const parseMessages = () => { + while (true) { + const separator = inputBuffer.indexOf("\r\n\r\n"); + if (separator === -1) return; + + const headerRaw = inputBuffer.slice(0, separator).toString("utf8"); + const lengthLine = headerRaw + .split("\r\n") + .find((line) => line.toLowerCase().startsWith("content-length:")); + if (!lengthLine) { + inputBuffer = Buffer.alloc(0); + return; + } + const length = Number.parseInt(lengthLine.split(":")[1]?.trim() || "0", 10); + const bodyStart = separator + 4; + const bodyEnd = bodyStart + length; + if (inputBuffer.length < bodyEnd) return; + + const body = inputBuffer.slice(bodyStart, bodyEnd).toString("utf8"); + inputBuffer = inputBuffer.slice(bodyEnd); + + let message; + try { + message = JSON.parse(body); + } catch (error) { + writeError(null, -32700, "Parse error"); + continue; + } + handleMessage(message); + } +}; + +const handleMessage = (msg) => { + if (msg.jsonrpc !== "2.0") { + return writeError(msg.id, -32600, "Invalid Request"); + } + const method = msg.method; + const handler = handlers[method]; + if (!handler) { + if (msg.id !== undefined) writeError(msg.id, -32601, "Method not found"); + return; + } + try { + const result = handler(msg.params); + if (msg.id !== undefined && method !== "notifications/initialized") { + writeMessage({ + jsonrpc: "2.0", + id: msg.id, + result: result ?? 
{}, + }); + } + } catch (error) { + if (msg.id !== undefined) { + writeError(msg.id, -32000, String(error.message || error)); + } + } +}; + +process.stdin.on("data", (chunk) => { + inputBuffer = Buffer.concat([inputBuffer, chunk]); + parseMessages(); +}); + +process.stdin.on("end", () => { + process.exit(0); +}); + +process.stderr.write( + `[${SERVER_NAME}] ready (source=${SOURCE_DIR}, docs=${docsIndex.totalDocs})\n` +); From bc1d600da8012f2546b8664223ea508ef5b353af Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 16:16:53 +0800 Subject: [PATCH 05/10] Support HTTP transport and multi-source docs MCP Add HTTP transport, authentication and multi-source support to the Docs MCP server. Docs and README updated with HTTP usage, /mcp and /healthz endpoints, bearer token auth and x-docs-source header. package.json adds a new npm script (docs-mcp:serve:http). scripts/docs-mcp-server.js refactored: import http, bump server version, introduce TRANSPORT/HTTP_HOST/HTTP_PORT/AUTH_TOKEN/SOURCE_MAP, per-source state caching (index + template variables), source refresh, and modular handlers; implement JSON-RPC processing over both stdio and HTTP with proper error handling. scripts/docs-api-lib.js excludes the docs MCP markdown from indexing. Overall this enables running the MCP server over HTTP, supports multiple doc sources, and preserves the original stdio mode. --- api/docs-mcp-server.md | 60 ++++- package.json | 3 +- scripts/docs-api-lib.js | 2 +- scripts/docs-mcp-server.js | 528 ++++++++++++++++++++++++------------- 4 files changed, 410 insertions(+), 183 deletions(-) diff --git a/api/docs-mcp-server.md b/api/docs-mcp-server.md index 37281f0b4c4f0..7a235180cd8cb 100644 --- a/api/docs-mcp-server.md +++ b/api/docs-mcp-server.md @@ -5,9 +5,12 @@ summary: Expose TiDB docs as MCP tools and resources for AI clients such as Clau # Docs MCP Server (Experimental) -This server exposes TiDB docs through MCP over STDIO. 
+This server exposes TiDB docs through MCP using: -## Start server +- STDIO transport (local tool integration) +- HTTP transport (shared staging endpoint) + +## Start server (STDIO) ```bash npm run docs-mcp:serve @@ -19,6 +22,43 @@ Optional source override: DOCS_API_SOURCE_DIR=/workspaces/docs-staging npm run docs-mcp:serve ``` +## Start server (HTTP) + +```bash +DOCS_MCP_TRANSPORT=http DOCS_MCP_HTTP_HOST=0.0.0.0 DOCS_MCP_HTTP_PORT=3100 npm run docs-mcp:serve:http +``` + +MCP endpoint: `POST /mcp` +Health endpoint: `GET /healthz` + +## Authentication + +Set bearer token: + +```bash +DOCS_MCP_AUTH_TOKEN=YOUR_TOKEN +``` + +Then call MCP with header: + +```http +Authorization: Bearer YOUR_TOKEN +``` + +## Source isolation (staging/prod) + +You can map source keys to different docs directories: + +```bash +DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' +``` + +Clients can select source via header: + +```http +x-docs-source: staging +``` + ## MCP tools - `search_docs` @@ -53,9 +93,23 @@ Example: } ``` +## HTTP JSON-RPC example + +```bash +curl -X POST "http://127.0.0.1:3100/mcp" \ + -H "content-type: application/json" \ + -H "authorization: Bearer YOUR_TOKEN" \ + -H "x-docs-source: staging" \ + -d '{ + "jsonrpc":"2.0", + "id":1, + "method":"tools/call", + "params":{"name":"search_docs","arguments":{"feature":"tidb_max_dist_task_nodes","limit":3}} + }' +``` + ## Design notes - `search_docs` does full-text filtering but returns lightweight metadata by default. - `get_doc_content` fetches full markdown only when needed. - Template variables like `{{{ .starter }}}` are replaced using `variables.json` from source. 
- diff --git a/package.json b/package.json index 22f151c8362ef..44d9f9a60e66b 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,8 @@ "scripts": { "docs-api:build": "node scripts/build-docs-api-index.js", "docs-api:serve": "node scripts/docs-api-server.js", - "docs-mcp:serve": "node scripts/docs-mcp-server.js" + "docs-mcp:serve": "node scripts/docs-mcp-server.js", + "docs-mcp:serve:http": "node scripts/docs-mcp-server.js" }, "dependencies": { "axios": "^1.4.0", diff --git a/scripts/docs-api-lib.js b/scripts/docs-api-lib.js index acd39f5d8b5ea..8f79107c61718 100644 --- a/scripts/docs-api-lib.js +++ b/scripts/docs-api-lib.js @@ -2,7 +2,7 @@ import * as fs from "fs"; import path from "path"; const DOC_IGNORE_DIRS = new Set(["node_modules", ".git", "media", "tmp"]); -const DOC_IGNORE_FILES = new Set(["api/docs-json-api.md"]); +const DOC_IGNORE_FILES = new Set(["api/docs-json-api.md", "api/docs-mcp-server.md"]); const MAX_SUMMARY_LENGTH = 220; diff --git a/scripts/docs-mcp-server.js b/scripts/docs-mcp-server.js index bf7802a5df969..02b9290ed48ae 100644 --- a/scripts/docs-mcp-server.js +++ b/scripts/docs-mcp-server.js @@ -1,3 +1,4 @@ +import http from "http"; import { buildDocsIndex, docsApiSchema, @@ -7,26 +8,79 @@ import { } from "./docs-api-lib.js"; const SERVER_NAME = "tidb-docs-mcp"; -const SERVER_VERSION = "0.1.0"; +const SERVER_VERSION = "0.2.0"; const PROTOCOL_VERSION = "2024-11-05"; -const SOURCE_DIR = - process.env.DOCS_API_SOURCE_DIR || resolveDefaultSourceDir(process.cwd()); +const TRANSPORT = (process.env.DOCS_MCP_TRANSPORT || "stdio").toLowerCase(); +const HTTP_HOST = process.env.DOCS_MCP_HTTP_HOST || "127.0.0.1"; +const HTTP_PORT = Number.parseInt(process.env.DOCS_MCP_HTTP_PORT || "3100", 10); +const AUTH_TOKEN = process.env.DOCS_MCP_AUTH_TOKEN || ""; +const SOURCE_MAP = parseJsonMap(process.env.DOCS_MCP_SOURCE_MAP || ""); -const templateVariables = loadTemplateVariables(SOURCE_DIR); -let docsIndex = buildDocsIndex(SOURCE_DIR); +const 
DEFAULT_SOURCE_DIR = process.env.DOCS_API_SOURCE_DIR || resolveDefaultSourceDir(process.cwd()); +const stateCache = new Map(); -const toInt = (value, fallback) => { +function parseJsonMap(raw) { + if (!raw) return {}; + try { + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed; + } catch (_error) {} + return {}; +} + +function normalizeSourceConfig(sourceKey) { + if (sourceKey && SOURCE_MAP[sourceKey]) { + return { + sourceKey, + sourceDir: SOURCE_MAP[sourceKey], + }; + } + return { + sourceKey: sourceKey || "default", + sourceDir: DEFAULT_SOURCE_DIR, + }; +} + +function getSourceState(sourceKey) { + const cfg = normalizeSourceConfig(sourceKey); + const cacheKey = `${cfg.sourceKey}::${cfg.sourceDir}`; + if (!stateCache.has(cacheKey)) { + stateCache.set(cacheKey, { + sourceKey: cfg.sourceKey, + sourceDir: cfg.sourceDir, + templateVariables: loadTemplateVariables(cfg.sourceDir), + docsIndex: buildDocsIndex(cfg.sourceDir), + }); + } + return stateCache.get(cacheKey); +} + +function refreshSourceState(sourceKey) { + const cfg = normalizeSourceConfig(sourceKey); + const cacheKey = `${cfg.sourceKey}::${cfg.sourceDir}`; + const next = { + sourceKey: cfg.sourceKey, + sourceDir: cfg.sourceDir, + templateVariables: loadTemplateVariables(cfg.sourceDir), + docsIndex: buildDocsIndex(cfg.sourceDir), + }; + stateCache.set(cacheKey, next); + return next; +} + +function toInt(value, fallback) { const num = Number.parseInt(value, 10); return Number.isNaN(num) ? 
fallback : num; -}; +} -const containsCI = (text, keyword) => - String(text || "") +function containsCI(text, keyword) { + return String(text || "") .toLowerCase() .includes(String(keyword || "").toLowerCase()); +} -const stripPrivateFields = (doc, includeContent = false) => { +function stripPrivateFields(sourceState, doc, includeContent = false) { const result = { id: doc.id, path: doc.path, @@ -41,13 +95,17 @@ const stripPrivateFields = (doc, includeContent = false) => { updatedAt: doc.updatedAt, }; if (includeContent) { - result.content = loadDocContentByPath(SOURCE_DIR, doc.path, templateVariables); + result.content = loadDocContentByPath( + sourceState.sourceDir, + doc.path, + sourceState.templateVariables + ); result.contentType = "text/markdown"; } return result; -}; +} -const searchDocs = (args = {}) => { +function searchDocs(sourceState, args = {}) { const feature = args.feature; const topic = args.topic; const keyword = args.q; @@ -56,7 +114,7 @@ const searchDocs = (args = {}) => { const limit = Math.min(Math.max(toInt(args.limit, 20), 1), 100); const offset = Math.max(toInt(args.offset, 0), 0); - let rows = docsIndex.docs; + let rows = sourceState.docsIndex.docs; if (feature) { rows = rows.filter((doc) => @@ -85,7 +143,7 @@ const searchDocs = (args = {}) => { const total = rows.length; const data = rows .slice(offset, offset + limit) - .map((doc) => stripPrivateFields(doc, includeContent)); + .map((doc) => stripPrivateFields(sourceState, doc, includeContent)); return { meta: { @@ -94,44 +152,37 @@ const searchDocs = (args = {}) => { offset, returned: data.length, includeContent, - sourceDir: SOURCE_DIR, + sourceKey: sourceState.sourceKey, + sourceDir: sourceState.sourceDir, }, data, }; -}; +} -const getDocByPathOrId = (args = {}) => { +function getDocByPathOrId(sourceState, args = {}) { const docPath = args.path; const docId = args.id; if (!docPath && !docId) { throw new Error("Either path or id is required."); } - const doc = docsIndex.docs.find((item) 
=> { + const doc = sourceState.docsIndex.docs.find((item) => { if (docPath && item.path.toLowerCase() === String(docPath).toLowerCase()) return true; if (docId && item.id.toLowerCase() === String(docId).toLowerCase()) return true; return false; }); if (!doc) throw new Error("Document not found."); - return stripPrivateFields(doc, true); -}; + return stripPrivateFields(sourceState, doc, true); +} -const listFeatures = (args = {}) => { +function listFeatures(sourceState, args = {}) { const prefix = String(args.prefix || ""); - if (!prefix) return docsIndex.features; - return docsIndex.features.filter((item) => item.toLowerCase().startsWith(prefix.toLowerCase())); -}; - -const reloadIndex = () => { - docsIndex = buildDocsIndex(SOURCE_DIR); - return { - ok: true, - totalDocs: docsIndex.totalDocs, - generatedAt: docsIndex.generatedAt, - sourceDir: SOURCE_DIR, - }; -}; + if (!prefix) return sourceState.docsIndex.features; + return sourceState.docsIndex.features.filter((item) => + item.toLowerCase().startsWith(prefix.toLowerCase()) + ); +} -const getResourceByUri = (uri) => { +function getResourceByUri(sourceState, uri) { if (uri === "docs://schema") { return { uri, @@ -145,12 +196,13 @@ const getResourceByUri = (uri) => { mimeType: "application/json", text: JSON.stringify( { - schemaVersion: docsIndex.schemaVersion, - generatedAt: docsIndex.generatedAt, - totalDocs: docsIndex.totalDocs, - totalTopics: docsIndex.topics.length, - totalFeatures: docsIndex.features.length, - sourceDir: SOURCE_DIR, + schemaVersion: sourceState.docsIndex.schemaVersion, + generatedAt: sourceState.docsIndex.generatedAt, + totalDocs: sourceState.docsIndex.totalDocs, + totalTopics: sourceState.docsIndex.topics.length, + totalFeatures: sourceState.docsIndex.features.length, + sourceKey: sourceState.sourceKey, + sourceDir: sourceState.sourceDir, }, null, 2 @@ -159,7 +211,11 @@ const getResourceByUri = (uri) => { } if (uri.startsWith("docs://doc/")) { const rawPath = 
decodeURIComponent(uri.replace("docs://doc/", "")); - const content = loadDocContentByPath(SOURCE_DIR, rawPath, templateVariables); + const content = loadDocContentByPath( + sourceState.sourceDir, + rawPath, + sourceState.templateVariables + ); return { uri, mimeType: "text/markdown", @@ -167,7 +223,30 @@ const getResourceByUri = (uri) => { }; } throw new Error(`Unsupported resource URI: ${uri}`); -}; +} + +function buildResourceList(sourceState) { + return [ + { + uri: "docs://schema", + name: "Docs API Schema", + description: "Schema and endpoint model for docs capabilities.", + mimeType: "application/json", + }, + { + uri: "docs://index/meta", + name: "Docs Index Meta", + description: "Index metadata such as counts and generated timestamp.", + mimeType: "application/json", + }, + ...sourceState.docsIndex.docs.map((doc) => ({ + uri: `docs://doc/${encodeURIComponent(doc.path)}`, + name: doc.title, + description: doc.path, + mimeType: "text/markdown", + })), + ]; +} const TOOL_DEFS = [ { @@ -230,28 +309,7 @@ const TOOL_DEFS = [ }, ]; -const buildResourceList = () => [ - { - uri: "docs://schema", - name: "Docs API Schema", - description: "Schema and endpoint model for docs capabilities.", - mimeType: "application/json", - }, - { - uri: "docs://index/meta", - name: "Docs Index Meta", - description: "Index metadata such as counts and generated timestamp.", - mimeType: "application/json", - }, - ...docsIndex.docs.map((doc) => ({ - uri: `docs://doc/${encodeURIComponent(doc.path)}`, - name: doc.title, - description: doc.path, - mimeType: "text/markdown", - })), -]; - -const textResult = (payload) => { +function textResult(payload) { return { content: [ { @@ -260,132 +318,246 @@ const textResult = (payload) => { }, ], }; -}; - -const handlers = { - initialize: (params) => ({ - protocolVersion: PROTOCOL_VERSION, - serverInfo: { - name: SERVER_NAME, - version: SERVER_VERSION, - }, - capabilities: { - tools: {}, - resources: {}, - }, - instructions: - "Use search_docs for 
discovery and get_doc_content for full markdown. Prefer lightweight responses unless full content is required.", - clientInfo: params?.clientInfo || null, - }), - "notifications/initialized": () => null, - "tools/list": () => ({ - tools: TOOL_DEFS, - }), - "tools/call": (params) => { - const name = params?.name; - const args = params?.arguments || {}; - if (name === "search_docs") return textResult(searchDocs(args)); - if (name === "get_doc_content") return textResult({ data: getDocByPathOrId(args) }); - if (name === "list_topics") return textResult({ data: docsIndex.topics }); - if (name === "list_features") return textResult({ data: listFeatures(args) }); - if (name === "reload_docs_index") return textResult(reloadIndex()); - throw new Error(`Unknown tool: ${name}`); - }, - "resources/list": () => ({ - resources: buildResourceList(), - }), - "resources/read": (params) => ({ - contents: [getResourceByUri(params?.uri)], - }), - "ping": () => ({}), -}; - -let inputBuffer = Buffer.alloc(0); - -const writeMessage = (message) => { - const json = JSON.stringify(message); - const header = `Content-Length: ${Buffer.byteLength(json, "utf8")}\r\n\r\n`; - process.stdout.write(header + json); -}; - -const writeError = (id, code, message) => { - writeMessage({ - jsonrpc: "2.0", - id: id ?? 
null, - error: { - code, - message, - }, - }); -}; - -const parseMessages = () => { - while (true) { - const separator = inputBuffer.indexOf("\r\n\r\n"); - if (separator === -1) return; - - const headerRaw = inputBuffer.slice(0, separator).toString("utf8"); - const lengthLine = headerRaw - .split("\r\n") - .find((line) => line.toLowerCase().startsWith("content-length:")); - if (!lengthLine) { - inputBuffer = Buffer.alloc(0); - return; - } - const length = Number.parseInt(lengthLine.split(":")[1]?.trim() || "0", 10); - const bodyStart = separator + 4; - const bodyEnd = bodyStart + length; - if (inputBuffer.length < bodyEnd) return; +} - const body = inputBuffer.slice(bodyStart, bodyEnd).toString("utf8"); - inputBuffer = inputBuffer.slice(bodyEnd); +function buildHandlers(sourceState) { + return { + initialize: (params) => ({ + protocolVersion: PROTOCOL_VERSION, + serverInfo: { + name: SERVER_NAME, + version: SERVER_VERSION, + }, + capabilities: { + tools: {}, + resources: {}, + }, + instructions: + "Use search_docs for discovery and get_doc_content for full markdown. 
Prefer lightweight responses unless full content is required.", + clientInfo: params?.clientInfo || null, + }), + "notifications/initialized": () => null, + "tools/list": () => ({ + tools: TOOL_DEFS, + }), + "tools/call": (params) => { + const name = params?.name; + const args = params?.arguments || {}; + if (name === "search_docs") return textResult(searchDocs(sourceState, args)); + if (name === "get_doc_content") + return textResult({ data: getDocByPathOrId(sourceState, args) }); + if (name === "list_topics") return textResult({ data: sourceState.docsIndex.topics }); + if (name === "list_features") return textResult({ data: listFeatures(sourceState, args) }); + if (name === "reload_docs_index") { + const refreshed = refreshSourceState(sourceState.sourceKey); + return textResult({ + ok: true, + totalDocs: refreshed.docsIndex.totalDocs, + generatedAt: refreshed.docsIndex.generatedAt, + sourceKey: refreshed.sourceKey, + sourceDir: refreshed.sourceDir, + }); + } + throw new Error(`Unknown tool: ${name}`); + }, + "resources/list": () => ({ + resources: buildResourceList(sourceState), + }), + "resources/read": (params) => ({ + contents: [getResourceByUri(sourceState, params?.uri)], + }), + ping: () => ({}), + }; +} - let message; - try { - message = JSON.parse(body); - } catch (error) { - writeError(null, -32700, "Parse error"); - continue; - } - handleMessage(message); - } -}; +function processRpcMessage(msg, sourceKey) { + const sourceState = getSourceState(sourceKey); + const handlers = buildHandlers(sourceState); -const handleMessage = (msg) => { if (msg.jsonrpc !== "2.0") { - return writeError(msg.id, -32600, "Invalid Request"); + return { + jsonrpc: "2.0", + id: msg.id ?? null, + error: { + code: -32600, + message: "Invalid Request", + }, + }; } + const method = msg.method; const handler = handlers[method]; if (!handler) { - if (msg.id !== undefined) writeError(msg.id, -32601, "Method not found"); - return; + return { + jsonrpc: "2.0", + id: msg.id ?? 
null, + error: { + code: -32601, + message: "Method not found", + }, + }; } + try { const result = handler(msg.params); - if (msg.id !== undefined && method !== "notifications/initialized") { - writeMessage({ - jsonrpc: "2.0", - id: msg.id, - result: result ?? {}, - }); + if (msg.id === undefined || method === "notifications/initialized") { + return null; } + return { + jsonrpc: "2.0", + id: msg.id, + result: result ?? {}, + }; } catch (error) { - if (msg.id !== undefined) { - writeError(msg.id, -32000, String(error.message || error)); - } + return { + jsonrpc: "2.0", + id: msg.id ?? null, + error: { + code: -32000, + message: String(error.message || error), + }, + }; } -}; +} + +function validateAuth(headers) { + if (!AUTH_TOKEN) return true; + const raw = headers.authorization || ""; + if (!raw.toLowerCase().startsWith("bearer ")) return false; + const token = raw.slice(7).trim(); + return token === AUTH_TOKEN; +} + +function parseBodyJson(req) { + return new Promise((resolve, reject) => { + const chunks = []; + req.on("data", (chunk) => chunks.push(chunk)); + req.on("end", () => { + try { + const body = Buffer.concat(chunks).toString("utf8"); + resolve(body ? 
JSON.parse(body) : {}); + } catch (error) { + reject(error); + } + }); + req.on("error", reject); + }); +} + +function startHttpServer() { + const server = http.createServer(async (req, res) => { + if (req.url === "/healthz" && req.method === "GET") { + res.writeHead(200, { "content-type": "application/json; charset=utf-8" }); + res.end(JSON.stringify({ ok: true })); + return; + } + + if (req.url !== "/mcp" || req.method !== "POST") { + res.writeHead(404, { "content-type": "application/json; charset=utf-8" }); + res.end(JSON.stringify({ error: "Not found" })); + return; + } + + if (!validateAuth(req.headers)) { + res.writeHead(401, { "content-type": "application/json; charset=utf-8" }); + res.end(JSON.stringify({ error: "Unauthorized" })); + return; + } + + const sourceKey = (req.headers["x-docs-source"] || "default").toString(); + + try { + const json = await parseBodyJson(req); + const response = processRpcMessage(json, sourceKey); + if (!response) { + res.writeHead(204); + res.end(); + return; + } + res.writeHead(200, { "content-type": "application/json; charset=utf-8" }); + res.end(JSON.stringify(response)); + } catch (error) { + res.writeHead(400, { "content-type": "application/json; charset=utf-8" }); + res.end( + JSON.stringify({ + jsonrpc: "2.0", + id: null, + error: { + code: -32700, + message: `Parse error: ${String(error.message || error)}`, + }, + }) + ); + } + }); + + server.listen(HTTP_PORT, HTTP_HOST, () => { + process.stderr.write( + `[${SERVER_NAME}] http ready at http://${HTTP_HOST}:${HTTP_PORT}/mcp (defaultSource=${DEFAULT_SOURCE_DIR})\n` + ); + }); +} + +function startStdioServer() { + let inputBuffer = Buffer.alloc(0); + + const writeMessage = (message) => { + const json = JSON.stringify(message); + const header = `Content-Length: ${Buffer.byteLength(json, "utf8")}\r\n\r\n`; + process.stdout.write(header + json); + }; + + const parseMessages = () => { + while (true) { + const separator = inputBuffer.indexOf("\r\n\r\n"); + if (separator === -1) 
return; + + const headerRaw = inputBuffer.slice(0, separator).toString("utf8"); + const lengthLine = headerRaw + .split("\r\n") + .find((line) => line.toLowerCase().startsWith("content-length:")); + if (!lengthLine) { + inputBuffer = Buffer.alloc(0); + return; + } + const length = Number.parseInt(lengthLine.split(":")[1]?.trim() || "0", 10); + const bodyStart = separator + 4; + const bodyEnd = bodyStart + length; + if (inputBuffer.length < bodyEnd) return; + + const body = inputBuffer.slice(bodyStart, bodyEnd).toString("utf8"); + inputBuffer = inputBuffer.slice(bodyEnd); + + let msg; + try { + msg = JSON.parse(body); + } catch (_error) { + writeMessage({ + jsonrpc: "2.0", + id: null, + error: { code: -32700, message: "Parse error" }, + }); + continue; + } + const response = processRpcMessage(msg, "default"); + if (response) writeMessage(response); + } + }; + + process.stdin.on("data", (chunk) => { + inputBuffer = Buffer.concat([inputBuffer, chunk]); + parseMessages(); + }); -process.stdin.on("data", (chunk) => { - inputBuffer = Buffer.concat([inputBuffer, chunk]); - parseMessages(); -}); + process.stdin.on("end", () => process.exit(0)); + process.stderr.write( + `[${SERVER_NAME}] stdio ready (defaultSource=${DEFAULT_SOURCE_DIR})\n` + ); +} -process.stdin.on("end", () => { - process.exit(0); -}); +if (TRANSPORT === "http") { + startHttpServer(); +} else { + startStdioServer(); +} -process.stderr.write( - `[${SERVER_NAME}] ready (source=${SOURCE_DIR}, docs=${docsIndex.totalDocs})\n` -); From 42da92fdb5bb0da21f9aa5fc6332a750ec9655bb Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 16:19:15 +0800 Subject: [PATCH 06/10] Update docs-mcp-server.md --- api/docs-mcp-server.md | 97 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/api/docs-mcp-server.md b/api/docs-mcp-server.md index 7a235180cd8cb..790727a2e04f2 100644 --- a/api/docs-mcp-server.md +++ b/api/docs-mcp-server.md @@ -108,6 +108,103 @@ curl -X POST 
"http://127.0.0.1:3100/mcp" \ }' ``` +## Validation and Testing + +Use this section to validate your deployment locally and with remote teammates. + +### 1) Start service for shared testing + +```bash +DOCS_MCP_TRANSPORT=http \ +DOCS_MCP_HTTP_HOST=0.0.0.0 \ +DOCS_MCP_HTTP_PORT=3100 \ +DOCS_MCP_AUTH_TOKEN= \ +DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' \ +npm run docs-mcp:serve:http +``` + +Health check: + +```bash +curl http://:3100/healthz +``` + +### 2) Core validation commands + +Set variables: + +```bash +BASE=http://:3100/mcp +TOKEN= +``` + +List tools: + +```bash +curl -s -X POST "$BASE" \ + -H "content-type: application/json" \ + -H "authorization: Bearer $TOKEN" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' +``` + +Search by feature from staging source: + +```bash +curl -s -X POST "$BASE" \ + -H "content-type: application/json" \ + -H "authorization: Bearer $TOKEN" \ + -H "x-docs-source: staging" \ + -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"search_docs","arguments":{"feature":"tidb_max_dist_task_nodes","limit":3}}}' +``` + +Fetch full content of one doc: + +```bash +curl -s -X POST "$BASE" \ + -H "content-type: application/json" \ + -H "authorization: Bearer $TOKEN" \ + -H "x-docs-source: staging" \ + -d '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"get_doc_content","arguments":{"path":"tidb-cloud/backup-and-restore-serverless.md"}}}' +``` + +Reload docs index: + +```bash +curl -s -X POST "$BASE" \ + -H "content-type: application/json" \ + -H "authorization: Bearer $TOKEN" \ + -H "x-docs-source: staging" \ + -d '{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"reload_docs_index","arguments":{}}}' +``` + +### 3) Expected results checklist + +- `/healthz` returns `{ "ok": true }`. 
+- `tools/list` includes: + - `search_docs` + - `get_doc_content` + - `list_topics` + - `list_features` + - `reload_docs_index` +- `search_docs` response includes `meta.sourceKey` and `meta.sourceDir`. +- `x-docs-source: staging` points to your `docs-staging` path. +- No unresolved placeholders like `{{{ .starter }}}` in returned titles/content. +- `search_docs` returns lightweight records by default. +- `get_doc_content` returns full markdown content. + +### 4) Remote teammate test template + +Ask teammates to report: + +1. Environment: OS + time + endpoint tested. +2. Commands used (copy/paste). +3. Result summary: + - status code + - `meta.total` + - `meta.sourceKey` +4. Pass/fail verdict. +5. If failed: error payload + reproduction steps. + ## Design notes - `search_docs` does full-text filtering but returns lightweight metadata by default. From 1b0ced1263a5b787e188e337e72a228c2b2f6341 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 3 Apr 2026 16:42:26 +0800 Subject: [PATCH 07/10] Add TiDB Docs MCP Server docs, update TOC/API Add a new integration guide (ai/integrations/tidb-docs-mcp-server.md) that documents the TiDB Docs MCP Server (stdio/http transports, auth, source isolation, tools, resources, examples, validation and troubleshooting). Update the TOC (TOC-ai.md) to include the new entry and overhaul api/docs-mcp-server.md to match the expanded title/content and provide clearer start, auth, source-map, HTTP examples, validation steps, and design notes. 
--- TOC-ai.md | 1 + ai/integrations/tidb-docs-mcp-server.md | 210 ++++++++++++++++++++++++ api/docs-mcp-server.md | 206 ++++++++++++----------- 3 files changed, 313 insertions(+), 104 deletions(-) create mode 100644 ai/integrations/tidb-docs-mcp-server.md diff --git a/TOC-ai.md b/TOC-ai.md index e79d9a34cc2be..e42e79ceef66f 100644 --- a/TOC-ai.md +++ b/TOC-ai.md @@ -70,6 +70,7 @@ - [Amazon Bedrock](/ai/integrations/vector-search-integrate-with-amazon-bedrock.md) - MCP Server - [Overview](/ai/integrations/tidb-mcp-server.md) + - [TiDB Docs MCP Server](/ai/integrations/tidb-docs-mcp-server.md) - [Claude Code](/ai/integrations/tidb-mcp-claude-code.md) - [Claude Desktop](/ai/integrations/tidb-mcp-claude-desktop.md) - [Cursor](/ai/integrations/tidb-mcp-cursor.md) diff --git a/ai/integrations/tidb-docs-mcp-server.md b/ai/integrations/tidb-docs-mcp-server.md new file mode 100644 index 0000000000000..de871519797ab --- /dev/null +++ b/ai/integrations/tidb-docs-mcp-server.md @@ -0,0 +1,210 @@ +--- +title: TiDB Docs MCP Server +summary: Connect AI clients to TiDB documentation through an MCP server with search tools and markdown resources. +--- + +# TiDB Docs MCP Server + +TiDB Docs MCP Server exposes TiDB documentation to MCP-compatible AI clients such as Claude Code, Claude Desktop, VS Code, Cursor, and other tools. 
+ +It supports: + +- **STDIO transport** for local development +- **HTTP transport** for shared environments (for example, staging) +- **Bearer token authentication** +- **Source isolation** (for example, `staging` vs `prod`) + +## What you get + +The server provides structured tools and resources for docs access: + +- Search by feature, topic, path, and full-text +- Fetch full markdown for a single document on demand +- List topics and feature tokens +- Reload index after docs updates + +## Prerequisites + +- Node.js 18 or later +- TiDB docs repository cloned locally + +## Start the server + +### Start with STDIO transport + +```bash +npm run docs-mcp:serve +``` + +Optionally use `docs-staging` as source: + +```bash +DOCS_API_SOURCE_DIR=/workspaces/docs-staging npm run docs-mcp:serve +``` + +### Start with HTTP transport + +```bash +DOCS_MCP_TRANSPORT=http \ +DOCS_MCP_HTTP_HOST=0.0.0.0 \ +DOCS_MCP_HTTP_PORT=3100 \ +DOCS_MCP_AUTH_TOKEN= \ +DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' \ +npm run docs-mcp:serve:http +``` + +Endpoints: + +- MCP endpoint: `POST /mcp` +- Health check: `GET /healthz` + +## Authentication + +If `DOCS_MCP_AUTH_TOKEN` is set, all MCP HTTP calls must include: + +```http +Authorization: Bearer +``` + +## Source isolation + +Use `DOCS_MCP_SOURCE_MAP` to map source keys to directories: + +```bash +DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' +``` + +Then select source per request: + +```http +x-docs-source: staging +``` + +## Supported tools + +### Read-only tools + +- `search_docs` +- `get_doc_content` +- `list_topics` +- `list_features` + +### Admin tool + +- `reload_docs_index` + +## Supported resources + +- `docs://schema` +- `docs://index/meta` +- `docs://doc/` + +Example: + +- `docs://doc/tidb-cloud%2Fbackup-and-restore-serverless.md` + +## Client configuration examples + +### Claude Code (`.mcp.json`, STDIO) + +```json +{ + "mcpServers": { + "tidb-docs": 
{ + "command": "node", + "args": ["scripts/docs-mcp-server.js"], + "env": { + "DOCS_API_SOURCE_DIR": "/workspaces/docs-staging" + } + } + } +} +``` + +### Generic MCP HTTP client + +Use your MCP client's HTTP transport option with: + +- URL: `https://docs-api-staging.pingcap.com/mcp` (or your own endpoint) +- Header: `Authorization: Bearer ` +- Header (optional): `x-docs-source: staging` + +## HTTP JSON-RPC example + +```bash +curl -X POST "http://127.0.0.1:3100/mcp" \ + -H "content-type: application/json" \ + -H "authorization: Bearer " \ + -H "x-docs-source: staging" \ + -d '{ + "jsonrpc":"2.0", + "id":1, + "method":"tools/call", + "params":{ + "name":"search_docs", + "arguments":{"feature":"tidb_max_dist_task_nodes","limit":3} + } + }' +``` + +## Validate your deployment + +### 1. Health check + +```bash +curl http://:3100/healthz +``` + +Expected: + +- `{"ok":true}` + +### 2. Check available tools + +```bash +curl -s -X POST "http://:3100/mcp" \ + -H "content-type: application/json" \ + -H "authorization: Bearer " \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' +``` + +Expected tools: + +- `search_docs` +- `get_doc_content` +- `list_topics` +- `list_features` +- `reload_docs_index` + +### 3. Verify staging source and placeholder replacement + +```bash +curl -s -X POST "http://:3100/mcp" \ + -H "content-type: application/json" \ + -H "authorization: Bearer " \ + -H "x-docs-source: staging" \ + -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"search_docs","arguments":{"path":"tidb-cloud/backup-and-restore-serverless.md","limit":1}}}' +``` + +Check: + +- `meta.sourceKey` is `staging` +- Returned title/content does not include unresolved placeholders like `{{{ .starter }}}` + +## Troubleshooting + +- **401 Unauthorized** + - Verify `Authorization: Bearer ` and `DOCS_MCP_AUTH_TOKEN`. +- **Wrong docs source** + - Verify `x-docs-source` and `DOCS_MCP_SOURCE_MAP`. 
+- **No results for expected queries** + - Run `reload_docs_index` after docs updates. +- **Cannot connect** + - Check host/port and network access to `/mcp`. + +## Design notes + +- `search_docs` is optimized for lightweight response by default. +- Use `get_doc_content` when full markdown is required. +- Template variables (for example, `{{{ .starter }}}`) are resolved via `variables.json` in the selected source directory. + diff --git a/api/docs-mcp-server.md b/api/docs-mcp-server.md index 790727a2e04f2..de871519797ab 100644 --- a/api/docs-mcp-server.md +++ b/api/docs-mcp-server.md @@ -1,83 +1,111 @@ --- -title: Docs MCP Server (Experimental) -summary: Expose TiDB docs as MCP tools and resources for AI clients such as Claude Code and Cursor. +title: TiDB Docs MCP Server +summary: Connect AI clients to TiDB documentation through an MCP server with search tools and markdown resources. --- -# Docs MCP Server (Experimental) +# TiDB Docs MCP Server -This server exposes TiDB docs through MCP using: +TiDB Docs MCP Server exposes TiDB documentation to MCP-compatible AI clients such as Claude Code, Claude Desktop, VS Code, Cursor, and other tools. 
-- STDIO transport (local tool integration) -- HTTP transport (shared staging endpoint) +It supports: -## Start server (STDIO) +- **STDIO transport** for local development +- **HTTP transport** for shared environments (for example, staging) +- **Bearer token authentication** +- **Source isolation** (for example, `staging` vs `prod`) + +## What you get + +The server provides structured tools and resources for docs access: + +- Search by feature, topic, path, and full-text +- Fetch full markdown for a single document on demand +- List topics and feature tokens +- Reload index after docs updates + +## Prerequisites + +- Node.js 18 or later +- TiDB docs repository cloned locally + +## Start the server + +### Start with STDIO transport ```bash npm run docs-mcp:serve ``` -Optional source override: +Optionally use `docs-staging` as source: ```bash DOCS_API_SOURCE_DIR=/workspaces/docs-staging npm run docs-mcp:serve ``` -## Start server (HTTP) +### Start with HTTP transport ```bash -DOCS_MCP_TRANSPORT=http DOCS_MCP_HTTP_HOST=0.0.0.0 DOCS_MCP_HTTP_PORT=3100 npm run docs-mcp:serve:http +DOCS_MCP_TRANSPORT=http \ +DOCS_MCP_HTTP_HOST=0.0.0.0 \ +DOCS_MCP_HTTP_PORT=3100 \ +DOCS_MCP_AUTH_TOKEN= \ +DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' \ +npm run docs-mcp:serve:http ``` -MCP endpoint: `POST /mcp` -Health endpoint: `GET /healthz` - -## Authentication +Endpoints: -Set bearer token: +- MCP endpoint: `POST /mcp` +- Health check: `GET /healthz` -```bash -DOCS_MCP_AUTH_TOKEN= -``` +## Authentication -Then call MCP with header: +If `DOCS_MCP_AUTH_TOKEN` is set, all MCP HTTP calls must include: ```http -Authorization: Bearer +Authorization: Bearer ``` -## Source isolation (staging/prod) +## Source isolation -You can map source keys to different docs directories: +Use `DOCS_MCP_SOURCE_MAP` to map source keys to directories: ```bash DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' ``` -Clients can select 
source via header: +Then select source per request: ```http x-docs-source: staging ``` -## MCP tools +## Supported tools + +### Read-only tools - `search_docs` - `get_doc_content` - `list_topics` - `list_features` + +### Admin tool + - `reload_docs_index` -## MCP resources +## Supported resources - `docs://schema` - `docs://index/meta` -- `docs://doc/` +- `docs://doc/` Example: - `docs://doc/tidb-cloud%2Fbackup-and-restore-serverless.md` -## Claude Code example (`.mcp.json`) +## Client configuration examples + +### Claude Code (`.mcp.json`, STDIO) ```json { @@ -93,120 +121,90 @@ Example: } ``` +### Generic MCP HTTP client + +Use your MCP client's HTTP transport option with: + +- URL: `https://docs-api-staging.pingcap.com/mcp` (or your own endpoint) +- Header: `Authorization: Bearer ` +- Header (optional): `x-docs-source: staging` + ## HTTP JSON-RPC example ```bash curl -X POST "http://127.0.0.1:3100/mcp" \ -H "content-type: application/json" \ - -H "authorization: Bearer " \ + -H "authorization: Bearer " \ -H "x-docs-source: staging" \ -d '{ "jsonrpc":"2.0", "id":1, "method":"tools/call", - "params":{"name":"search_docs","arguments":{"feature":"tidb_max_dist_task_nodes","limit":3}} + "params":{ + "name":"search_docs", + "arguments":{"feature":"tidb_max_dist_task_nodes","limit":3} + } }' ``` -## Validation and Testing - -Use this section to validate your deployment locally and with remote teammates. +## Validate your deployment -### 1) Start service for shared testing - -```bash -DOCS_MCP_TRANSPORT=http \ -DOCS_MCP_HTTP_HOST=0.0.0.0 \ -DOCS_MCP_HTTP_PORT=3100 \ -DOCS_MCP_AUTH_TOKEN= \ -DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' \ -npm run docs-mcp:serve:http -``` - -Health check: +### 1. Health check ```bash curl http://:3100/healthz ``` -### 2) Core validation commands +Expected: -Set variables: +- `{"ok":true}` -```bash -BASE=http://:3100/mcp -TOKEN= -``` - -List tools: +### 2. 
Check available tools ```bash -curl -s -X POST "$BASE" \ +curl -s -X POST "http://:3100/mcp" \ -H "content-type: application/json" \ - -H "authorization: Bearer $TOKEN" \ + -H "authorization: Bearer " \ -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' ``` -Search by feature from staging source: +Expected tools: -```bash -curl -s -X POST "$BASE" \ - -H "content-type: application/json" \ - -H "authorization: Bearer $TOKEN" \ - -H "x-docs-source: staging" \ - -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"search_docs","arguments":{"feature":"tidb_max_dist_task_nodes","limit":3}}}' -``` +- `search_docs` +- `get_doc_content` +- `list_topics` +- `list_features` +- `reload_docs_index` -Fetch full content of one doc: +### 3. Verify staging source and placeholder replacement ```bash -curl -s -X POST "$BASE" \ +curl -s -X POST "http://:3100/mcp" \ -H "content-type: application/json" \ - -H "authorization: Bearer $TOKEN" \ + -H "authorization: Bearer " \ -H "x-docs-source: staging" \ - -d '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"get_doc_content","arguments":{"path":"tidb-cloud/backup-and-restore-serverless.md"}}}' + -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"search_docs","arguments":{"path":"tidb-cloud/backup-and-restore-serverless.md","limit":1}}}' ``` -Reload docs index: +Check: -```bash -curl -s -X POST "$BASE" \ - -H "content-type: application/json" \ - -H "authorization: Bearer $TOKEN" \ - -H "x-docs-source: staging" \ - -d '{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"reload_docs_index","arguments":{}}}' -``` +- `meta.sourceKey` is `staging` +- Returned title/content does not include unresolved placeholders like `{{{ .starter }}}` + +## Troubleshooting -### 3) Expected results checklist - -- `/healthz` returns `{ "ok": true }`. 
-- `tools/list` includes: - - `search_docs` - - `get_doc_content` - - `list_topics` - - `list_features` - - `reload_docs_index` -- `search_docs` response includes `meta.sourceKey` and `meta.sourceDir`. -- `x-docs-source: staging` points to your `docs-staging` path. -- No unresolved placeholders like `{{{ .starter }}}` in returned titles/content. -- `search_docs` returns lightweight records by default. -- `get_doc_content` returns full markdown content. - -### 4) Remote teammate test template - -Ask teammates to report: - -1. Environment: OS + time + endpoint tested. -2. Commands used (copy/paste). -3. Result summary: - - status code - - `meta.total` - - `meta.sourceKey` -4. Pass/fail verdict. -5. If failed: error payload + reproduction steps. +- **401 Unauthorized** + - Verify `Authorization: Bearer ` and `DOCS_MCP_AUTH_TOKEN`. +- **Wrong docs source** + - Verify `x-docs-source` and `DOCS_MCP_SOURCE_MAP`. +- **No results for expected queries** + - Run `reload_docs_index` after docs updates. +- **Cannot connect** + - Check host/port and network access to `/mcp`. ## Design notes -- `search_docs` does full-text filtering but returns lightweight metadata by default. -- `get_doc_content` fetches full markdown only when needed. -- Template variables like `{{{ .starter }}}` are replaced using `variables.json` from source. +- `search_docs` is optimized for lightweight response by default. +- Use `get_doc_content` when full markdown is required. +- Template variables (for example, `{{{ .starter }}}`) are resolved via `variables.json` in the selected source directory. 
+ From 2c5f8aa58a42589b3102247c3ef111234295d377 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Tue, 7 Apr 2026 16:55:48 +0800 Subject: [PATCH 08/10] Delete docs-mcp-server.md --- api/docs-mcp-server.md | 210 ----------------------------------------- 1 file changed, 210 deletions(-) delete mode 100644 api/docs-mcp-server.md diff --git a/api/docs-mcp-server.md b/api/docs-mcp-server.md deleted file mode 100644 index de871519797ab..0000000000000 --- a/api/docs-mcp-server.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: TiDB Docs MCP Server -summary: Connect AI clients to TiDB documentation through an MCP server with search tools and markdown resources. ---- - -# TiDB Docs MCP Server - -TiDB Docs MCP Server exposes TiDB documentation to MCP-compatible AI clients such as Claude Code, Claude Desktop, VS Code, Cursor, and other tools. - -It supports: - -- **STDIO transport** for local development -- **HTTP transport** for shared environments (for example, staging) -- **Bearer token authentication** -- **Source isolation** (for example, `staging` vs `prod`) - -## What you get - -The server provides structured tools and resources for docs access: - -- Search by feature, topic, path, and full-text -- Fetch full markdown for a single document on demand -- List topics and feature tokens -- Reload index after docs updates - -## Prerequisites - -- Node.js 18 or later -- TiDB docs repository cloned locally - -## Start the server - -### Start with STDIO transport - -```bash -npm run docs-mcp:serve -``` - -Optionally use `docs-staging` as source: - -```bash -DOCS_API_SOURCE_DIR=/workspaces/docs-staging npm run docs-mcp:serve -``` - -### Start with HTTP transport - -```bash -DOCS_MCP_TRANSPORT=http \ -DOCS_MCP_HTTP_HOST=0.0.0.0 \ -DOCS_MCP_HTTP_PORT=3100 \ -DOCS_MCP_AUTH_TOKEN= \ -DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' \ -npm run docs-mcp:serve:http -``` - -Endpoints: - -- MCP endpoint: `POST /mcp` -- Health check: `GET /healthz` - -## 
Authentication - -If `DOCS_MCP_AUTH_TOKEN` is set, all MCP HTTP calls must include: - -```http -Authorization: Bearer -``` - -## Source isolation - -Use `DOCS_MCP_SOURCE_MAP` to map source keys to directories: - -```bash -DOCS_MCP_SOURCE_MAP='{"staging":"/workspaces/docs-staging","prod":"/workspaces/docs"}' -``` - -Then select source per request: - -```http -x-docs-source: staging -``` - -## Supported tools - -### Read-only tools - -- `search_docs` -- `get_doc_content` -- `list_topics` -- `list_features` - -### Admin tool - -- `reload_docs_index` - -## Supported resources - -- `docs://schema` -- `docs://index/meta` -- `docs://doc/` - -Example: - -- `docs://doc/tidb-cloud%2Fbackup-and-restore-serverless.md` - -## Client configuration examples - -### Claude Code (`.mcp.json`, STDIO) - -```json -{ - "mcpServers": { - "tidb-docs": { - "command": "node", - "args": ["scripts/docs-mcp-server.js"], - "env": { - "DOCS_API_SOURCE_DIR": "/workspaces/docs-staging" - } - } - } -} -``` - -### Generic MCP HTTP client - -Use your MCP client's HTTP transport option with: - -- URL: `https://docs-api-staging.pingcap.com/mcp` (or your own endpoint) -- Header: `Authorization: Bearer ` -- Header (optional): `x-docs-source: staging` - -## HTTP JSON-RPC example - -```bash -curl -X POST "http://127.0.0.1:3100/mcp" \ - -H "content-type: application/json" \ - -H "authorization: Bearer " \ - -H "x-docs-source: staging" \ - -d '{ - "jsonrpc":"2.0", - "id":1, - "method":"tools/call", - "params":{ - "name":"search_docs", - "arguments":{"feature":"tidb_max_dist_task_nodes","limit":3} - } - }' -``` - -## Validate your deployment - -### 1. Health check - -```bash -curl http://:3100/healthz -``` - -Expected: - -- `{"ok":true}` - -### 2. 
Check available tools - -```bash -curl -s -X POST "http://:3100/mcp" \ - -H "content-type: application/json" \ - -H "authorization: Bearer " \ - -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' -``` - -Expected tools: - -- `search_docs` -- `get_doc_content` -- `list_topics` -- `list_features` -- `reload_docs_index` - -### 3. Verify staging source and placeholder replacement - -```bash -curl -s -X POST "http://:3100/mcp" \ - -H "content-type: application/json" \ - -H "authorization: Bearer " \ - -H "x-docs-source: staging" \ - -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"search_docs","arguments":{"path":"tidb-cloud/backup-and-restore-serverless.md","limit":1}}}' -``` - -Check: - -- `meta.sourceKey` is `staging` -- Returned title/content does not include unresolved placeholders like `{{{ .starter }}}` - -## Troubleshooting - -- **401 Unauthorized** - - Verify `Authorization: Bearer ` and `DOCS_MCP_AUTH_TOKEN`. -- **Wrong docs source** - - Verify `x-docs-source` and `DOCS_MCP_SOURCE_MAP`. -- **No results for expected queries** - - Run `reload_docs_index` after docs updates. -- **Cannot connect** - - Check host/port and network access to `/mcp`. - -## Design notes - -- `search_docs` is optimized for lightweight response by default. -- Use `get_doc_content` when full markdown is required. -- Template variables (for example, `{{{ .starter }}}`) are resolved via `variables.json` in the selected source directory. 
- From 23252a7be67c3d36582fd50d33ef2ecf3de17007 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Tue, 7 Apr 2026 16:57:31 +0800 Subject: [PATCH 09/10] Update TOC-ai.md --- TOC-ai.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TOC-ai.md b/TOC-ai.md index e42e79ceef66f..5ce52f73f6b09 100644 --- a/TOC-ai.md +++ b/TOC-ai.md @@ -70,12 +70,12 @@ - [Amazon Bedrock](/ai/integrations/vector-search-integrate-with-amazon-bedrock.md) - MCP Server - [Overview](/ai/integrations/tidb-mcp-server.md) - - [TiDB Docs MCP Server](/ai/integrations/tidb-docs-mcp-server.md) - [Claude Code](/ai/integrations/tidb-mcp-claude-code.md) - [Claude Desktop](/ai/integrations/tidb-mcp-claude-desktop.md) - [Cursor](/ai/integrations/tidb-mcp-cursor.md) - [VS Code](/ai/integrations/tidb-mcp-vscode.md) - [Windsurf](/ai/integrations/tidb-mcp-windsurf.md) + - [TiDB Docs MCP Server](/ai/integrations/tidb-docs-mcp-server.md) ## REFERENCE From 8f29341ffccc2f0ab703dddfcb2d5c7734479861 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Tue, 7 Apr 2026 17:09:58 +0800 Subject: [PATCH 10/10] Update tidb-docs-mcp-server.md --- ai/integrations/tidb-docs-mcp-server.md | 141 +++++++++++++++++++++--- 1 file changed, 125 insertions(+), 16 deletions(-) diff --git a/ai/integrations/tidb-docs-mcp-server.md b/ai/integrations/tidb-docs-mcp-server.md index de871519797ab..f3aecfe6026a1 100644 --- a/ai/integrations/tidb-docs-mcp-server.md +++ b/ai/integrations/tidb-docs-mcp-server.md @@ -1,6 +1,6 @@ --- title: TiDB Docs MCP Server -summary: Connect AI clients to TiDB documentation through an MCP server with search tools and markdown resources. +summary: Connect AI clients to TiDB documentation through an MCP server with version routing, layered tools, and citation-aware answers. 
--- # TiDB Docs MCP Server @@ -13,16 +13,52 @@ It supports: - **HTTP transport** for shared environments (for example, staging) - **Bearer token authentication** - **Source isolation** (for example, `staging` vs `prod`) +- **Version routing** (defaults to the latest LTS stable docs) ## What you get The server provides structured tools and resources for docs access: -- Search by feature, topic, path, and full-text -- Fetch full markdown for a single document on demand -- List topics and feature tokens +- Search and retrieve docs by intent (search, page, procedure, reference, concept) +- Enforce version-aware retrieval +- Return per-answer source citations for trust and traceability - Reload index after docs updates +## Version routing (required) + +Each tool call accepts: + +- `version` (optional) + +Routing behavior: + +- If `version` is provided, the server routes to that documentation version. +- If `version` is omitted, the server routes to the **latest LTS stable** version by default. +- The resolved version must be returned in each result payload. + +This behavior is mandatory to avoid mixing content across documentation versions. + +## Chunk metadata + +Each indexed and returned chunk must include: + +- `version` +- `category` (`tuning` | `deploy` | `reference`) +- `doc_type` (`concept` | `task` | `reference`) + +Example: + +```json +{ + "chunk_id": "tidb-v8.5:/tuning/sql-performance.md#chunk-03", + "version": "v8.5", + "category": "tuning", + "doc_type": "task", + "source": "/tuning/sql-performance.md", + "content": "..." 
+} +``` + ## Prerequisites - Node.js 18 or later @@ -80,14 +116,29 @@ Then select source per request: x-docs-source: staging ``` +## Tool layering (required) + +Expose at least these tools: + +| Tool | Purpose | +| --- | --- | +| `search_docs` | Fuzzy retrieval by keyword, topic, or intent | +| `get_doc_page` | Fetch one exact document by path | +| `get_procedure` | Extract ordered steps from task-style docs | +| `get_reference` | Return parameter, variable, option, or field reference entries | +| `explain_concept` | Return concept-focused explanations from concept docs | + +`reload_docs_index` remains an admin tool. + ## Supported tools ### Read-only tools - `search_docs` -- `get_doc_content` -- `list_topics` -- `list_features` +- `get_doc_page` +- `get_procedure` +- `get_reference` +- `explain_concept` ### Admin tool @@ -129,7 +180,9 @@ Use your MCP client's HTTP transport option with: - Header: `Authorization: Bearer ` - Header (optional): `x-docs-source: staging` -## HTTP JSON-RPC example +## HTTP JSON-RPC examples + +Search example: ```bash curl -X POST "http://127.0.0.1:3100/mcp" \ @@ -142,11 +195,64 @@ curl -X POST "http://127.0.0.1:3100/mcp" \ "method":"tools/call", "params":{ "name":"search_docs", - "arguments":{"feature":"tidb_max_dist_task_nodes","limit":3} + "arguments":{ + "query":"tidb_max_dist_task_nodes", + "version":"v8.5", + "limit":3 + } } }' ``` +Exact page example: + +```bash +curl -X POST "http://127.0.0.1:3100/mcp" \ + -H "content-type: application/json" \ + -H "authorization: Bearer " \ + -d '{ + "jsonrpc":"2.0", + "id":2, + "method":"tools/call", + "params":{ + "name":"get_doc_page", + "arguments":{ + "path":"/tuning/sql-performance.md", + "version":"v8.5" + } + } + }' +``` + +## Answer constraint template (required) + +Use this template in your MCP client system prompt: + +```text +You are a TiDB docs assistant. +Only answer using the provided MCP context. +Do not use prior knowledge. 
+If the answer is not found in context, reply exactly: "not documented". +Always include a source citation for each key statement. +``` + +## Citation output (strongly recommended) + +Each answer payload should include a `citations` list whose entries carry `source` for traceability: + +```json +{ + "answer": "Set this variable to ...", + "version": "v8.5", + "citations": [ + { + "source": "/tuning/sql-performance.md", + "section": "Tune SQL execution performance" + } + ] +} +``` + ## Validate your deployment ### 1. Health check @@ -171,24 +277,26 @@ curl -s -X POST "http://:3100/mcp" \ Expected tools: - `search_docs` -- `get_doc_content` -- `list_topics` -- `list_features` +- `get_doc_page` +- `get_procedure` +- `get_reference` +- `explain_concept` - `reload_docs_index` -### 3. Verify staging source and placeholder replacement +### 3. Verify staging source and metadata ```bash curl -s -X POST "http://:3100/mcp" \ -H "content-type: application/json" \ -H "authorization: Bearer " \ -H "x-docs-source: staging" \ - -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"search_docs","arguments":{"path":"tidb-cloud/backup-and-restore-serverless.md","limit":1}}}' + -d '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"search_docs","arguments":{"query":"backup and restore serverless","version":"v8.5","limit":1}}}' ``` Check: - `meta.sourceKey` is `staging` +- Returned chunk metadata includes `version`, `category`, and `doc_type` - Returned title/content does not include unresolved placeholders like `{{{ .starter }}}` ## Troubleshooting @@ -205,6 +313,7 @@ Check: ## Design notes - `search_docs` is optimized for lightweight response by default. -- Use `get_doc_content` when full markdown is required. +- Use `get_doc_page` when full markdown is required. +- Enforce version routing on every query. If `version` is missing, resolve to the latest LTS stable version. +- Include `source` in outputs to improve answer trust. 
- Template variables (for example, `{{{ .starter }}}`) are resolved via `variables.json` in the selected source directory. -