diff --git a/src/aggregate.test.ts b/src/aggregate.test.ts index bc7bd87..a72aaf4 100644 --- a/src/aggregate.test.ts +++ b/src/aggregate.test.ts @@ -137,3 +137,105 @@ describe("aggregate", () => { expect(groups).toHaveLength(0); }); }); + +// ─── aggregate with regexFilter ─────────────────────────────────────────────── + +function makeMatchWithFragments(repo: string, path: string, fragments: string[]): CodeMatch { + return { + path, + repoFullName: repo, + htmlUrl: `https://github.com/${repo}/blob/main/${path}`, + archived: false, + textMatches: fragments.map((fragment) => ({ fragment, matches: [] })), + }; +} + +describe("aggregate — regexFilter", () => { + it("keeps matches where at least one fragment satisfies the regex", () => { + const matches: CodeMatch[] = [ + makeMatchWithFragments("myorg/repoA", "src/a.ts", ["import axios from 'axios'"]), + makeMatchWithFragments("myorg/repoA", "src/b.ts", ["const x = 1"]), + ]; + + const groups = aggregate(matches, new Set(), new Set(), false, /axios/); + expect(groups).toHaveLength(1); + expect(groups[0].matches).toHaveLength(1); + expect(groups[0].matches[0].path).toBe("src/a.ts"); + }); + + it("excludes the whole repo when no fragment matches the regex", () => { + const matches: CodeMatch[] = [ + makeMatchWithFragments("myorg/repoA", "src/a.ts", ["const x = 1"]), + makeMatchWithFragments("myorg/repoA", "src/b.ts", ["const y = 2"]), + ]; + + const groups = aggregate(matches, new Set(), new Set(), false, /axios/); + expect(groups).toHaveLength(0); + }); + + it("matches against any fragment in a multi-fragment match", () => { + const matches: CodeMatch[] = [ + makeMatchWithFragments("myorg/repoA", "src/a.ts", [ + "unrelated line", + 'import axios from "axios"', + ]), + ]; + + const groups = aggregate(matches, new Set(), new Set(), false, /axios/); + expect(groups).toHaveLength(1); + }); + + it("keeps all matches when regexFilter is undefined (backward compat)", () => { + const matches: CodeMatch[] = [ + 
makeMatchWithFragments("myorg/repoA", "src/a.ts", ["const x = 1"]), + makeMatchWithFragments("myorg/repoB", "src/b.ts", ["const y = 2"]), + ]; + + const groups = aggregate(matches, new Set(), new Set()); + expect(groups).toHaveLength(2); + }); + + it("keeps all matches when regexFilter is null (backward compat — null treated as no filter)", () => { + const matches: CodeMatch[] = [ + makeMatchWithFragments("myorg/repoA", "src/a.ts", ["const x = 1"]), + makeMatchWithFragments("myorg/repoB", "src/b.ts", ["const y = 2"]), + ]; + + const groups = aggregate(matches, new Set(), new Set(), false, null); + expect(groups).toHaveLength(2); + }); + + it("respects regex flags (case-insensitive)", () => { + const matches: CodeMatch[] = [ + makeMatchWithFragments("myorg/repoA", "src/a.ts", ["import AXIOS from 'axios'"]), + ]; + + const groups = aggregate(matches, new Set(), new Set(), false, /axios/i); + expect(groups).toHaveLength(1); + }); + + it("works with matches having no textMatches (empty fragments array)", () => { + const matches: CodeMatch[] = [ + makeMatch("myorg/repoA", "src/a.ts"), // textMatches: [] + ]; + + // No fragments → regex can never match → repo excluded + const groups = aggregate(matches, new Set(), new Set(), false, /axios/); + expect(groups).toHaveLength(0); + }); + + it("restores lastIndex to its pre-call value after filtering (does not clobber caller state)", () => { + // Regression: aggregate() must restore lastIndex to whatever the caller had + // set before the call — not necessarily 0. + const matches: CodeMatch[] = [ + makeMatchWithFragments("myorg/repoA", "src/a.ts", ["import axios from 'axios'"]), + ]; + const regex = /axios/g; + // Caller has already run a match, so lastIndex is non-zero. 
+ regex.test("import axios from 'axios'"); + const savedIndex = regex.lastIndex; + expect(savedIndex).toBeGreaterThan(0); + aggregate(matches, new Set(), new Set(), false, regex); + expect(regex.lastIndex).toBe(savedIndex); + }); +}); diff --git a/src/aggregate.ts b/src/aggregate.ts index 3fe7b91..bc94f14 100644 --- a/src/aggregate.ts +++ b/src/aggregate.ts @@ -34,11 +34,29 @@ export function aggregate( excludedRepos: Set, excludedExtractRefs: Set, includeArchived = false, + regexFilter?: RegExp | null, ): RepoGroup[] { const map = new Map(); for (const m of matches) { if (excludedRepos.has(m.repoFullName)) continue; if (!includeArchived && m.archived) continue; + // Fix: when a regex filter is active, only keep matches where at least one + // text_match fragment satisfies the pattern — see issue #111 + if (regexFilter != null) { + // Preserve the caller's lastIndex: aggregate() must not have observable + // side-effects on the passed-in RegExp instance. + const savedLastIndex = regexFilter.lastIndex; + const hasMatch = m.textMatches.some((tm) => { + // Fix: reset lastIndex before each call — a global/sticky regex is + // stateful and would produce false negatives on subsequent fragments. + regexFilter.lastIndex = 0; + return regexFilter.test(tm.fragment); + }); + // Restore the caller's original lastIndex (rather than hard-coding 0), + // so aggregate() doesn't have observable side effects on its inputs. + regexFilter.lastIndex = savedLastIndex; + if (!hasMatch) continue; + } const list = map.get(m.repoFullName) ?? 
[]; list.push(m); map.set(m.repoFullName, list); diff --git a/src/regex.test.ts b/src/regex.test.ts new file mode 100644 index 0000000..8add72b --- /dev/null +++ b/src/regex.test.ts @@ -0,0 +1,261 @@ +import { describe, expect, it } from "bun:test"; +import { buildApiQuery, isRegexQuery } from "./regex.ts"; + +// ─── isRegexQuery ───────────────────────────────────────────────────────────── + +describe("isRegexQuery", () => { + it("returns true for a bare regex token", () => { + expect(isRegexQuery("/from.*axios/")).toBe(true); + }); + + it("returns true for a regex with flags", () => { + expect(isRegexQuery("/pattern/i")).toBe(true); + }); + + it("returns true for a query mixing qualifiers and regex", () => { + expect(isRegexQuery('filename:package.json /["\'"]axios["\'"]:/')).toBe(true); + }); + + it("returns false for a plain text query", () => { + expect(isRegexQuery("from axios")).toBe(false); + }); + + it("returns false for an empty string", () => { + expect(isRegexQuery("")).toBe(false); + }); + + it("returns false for a qualifier-only query", () => { + expect(isRegexQuery("filename:package.json")).toBe(false); + }); + + it("returns false when /pattern/flags is not end-bounded (e.g. /useState/iSomething)", () => { + // Regression: the token must be followed by whitespace or end-of-string; + // a suffix of non-flag characters must not be silently swallowed. 
+ expect(isRegexQuery("/useState/iSomething")).toBe(false); + }); + + it("returns false for /pattern/e ('e' is not a valid JS RegExp flag)", () => { + expect(isRegexQuery("/pattern/e")).toBe(false); + }); +}); + +// ─── buildApiQuery ──────────────────────────────────────────────────────────── + +describe("buildApiQuery — plain text passthrough", () => { + it("returns input unchanged when no regex token", () => { + const r = buildApiQuery("plain text query"); + expect(r.apiQuery).toBe("plain text query"); + expect(r.regexFilter).toBeNull(); + expect(r.warn).toBeUndefined(); + }); + + it("/useState/iSomething is NOT treated as a regex token (boundary regression)", () => { + // 'iSomething' is not a valid flag sequence — the token should not match. + const r = buildApiQuery("/useState/iSomething"); + expect(r.apiQuery).toBe("/useState/iSomething"); + expect(r.regexFilter).toBeNull(); + }); + + it("/pattern/e is NOT treated as a regex token ('e' is not a valid JS RegExp flag)", () => { + const r = buildApiQuery("/pattern/e"); + expect(r.apiQuery).toBe("/pattern/e"); + expect(r.regexFilter).toBeNull(); + }); +}); + +describe("buildApiQuery — longest literal extraction", () => { + it("/from.*['\"]axios/ → axios", () => { + const r = buildApiQuery("/from.*['\"]axios/"); + expect(r.apiQuery).toBe("axios"); + expect(r.regexFilter).toEqual(/from.*['"]axios/); + expect(r.warn).toBeUndefined(); + }); + + it("/useState/ → useState (trivial literal)", () => { + const r = buildApiQuery("/useState/"); + expect(r.apiQuery).toBe("useState"); + expect(r.regexFilter).toEqual(/useState/); + }); + + it("/require\\(['\"']old-lib['\"]\\)/ → old-lib", () => { + const r = buildApiQuery("/require\\(['\"]old-lib['\"]\\)/"); + expect(r.apiQuery).toBe("old-lib"); + expect(r.regexFilter).not.toBeNull(); + }); +}); + +describe("buildApiQuery — top-level alternation → OR", () => { + it("/TODO|FIXME|HACK/ → TODO OR FIXME OR HACK", () => { + const r = buildApiQuery("/TODO|FIXME|HACK/"); + 
expect(r.apiQuery).toBe("TODO OR FIXME OR HACK"); + expect(r.regexFilter).toEqual(/TODO|FIXME|HACK/); + expect(r.warn).toBeUndefined(); + }); + + it("/a|bc/ — short branches (< 3 chars each) fall back to longest literal and warn", () => { + // branches: "a" (1 char) and "bc" (2 chars) — both < 3 → fall back to + // longestLiteralSequence("a|bc") → "bc" (2 chars) < 3 → warn + empty term + const r = buildApiQuery("/a|bc/"); + expect(r.warn).toBeDefined(); + expect(r.apiQuery).toBe(""); + expect(r.regexFilter).not.toBeNull(); + }); + + it("/\\\\|foo/ — escaped backslash before | → | is top-level → falls back to longest literal 'foo'", () => { + // Pattern \\|foo: \\ is an escaped backslash (matches literal \), | is top-level. + // splitTopLevelAlternation gives ["\\", "foo"]; "\\" yields no useful literal + // so branchTerms fails the every->=1 check and we fall back to longestLiteralSequence. + const r = buildApiQuery("/\\\\|foo/"); + expect(r.apiQuery).toBe("foo"); + expect(r.regexFilter).not.toBeNull(); + }); +}); + +describe("buildApiQuery — partial alternation falls back to longest literal", () => { + it("/(import|require).*someLongLib/ → someLongLib", () => { + const r = buildApiQuery("/(import|require).*someLongLib/"); + // The alternation is nested inside (...) — not top-level — so we fall back + // to the longest contiguous literal sequence: "someLongLib". 
+ expect(r.apiQuery).toBe("someLongLib"); + expect(r.regexFilter).not.toBeNull(); + }); +}); + +describe("buildApiQuery — qualifier preservation", () => { + it("filename:package.json /['\"]axios['\"]:/ → filename:package.json axios", () => { + const r = buildApiQuery("filename:package.json /['\"]axios['\"]:/"); + expect(r.apiQuery).toBe("filename:package.json axios"); + expect(r.regexFilter).not.toBeNull(); + expect(r.warn).toBeUndefined(); + }); + + it("preserves free-text terms alongside the regex token", () => { + const r = buildApiQuery("useFeatureFlag NOT deprecated /pattern/i"); + expect(r.apiQuery).toBe("useFeatureFlag NOT deprecated pattern"); + expect(r.regexFilter).not.toBeNull(); + }); + + it("preserves language: qualifier", () => { + const r = buildApiQuery("language:TypeScript /useState/"); + expect(r.apiQuery).toBe("language:TypeScript useState"); + }); + + it("preserves path: qualifier", () => { + const r = buildApiQuery("path:src/ /useState/"); + expect(r.apiQuery).toBe("path:src/ useState"); + }); + + it("preserves quoted phrase alongside regex token", () => { + // Regression: split(/\s+/) would break quoted phrases like \"feature flag\"; + // the reconstruction must replace only the regex token, byte-for-byte. + const r = buildApiQuery('"feature flag" /from.*axios/'); + expect(r.apiQuery).toBe('"feature flag" axios'); + expect(r.regexFilter).not.toBeNull(); + expect(r.warn).toBeUndefined(); + }); + + it("replaces the matched token when the same raw text appears earlier as a prefix substring", () => { + // Regression: '/useState/i' is a substring of '/useState/iSomething' (not a + // valid token — fails boundary check). q.replace(raw, term) would wrongly + // replace the first occurrence inside the non-token prefix. The splice must + // target only the index-validated token. 
+ const r = buildApiQuery("/useState/iSomething /useState/i"); + expect(r.apiQuery).toBe("/useState/iSomething useState"); + expect(r.regexFilter).not.toBeNull(); + }); +}); + +describe("buildApiQuery — flags", () => { + it("/pattern/i → compiles with i flag", () => { + const r = buildApiQuery("/pattern/i"); + expect(r.apiQuery).toBe("pattern"); + expect(r.regexFilter?.flags).toContain("i"); + }); + + it("/pattern/s → s (dotAll) flag recognized and preserved", () => { + // s is a valid JS RegExp flag (dotAll) — must be tokenized correctly + // so the /pattern/s token is replaced in the API query (not left as-is). + const r = buildApiQuery("/pattern/s"); + expect(r.apiQuery).toBe("pattern"); + expect(r.regexFilter).not.toBeNull(); + expect(r.regexFilter?.flags).toContain("s"); + }); + + it("/pattern/gi → g flag stripped, i kept", () => { + const r = buildApiQuery("/pattern/gi"); + expect(r.regexFilter?.flags).not.toContain("g"); + expect(r.regexFilter?.flags).toContain("i"); + }); + + it("/pattern/iy → y (sticky) flag stripped, i kept", () => { + const r = buildApiQuery("/pattern/iy"); + expect(r.regexFilter?.flags).not.toContain("y"); + expect(r.regexFilter?.flags).toContain("i"); + }); +}); + +describe("buildApiQuery — special escape handling in longestLiteralSequence", () => { + it("/\\buseState\\b/ → useState (word-boundary escapes do not contaminate the term)", () => { + // Regression: \b is a regex assertion, not the letter 'b'. + // The sequence must be broken at \b so 'useState' is extracted, not 'buseStateb'. + const r = buildApiQuery("/\\buseState\\b/"); + expect(r.apiQuery).toBe("useState"); + expect(r.regexFilter).not.toBeNull(); + }); + + it("/\\d+\\.\\d+/ → empty term + warn (\\d and \\. 
are not literals)", () => { + const r = buildApiQuery("/\\d+\\.\\d+/"); + expect(r.apiQuery).toBe(""); + expect(r.warn).toBeDefined(); + }); + + it("/foobar\\sxyz/ → foobar (\\s breaks the sequence, longer prefix wins)", () => { + const r = buildApiQuery("/foobar\\sxyz/"); + expect(r.apiQuery).toBe("foobar"); + }); + + it("/foobar\\nbar/ → foobar (\\n is a control-character escape, not the letter 'n')", () => { + // Regression: \n must break the sequence, not accumulate 'n' → 'foobarnbar'. + const r = buildApiQuery("/foobar\\nbar/"); + expect(r.apiQuery).toBe("foobar"); + }); + + it("/foobar\\tbaz/ → foobar (\\t control escape breaks the sequence)", () => { + const r = buildApiQuery("/foobar\\tbaz/"); + expect(r.apiQuery).toBe("foobar"); + }); + + it("/foobar\\cAbaz/ → foobar (\\cA is a control escape, 'c' must not be accumulated)", () => { + // Regression: \cA matches control-character 0x01, not the letter 'c'. + // The sequence must be broken at \c so 'foobar' is extracted, not 'foobarcAbaz'. + const r = buildApiQuery("/foobar\\cAbaz/"); + expect(r.apiQuery).toBe("foobar"); + }); + + it("/foobar\\kbaz/ → foobar (\\k is a named back-reference, 'k' must not be accumulated)", () => { + // Regression: \k is a named back-reference, not the letter 'k'. + // The sequence must be broken at \k so 'foobar' is extracted, not 'foobarknameabaz'. 
// ─── Regex query helpers ──────────────────────────────────────────────────────
//
// The GitHub REST API (/search/code) does not support /pattern/ regex syntax.
// These helpers detect regex queries, derive a safe literal term to send to the
// API (casting a wide net), and return a compiled RegExp for local post-filtering.
//
// Invariant the term extraction tries to uphold: the literal sent to the API
// should occur in every text the regex can match. A literal that is merely
// *possible* (optional, inside an alternative, part of an escape or a group
// name) would make the API drop files that the local filter would have kept.

/**
 * Returns true if `q` contains a `/pattern/` or `/pattern/flags` token.
 * A leading qualifier like `filename:package.json /regex/` is also matched.
 */
export function isRegexQuery(q: string): boolean {
  return extractRegexToken(q) !== null;
}

/**
 * Given a raw query string (possibly mixing GitHub qualifiers and a /regex/flags
 * token), returns:
 *
 * - `apiQuery`    — the query safe to send to the GitHub REST API
 * - `regexFilter` — the compiled RegExp to apply locally on `TextMatch.fragment`
 * - `warn`        — set when the regex does not compile, or when no exploitable
 *                   literal term could be extracted (the caller should then
 *                   require `--regex-hint` before proceeding).
 *
 * When `q` contains no regex token the input is returned unchanged and
 * `regexFilter` is `null`. Only the first regex token of `q` is processed.
 */
export function buildApiQuery(q: string): {
  apiQuery: string;
  regexFilter: RegExp | null;
  warn?: string;
} {
  const token = extractRegexToken(q);

  // Plain-text query — nothing to do.
  if (token === null) {
    return { apiQuery: q, regexFilter: null };
  }

  // Compile the regex. Strip stateful flags:
  //   g (global) — GitHub returns at most a few fragments, not all occurrences.
  //   y (sticky) — makes RegExp.test() stateful via lastIndex, causing false
  //                negatives when the same instance is reused across fragments.
  // All other flags (i, m, s, u, d, v) are kept.
  const { pattern, flags, raw, index } = token;
  const safeFlags = flags.replace(/[gy]/g, "");
  let regexFilter: RegExp | null = null;
  try {
    regexFilter = new RegExp(pattern, safeFlags);
  } catch (err) {
    // Invalid regex — report the engine's own message so callers can surface
    // a precise reason (e.g. invalid flags vs bad syntax).
    const reason = err instanceof Error ? err.message : String(err);
    return {
      apiQuery: "",
      regexFilter: null,
      warn: `Invalid regex /${pattern}/${flags}: ${reason}`,
    };
  }

  // Derive the API search term from the regex pattern.
  const { term, warn } = extractApiTerm(pattern);

  // Rebuild the API query by splicing the derived term at the exact position
  // of the matched token. q.replace(raw, term) would replace the first
  // *substring* occurrence of `raw`, which may appear earlier in the query as
  // a non-token prefix (e.g. inside a longer word).
  const apiQuery = (q.slice(0, index) + term + q.slice(index + raw.length)).trim();

  return { apiQuery, regexFilter, warn };
}

// ─── Internal helpers ─────────────────────────────────────────────────────────

interface RegexToken {
  /** Raw string as it appears in the query, e.g. "/from.*axios/i" */
  raw: string;
  /** Pattern string without delimiters */
  pattern: string;
  /** Flags string (may be empty) */
  flags: string;
  /** Start index of `raw` within the original query string. */
  index: number;
}

/** Characters worth sending to the GitHub search API as part of a term. */
const SEARCHABLE_CHAR = /[a-zA-Z0-9_-]/;

/**
 * Letters/digits that, right after a backslash, start a regex escape rather
 * than an identity escape: assertions and classes (\b \d \s \w \p), code-point
 * escapes (\u \x), back-references (\1–\9, \k), control escapes (\n \r \t \f
 * \v \c). \a and \e are deliberately absent: without the u/v flag they are
 * identity escapes matching the letter itself.
 */
const SPECIAL_ESCAPE = /[bBdDsSwWpPuUxX0-9nrtfvck]/;

/** Escapes that span more than `\` + one character; matched at string start. */
const MULTI_CHAR_ESCAPE =
  /^\\(?:x[0-9a-fA-F]{2}|u\{[0-9a-fA-F]+\}|u[0-9a-fA-F]{4}|[pP]\{[^}]*\}|c[a-zA-Z]|k<[^>]*>|\d+)/;

/** `{n}`, `{n,}` or `{n,m}` quantifier; group 1 is the minimum repeat count. */
const BRACE_QUANTIFIER = /^\{(\d+)(?:,\d*)?\}/;

/**
 * Opening of a group including its prefix: `(`, `(?:`, `(?=`, `(?!`, `(?<=`,
 * `(?<!`, `(?<name>` or a modifier group such as `(?i:`.
 */
const GROUP_OPENER = /^\((?:\?(?:<[=!]|<[^>]*>|[=!]|[a-zA-Z-]*:))?/;

/**
 * Extracts the first `/pattern/flags` token from a query string.
 * Returns `null` when no regex token is found.
 */
function extractRegexToken(q: string): RegexToken | null {
  // Match /pattern/flags where:
  //   - the pattern is a non-empty sequence of characters other than `/`,
  //     backslash, \r and \n, or backslash-escaped characters
  //   - the token starts at start-of-string or after whitespace, and ends at
  //     end-of-string or before whitespace, so a prefix of a longer word
  //     (e.g. /foo/iSomething) is NOT recognised as a regex token.
  // Accepted flag letters: g, i, m, s, u, y, d, v. 'e' is intentionally
  // excluded — it is not a valid JS RegExp flag.
  const m = q.match(/(?:^|\s)(\/(?:[^/\\\r\n]|\\.)+\/[gimsuydv]*)(?=$|\s)/);
  if (!m || !m[1]) return null;
  const raw = m[1];
  // m.index is where the full match starts; the token (group 1) may be
  // preceded by the single whitespace character consumed by (?:^|\s).
  const tokenStart = (m.index ?? 0) + m[0].length - raw.length;
  const lastSlash = raw.lastIndexOf("/");
  return {
    raw,
    pattern: raw.slice(1, lastSlash),
    flags: raw.slice(lastSlash + 1),
    index: tokenStart,
  };
}

/**
 * Derive a literal API search term from a regex pattern.
 *
 * Strategy (in order):
 * 1. If the pattern is a **top-level alternation** `A|B|C` (branches not
 *    nested inside `(...)` or `[...]`) and every branch yields a literal of at
 *    least 3 characters → join the branch literals with ` OR `.
 * 2. Otherwise → take the longest literal sequence of the whole pattern.
 * 3. If that term is shorter than 3 characters → return an empty term + `warn`.
 */
function extractApiTerm(pattern: string): { term: string; warn?: string } {
  // 1. Top-level alternation detection.
  const branches = splitTopLevelAlternation(pattern);
  if (branches.length > 1) {
    const branchTerms = branches.map((b) => longestLiteralSequence(b));
    if (branchTerms.every((t) => t.length >= 3)) {
      return { term: branchTerms.join(" OR ") };
    }
    // NOTE(review): when some branch has no usable literal we fall through and
    // pick one literal from the whole pattern; text matched only by another
    // branch will not contain it. Kept as-is because existing callers/tests
    // rely on this fallback (e.g. a `\\|foo` pattern resolving to "foo").
  }

  // 2. Longest literal sequence.
  const term = longestLiteralSequence(pattern);
  if (term.length < 3) {
    return {
      term: "",
      warn:
        "No meaningful search term could be extracted from the regex pattern. " +
        "Use --regex-hint to specify the term to send to the GitHub API.",
    };
  }
  return { term };
}

/**
 * Split a regex pattern on top-level `|` characters — i.e. `|` that are not
 * inside `(...)`, `[...]`, or preceded by a backslash. Always returns at least
 * one element (the whole pattern when there is no top-level `|`).
 */
function splitTopLevelAlternation(pattern: string): string[] {
  const branches: string[] = [];
  let depth = 0; // unescaped '(' nesting depth outside character classes
  let inClass = false; // currently inside a character class [...]
  let current = "";
  let escaped = false; // previous character was an unescaped backslash

  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern.charAt(i);

    if (escaped) {
      // Escaped character — literal, never a delimiter.
      current += ch;
      escaped = false;
      continue;
    }

    if (ch === "\\") {
      current += ch;
      escaped = true;
      continue;
    }

    if (ch === "|" && depth === 0 && !inClass) {
      branches.push(current);
      current = "";
      continue;
    }

    if (ch === "[" && !inClass) {
      inClass = true;
    } else if (ch === "]" && inClass) {
      inClass = false;
    } else if (ch === "(" && !inClass) {
      depth++;
    } else if (ch === ")" && !inClass) {
      depth = Math.max(0, depth - 1);
    }
    current += ch;
  }
  branches.push(current);
  return branches;
}

/**
 * Describes the quantifier starting at `pattern[at]`, or `null` when there is
 * none. `min` is the minimum repeat count, `length` the number of pattern
 * characters the quantifier occupies. A `{` that does not form `{n}`, `{n,}`
 * or `{n,m}` is not a quantifier.
 */
function quantifierAt(pattern: string, at: number): { min: number; length: number } | null {
  const ch = pattern.charAt(at);
  if (ch === "?" || ch === "*") return { min: 0, length: 1 };
  if (ch === "+") return { min: 1, length: 1 };
  if (ch === "{") {
    const m = BRACE_QUANTIFIER.exec(pattern.slice(at));
    if (m) return { min: Number(m[1] ?? "0"), length: m[0].length };
  }
  return null;
}

/**
 * Number of pattern characters taken by the escape starting at the backslash
 * `pattern[at]`: the full width of \xHH, \uHHHH, \u{…}, \p{…}, \P{…}, \cX,
 * \k<name> and numeric back-references; 2 for every other escape.
 */
function escapeLength(pattern: string, at: number): number {
  const m = MULTI_CHAR_ESCAPE.exec(pattern.slice(at));
  return m ? m[0].length : 2;
}

/**
 * Returns the index just past the character class opened at `pattern[open]`.
 * Follows the JavaScript rule: the first unescaped `]` closes the class.
 */
function skipCharacterClass(pattern: string, open: number): number {
  let i = open + 1;
  while (i < pattern.length && pattern.charAt(i) !== "]") {
    i += pattern.charAt(i) === "\\" ? 2 : 1;
  }
  return i + 1;
}

/**
 * Returns the index of the `)` closing the group opened at `pattern[open]`,
 * or `pattern.length` when the group is never closed.
 */
function findGroupClose(pattern: string, open: number): number {
  let depth = 0;
  let inClass = false;
  for (let i = open; i < pattern.length; i++) {
    const ch = pattern.charAt(i);
    if (ch === "\\") {
      i++; // the escaped character can neither open nor close anything
    } else if (inClass) {
      if (ch === "]") inClass = false;
    } else if (ch === "[") {
      inClass = true;
    } else if (ch === "(") {
      depth++;
    } else if (ch === ")") {
      depth--;
      if (depth === 0) return i;
    }
  }
  return pattern.length;
}

/**
 * Decides how the literal scan proceeds at the group opened at `pattern[open]`
 * and returns the index to continue from.
 *
 * The whole group is stepped over when its content is not guaranteed to
 * appear in a match: negative lookarounds, groups with a top-level `|`, and
 * groups whose quantifier allows zero repetitions. Otherwise only the opener
 * (including any `?:`, `?=`, `?<=`, `?<name>` prefix) is stepped over so the
 * content is scanned normally.
 */
function stepIntoOrOverGroup(pattern: string, open: number): number {
  const opener = GROUP_OPENER.exec(pattern.slice(open))?.[0] ?? "(";
  const close = findGroupClose(pattern, open);
  const body = pattern.slice(open + opener.length, close);

  const isNegative = opener === "(?!" || opener === "(?<!";
  const hasAlternatives = splitTopLevelAlternation(body).length > 1;
  const isOptional = quantifierAt(pattern, close + 1)?.min === 0;

  return isNegative || hasAlternatives || isOptional ? close + 1 : open + opener.length;
}

/**
 * Extract the longest contiguous sequence of characters useful as a GitHub
 * search term from a regex pattern fragment.
 *
 * Only `[a-zA-Z0-9_-]` characters are accumulated — punctuation and special
 * characters that are valid regex literals (e.g. `\(`) are intentionally
 * excluded because they produce poor search terms.
 *
 * Text that is not guaranteed to appear in a match is never accumulated:
 * - character classes `[...]` are skipped entirely;
 * - regex escapes (`\d`, `\x41`, `\p{…}`, `\k<name>`, …) are skipped at their
 *   full width, so their payload is not mistaken for literal text;
 * - quantifier braces `{n,m}` are skipped;
 * - a character made optional by `?`, `*` or `{0,…}` is dropped from the
 *   sequence it ends (`colou?r` yields `colo`, not `colou`);
 * - group names, negative lookarounds, groups containing alternatives and
 *   optional groups are skipped (see `stepIntoOrOverGroup`).
 *
 * Uses `>=` when updating the best candidate so that later sequences of equal
 * length win over earlier ones (e.g. `old-lib` is preferred over `require` in
 * `/require\(['"]old-lib['"]\)/`).
 */
function longestLiteralSequence(pattern: string): string {
  let best = "";
  let current = "";
  let i = 0;

  // Close the running sequence and keep it if it is at least as long as the
  // best one seen so far.
  const flush = (): void => {
    if (current.length >= best.length) best = current;
    current = "";
  };

  while (i < pattern.length) {
    const ch = pattern.charAt(i);

    if (ch === "[") {
      flush();
      i = skipCharacterClass(pattern, i);
      continue;
    }

    if (ch === "\\") {
      const next = pattern.charAt(i + 1);
      if (SEARCHABLE_CHAR.test(next) && !SPECIAL_ESCAPE.test(next)) {
        // Identity escape of a searchable character (e.g. `\-`, `\a`).
        current += next;
        i += 2;
      } else {
        // Escaped punctuation or a real regex escape — breaks the sequence.
        flush();
        i += escapeLength(pattern, i);
      }
      continue;
    }

    if (ch === "(") {
      flush();
      i = stepIntoOrOverGroup(pattern, i);
      continue;
    }

    const quantifier = quantifierAt(pattern, i);
    if (quantifier !== null) {
      // The quantifier applies to the last accumulated character only; when it
      // allows zero repetitions that character may be absent from a match.
      if (quantifier.min === 0) current = current.slice(0, -1);
      flush();
      i += quantifier.length;
      continue;
    }

    if (SEARCHABLE_CHAR.test(ch)) {
      current += ch;
    } else {
      flush();
    }
    i++;
  }

  flush();
  return best;
}