From 0fed671df8a665cf68c2928a821317a0bf2dbc59 Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Sat, 14 Mar 2026 11:40:27 -0700 Subject: [PATCH] feat(parsing): single-quote JSON repair + raw bracket span (#28) - repair_json_candidates: convert single-quoted keys/values to double-quoted via convert_single_quoted_json_to_double (issue #28) - find_balanced_bracket_span: extract raw [..] or {..} when valid JSON not found, so repair can fix and parse - ROADMAP: document #28 repairs; add #25 (dynamic context) to Shipped - Test: parse_json_with_single_quotes Made-with: Cursor --- docs/ROADMAP.md | 3 +- src/parsing/llm_response.rs | 106 ++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index bb676a4..f74045c 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -74,7 +74,8 @@ Create labels once: `priority: high`, `priority: medium`, `priority: low`, `area - **Natural language rules (#12):** `review_rules_prose: [ "Rule one", "Rule two" ]` in config; injected as "Custom rules (natural language)" bullets into review guidance. Tests: `test_config_deserialize_review_rules_prose_from_yaml`, `build_review_guidance_includes_prose_rules`. - **Triage skip deletion-only (#29):** `triage_skip_deletion_only: true` in config; when true, deletion-only diffs get `SkipDeletionOnly` and skip expensive review. Default false. Tests: `test_triage_deletion_only_with_skip_true_returns_skip_deletion_only`, config deserialize. -- **LLM parsing (#28):** Repair candidate for diff-style line prefixes (`+` on each line) in `repair_json_candidates`; test `parse_json_with_diff_prefix_artifact`. +- **Dynamic context (#25):** `find_enclosing_boundary_line` in `function_chunker.rs`; `context.rs` expands hunk start to enclosing function/class boundary; asymmetric context (5 before, 1 after). 
/// Find the first balanced `open`…`close` span in `content` without validating it as JSON.
///
/// Used when no valid JSON is found, so the raw span can be repaired (e.g. single-quote
/// conversion) and parsed again.
///
/// Every occurrence of `open` is tried as a start, left to right, in two passes:
/// 1. brackets inside `"…"` or `'…'` strings (backslash escapes honoured) are ignored, so a
///    stray `]` or `}` inside a message cannot truncate the span;
/// 2. if that finds nothing (e.g. an unpaired apostrophe swallowed the closing bracket), every
///    bracket is counted regardless of quoting.
///
/// Returns the span including both delimiters, or `None` when no start ever balances.
fn find_balanced_bracket_span(content: &str, open: char, close: char) -> Option<String> {
    let first_span = |skip_quoted: bool| {
        content
            .char_indices()
            .filter(|&(_, ch)| ch == open)
            .find_map(|(start, _)| {
                let len = balanced_span_len(&content[start..], open, close, skip_quoted)?;
                Some(content[start..start + len].to_string())
            })
    };
    first_span(true).or_else(|| first_span(false))
}

/// Byte length of the balanced span beginning at the first char of `text` (expected to be
/// `open`), or `None` if the nesting depth never returns to zero.
///
/// With `skip_quoted` set, characters between matching `"` or `'` delimiters do not affect the
/// depth; a backslash inside such a string escapes the following character.
fn balanced_span_len(text: &str, open: char, close: char, skip_quoted: bool) -> Option<usize> {
    let mut depth = 0i32;
    let mut quote: Option<char> = None;
    let mut escaped = false;

    for (offset, ch) in text.char_indices() {
        if let Some(delim) = quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == delim {
                quote = None;
            }
            continue;
        }

        if ch == open {
            depth += 1;
        } else if ch == close {
            depth -= 1;
            if depth == 0 {
                // Use the char's byte width so a multi-byte `close` is never split.
                return Some(offset + ch.len_utf8());
            }
        } else if skip_quoted && (ch == '"' || ch == '\'') {
            quote = Some(ch);
        }
    }
    None
}
/// Convert single-quoted JSON-like strings to double-quoted so `serde_json` can parse them.
///
/// Only single-quoted regions that lie outside any double-quoted string are converted;
/// double-quoted strings (including their escapes and any apostrophes) are copied verbatim.
///
/// Inside a single-quoted string:
/// - `\'` becomes a bare `'` (JSON has no `\'` escape);
/// - an unescaped `"` becomes `\"`;
/// - every other backslash escape (`\\`, `\n`, `\"`, `\uXXXX`, …) is kept as written.
///
/// An unterminated single-quoted string is emitted without a closing quote, so the result
/// still fails to parse rather than silently changing meaning.
fn convert_single_quoted_json_to_double(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(c) = chars.next() {
        match c {
            '"' => {
                out.push('"');
                copy_double_quoted_body(&mut chars, &mut out);
            }
            '\'' => {
                out.push('"');
                requote_single_quoted_body(&mut chars, &mut out);
            }
            other => out.push(other),
        }
    }
    out
}

/// Copy the remainder of a double-quoted string verbatim, up to and including its closing `"`.
fn copy_double_quoted_body(chars: &mut impl Iterator<Item = char>, out: &mut String) {
    while let Some(c) = chars.next() {
        out.push(c);
        match c {
            // The escaped character is consumed here so an escaped `"` cannot close the string.
            '\\' => {
                if let Some(escaped) = chars.next() {
                    out.push(escaped);
                }
            }
            '"' => return,
            _ => {}
        }
    }
}

/// Re-emit the remainder of a single-quoted string as the body of a double-quoted JSON string,
/// consuming the closing `'` and writing `"` in its place.
fn requote_single_quoted_body(chars: &mut impl Iterator<Item = char>, out: &mut String) {
    while let Some(c) = chars.next() {
        match c {
            // Handle the escape pair as a unit so `\\'` is read as "backslash, then end of
            // string" rather than an escaped quote.
            '\\' => match chars.next() {
                Some('\'') => out.push('\''),
                Some(escaped) => {
                    out.push('\\');
                    out.push(escaped);
                }
                None => out.push('\\'),
            },
            '\'' => {
                out.push('"');
                return;
            }
            '"' => out.push_str("\\\""),
            other => out.push(other),
        }
    }
}
+ let input = r#"[{'line': 9, 'issue': 'Use of deprecated API'}]"#; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 9); + assert!(comments[0].content.contains("deprecated")); + } + // ── Bug: find_json_array uses mismatched brackets ────────────────── // // `find_json_array` uses `find('[')` (first) + `rfind(']')` (last).