Skip to content

Commit 5e44d7a

Browse files
committed
Make copilot a linter, not an author
1 parent 1846938 commit 5e44d7a

4 files changed

Lines changed: 381 additions & 40 deletions

File tree

scripts/generate_rules/misra_help/dump_rules_json.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,36 @@
7676
)
7777

7878

79+
def _load_impl_scope_lookup(
    query_repo: Path, standard: str,
) -> dict[tuple[str, str], dict]:
    """Build a (rule_id, short_name) -> implementation_scope lookup
    from the rule_packages JSON files.

    Every ``*.json`` file directly under
    ``<query_repo>/rule_packages/<lang>`` is scanned, where ``<lang>`` is
    the first element of ``STANDARD_INFO[standard]``. Entries found under
    every top-level key are merged into one mapping; when the same
    ``(rule_id, short_name)`` pair occurs more than once, the occurrence
    read last (files are visited in sorted order) wins.

    Args:
        query_repo: Directory that contains ``rule_packages``.
        standard: Key into ``STANDARD_INFO``; used only to pick the
            language sub-directory.

    Returns:
        Mapping from ``(rule_id, short_name)`` to that query's
        ``implementation_scope`` value. Empty when the package directory
        does not exist. Queries lacking a truthy ``short_name`` or
        ``implementation_scope`` are omitted.

    Raises:
        KeyError: If ``standard`` is not a key of ``STANDARD_INFO``.
    """
    lang, _ = STANDARD_INFO[standard]
    pkg_dir = query_repo / "rule_packages" / lang
    if not pkg_dir.is_dir():
        return {}
    lookup: dict[tuple[str, str], dict] = {}
    for pkg_file in sorted(pkg_dir.glob("*.json")):
        try:
            data = json.loads(pkg_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and also the
            # UnicodeDecodeError raised for a file that is not valid UTF-8;
            # unreadable packages are skipped, not fatal.
            continue
        if not isinstance(data, dict):
            # A package whose top level is a list/scalar has nothing to scan.
            continue
        # Top-level key is the standard name (e.g. "MISRA-C-2012").
        for rules in data.values():
            if not isinstance(rules, dict):
                continue
            for rule_id, rule_data in rules.items():
                if not isinstance(rule_data, dict):
                    continue
                queries = rule_data.get("queries")
                if not isinstance(queries, list):
                    # Missing, null, or malformed "queries": nothing to index.
                    continue
                for q in queries:
                    if not isinstance(q, dict):
                        continue
                    sn = q.get("short_name")
                    impl = q.get("implementation_scope")
                    if sn and impl:
                        lookup[(rule_id, sn)] = impl
    return lookup
107+
108+
79109
def _rule_to_jsonable(rule: Rule) -> dict:
80110
"""Serialize a Rule to JSON, including the example layout."""
81111
d = asdict(rule)
@@ -89,7 +119,9 @@ def _rule_to_jsonable(rule: Rule) -> dict:
89119

90120
def _query_entries(rule_id: str, ql_paths: list[Path],
91121
query_repo: Path, help_repo: Path,
92-
lang_src: Path) -> list[dict]:
122+
lang_src: Path,
123+
impl_lookup: dict[tuple[str, str], dict] | None = None,
124+
) -> list[dict]:
93125
out: list[dict] = []
94126
for ql in sorted(ql_paths):
95127
rel_dir = ql.parent.relative_to(query_repo / lang_src)
@@ -98,12 +130,17 @@ def _query_entries(rule_id: str, ql_paths: list[Path],
98130
existing = md.read_text(encoding="utf-8")
99131
except FileNotFoundError:
100132
existing = None
101-
out.append({
133+
entry: dict = {
102134
"ql_path": str(ql.relative_to(query_repo)),
103135
"ql_name_title": _read_ql_name(ql) or "",
104136
"md_path": str(md.relative_to(help_repo)),
105137
"existing_md": existing,
106-
})
138+
}
139+
if impl_lookup:
140+
impl = impl_lookup.get((rule_id, ql.stem))
141+
if impl:
142+
entry["implementation_scope"] = impl
143+
out.append(entry)
107144
return out
108145

109146

@@ -129,14 +166,17 @@ def main() -> int:
129166
lang, lang_src = STANDARD_INFO[args.standard]
130167
queries = collect_queries(args.query_repo, args.standard)
131168

169+
impl_lookup = _load_impl_scope_lookup(args.query_repo, args.standard)
170+
132171
rules_json: dict[str, dict] = {}
133172
for r in rules:
134173
rules_json[r.rule_id] = _rule_to_jsonable(r)
135174

136175
queries_json: dict[str, list[dict]] = {}
137176
for rule_id, ql_paths in queries.items():
138177
queries_json[rule_id] = _query_entries(
139-
rule_id, ql_paths, args.query_repo, args.help_repo, lang_src)
178+
rule_id, ql_paths, args.query_repo, args.help_repo, lang_src,
179+
impl_lookup)
140180

141181
payload = {
142182
"standard": args.standard,

scripts/generate_rules/misra_help/extract_rules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ def _format_code_lines(text: str) -> str:
560560
# ----------------------------------------------------------------------------
561561

562562
STD_DISPLAY = {
563-
"MISRA-C-2023": "MISRA C 2023",
563+
"MISRA-C-2023": "MISRA C 2012",
564564
"MISRA-C-2012": "MISRA C 2012",
565565
"MISRA-C++-2023": "MISRA C++ 2023",
566566
}
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""Re-generate query help files in two stages without needing docling.
2+
3+
This script reuses the existing .misra-rule-cache/<standard>.json
4+
(produced by a prior dump_rules_json.py run) to:
5+
6+
Stage 1: Deterministically re-render every .md from the cached rule
7+
data via render_help().
8+
Patch: Update the cache JSON with current existing_md content and
9+
implementation_scope from rule_packages/*.json.
10+
Stage 2: Run rewrite_help.py (LLM lint/proofread) over the patched
11+
cache.
12+
13+
Usage:
14+
python refresh_help.py --standard MISRA-C-2012
15+
python refresh_help.py --standard MISRA-C++-2023
16+
python refresh_help.py --standard MISRA-C-2012 --stage1-only
17+
"""
18+
from __future__ import annotations
19+
20+
import argparse
21+
import json
22+
import subprocess
23+
import sys
24+
from dataclasses import dataclass
25+
from pathlib import Path
26+
from typing import Any
27+
28+
sys.path.insert(0, str(Path(__file__).parent))
29+
from extract_rules import Rule, render_help, _format_code_lines # noqa: E402
30+
31+
# Directory that contains this script (symlinks resolved).
SCRIPT_DIR = Path(__file__).resolve().parent
# Third ancestor of SCRIPT_DIR; used as the default for --query-repo.
# NOTE(review): presumably the query repository root -- confirm against the
# script's actual location in the tree.
QUERY_REPO = SCRIPT_DIR.parents[2]
# Default for --help-repo: a sibling directory of QUERY_REPO.
DEFAULT_HELP_REPO = QUERY_REPO.parent / "codeql-coding-standards-help"

# Supported --standard values. Each maps to a 2-tuple; the first element is
# the sub-directory of rule_packages/ scanned by _load_impl_scope_lookup.
# The second element is not read anywhere in this file.
# NOTE(review): it looks like a rules source path relative to the query
# repo -- confirm against dump_rules_json.py.
STANDARD_INFO = {
    "MISRA-C-2012": ("c", "c/misra/src/rules"),
    "MISRA-C-2023": ("c", "c/misra/src/rules"),
    "MISRA-C++-2023": ("cpp", "cpp/misra/src/rules"),
}
40+
41+
42+
def _rule_from_json(d: dict[str, Any]) -> Rule:
    """Rebuild a ``Rule`` from one rule dict stored in the cache JSON.

    ``rule_id``, ``raw_id``, ``standard`` and ``title`` must be present
    (``KeyError`` otherwise). The remaining text fields default to ``""``
    and the list fields to ``[]``. A non-empty ``example_layout`` is
    restored onto the rule as a list of ``(kind, text)`` tuples.
    """
    mandatory = {
        key: d[key] for key in ("rule_id", "raw_id", "standard", "title")
    }
    text_fields = {
        key: d.get(key, "")
        for key in ("category", "analysis", "applies_to", "amplification",
                    "rationale", "example")
    }
    list_fields = {key: d.get(key, []) for key in ("exceptions", "see_also")}
    rule = Rule(**mandatory, **text_fields, **list_fields)

    # The layout is optional; leave the rule untouched when it is absent
    # or empty.
    layout_items = d.get("example_layout", [])
    if not layout_items:
        return rule
    rule._example_layout = [
        (item["kind"], item["text"]) for item in layout_items
    ]
    return rule
63+
64+
65+
def _load_impl_scope_lookup(
    query_repo: Path, standard: str,
) -> dict[tuple[str, str], dict]:
    """Build (rule_id, short_name) -> implementation_scope from rule_packages.

    Scans each ``*.json`` file directly under
    ``<query_repo>/rule_packages/<lang>``, with ``<lang>`` taken from the
    first element of ``STANDARD_INFO[standard]``. Rules under all
    top-level keys of a package file are merged; for a repeated
    ``(rule_id, short_name)`` pair the last one read (files are visited
    in sorted order) wins.

    Args:
        query_repo: Directory that contains ``rule_packages``.
        standard: Key into ``STANDARD_INFO``; used only to select the
            language sub-directory.

    Returns:
        Mapping from ``(rule_id, short_name)`` to the query's
        ``implementation_scope`` value; empty when the package directory
        is missing. Queries without a truthy ``short_name`` or
        ``implementation_scope`` are left out.

    Raises:
        KeyError: If ``standard`` is not a key of ``STANDARD_INFO``.
    """
    lang, _ = STANDARD_INFO[standard]
    pkg_dir = query_repo / "rule_packages" / lang
    if not pkg_dir.is_dir():
        return {}
    lookup: dict[tuple[str, str], dict] = {}
    for pkg_file in sorted(pkg_dir.glob("*.json")):
        try:
            data = json.loads(pkg_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError is the base of json.JSONDecodeError and of the
            # UnicodeDecodeError raised for non-UTF-8 files; a bad package
            # file is skipped so one broken file cannot abort the refresh.
            continue
        if not isinstance(data, dict):
            # Top level must be an object keyed by standard name.
            continue
        for rules in data.values():
            if not isinstance(rules, dict):
                continue
            for rule_id, rule_data in rules.items():
                if not isinstance(rule_data, dict):
                    continue
                queries = rule_data.get("queries")
                if not isinstance(queries, list):
                    # Missing, null, or malformed "queries": nothing to index.
                    continue
                for q in queries:
                    if not isinstance(q, dict):
                        continue
                    sn = q.get("short_name")
                    impl = q.get("implementation_scope")
                    if sn and impl:
                        lookup[(rule_id, sn)] = impl
    return lookup
91+
92+
93+
def stage1_render(cache: dict, help_repo: Path) -> tuple[int, int]:
    """Rewrite every query's .md file from the cached rule data.

    For each rule id in ``cache["queries"]`` (visited in sorted order) the
    matching entry of ``cache["rules"]`` is rendered once with
    ``render_help`` and that same text is written to the ``md_path`` of
    every query of the rule, creating parent directories as needed.

    Returns:
        ``(wrote, skipped)``: the number of files written, and the number
        of queries skipped because their rule has no (or empty) cached
        data.
    """
    lang = cache["lang"]
    rules_by_id = cache["rules"]
    queries_by_rule = cache["queries"]

    written = 0
    skipped = 0
    for rule_id in sorted(queries_by_rule):
        entries = queries_by_rule[rule_id]
        cached_rule = rules_by_id.get(rule_id)
        if not cached_rule:
            # No rule text to render from: count all of its queries as skipped.
            skipped += len(entries)
            continue

        markdown = render_help(_rule_from_json(cached_rule), lang)
        for entry in entries:
            target = help_repo / entry["md_path"]
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(markdown, encoding="utf-8")
            written += 1

    return written, skipped
114+
115+
116+
def patch_cache(
    cache: dict, help_repo: Path, query_repo: Path, standard: str,
) -> dict:
    """Refresh ``existing_md`` and ``implementation_scope`` on each query.

    The query dicts inside ``cache["queries"]`` are modified in place:
    ``existing_md`` becomes the current text of the query's .md file (or
    ``None`` if the file does not exist), and ``implementation_scope`` is
    set from the rule_packages lookup, or removed when the lookup has no
    entry for the query. The same ``cache`` object is returned.
    """
    scopes = _load_impl_scope_lookup(query_repo, standard)

    for rule_id, entries in cache["queries"].items():
        for entry in entries:
            md_file = help_repo / entry["md_path"]
            try:
                current_text = md_file.read_text(encoding="utf-8")
            except FileNotFoundError:
                current_text = None
            entry["existing_md"] = current_text

            # Lookup keys pair the rule id with the .ql file's stem.
            scope = scopes.get((rule_id, Path(entry["ql_path"]).stem))
            if scope:
                entry["implementation_scope"] = scope
            else:
                # Drop a stale scope left over from an earlier run.
                entry.pop("implementation_scope", None)

    return cache
140+
141+
142+
def main() -> int:
    """Command-line entry point: stage 1 render, cache patch, optional stage 2.

    Loads ``<help-repo>/.misra-rule-cache/<standard>.json``, re-renders
    every .md file from it, rewrites the cache with fresh ``existing_md``
    and ``implementation_scope`` values and, unless ``--stage1-only`` is
    given, runs ``rewrite_help.py`` as a subprocess.

    Returns:
        ``2`` when the cache file is missing, unreadable, or lacks the
        required top-level keys; ``0`` after a ``--stage1-only`` run;
        otherwise the exit status of ``rewrite_help.py``.
    """
    import shlex  # local import: only needed to echo the stage 2 command

    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--standard", required=True, choices=sorted(STANDARD_INFO))
    p.add_argument("--help-repo", type=Path, default=DEFAULT_HELP_REPO)
    p.add_argument("--query-repo", type=Path, default=QUERY_REPO)
    p.add_argument("--stage1-only", action="store_true",
                   help="Only run deterministic stage 1 (no LLM).")
    p.add_argument("--model", default=None,
                   help="Copilot model id for stage 2.")
    args = p.parse_args()

    help_repo = args.help_repo.resolve()
    cache_path = help_repo / ".misra-rule-cache" / f"{args.standard}.json"
    if not cache_path.exists():
        print(f"Cache not found: {cache_path}", file=sys.stderr)
        return 2

    # A truncated or hand-edited cache should produce the same clean exit
    # code as a missing one, not a traceback.
    try:
        cache = json.loads(cache_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        print(f"Cannot read cache {cache_path}: {exc}", file=sys.stderr)
        return 2
    required_keys = ("lang", "rules", "queries")
    if not isinstance(cache, dict) or any(
            key not in cache for key in required_keys):
        print(f"Cache {cache_path} must be a JSON object with keys: "
              f"{', '.join(required_keys)}", file=sys.stderr)
        return 2

    total_queries = sum(len(v) for v in cache["queries"].values())
    print(f"Loaded cache: {len(cache['rules'])} rules, {total_queries} queries")

    # Stage 1: deterministic render.
    print("\n=== Stage 1: deterministic render ===")
    wrote, skipped = stage1_render(cache, help_repo)
    print(f"Stage 1 done: wrote={wrote} skipped={skipped}")

    # Patch cache with fresh existing_md + implementation_scope.
    print("\n=== Patching cache ===")
    cache = patch_cache(cache, help_repo, args.query_repo, args.standard)
    # Write to a sibling temp file and rename over the original so an
    # interrupted run cannot leave a half-written cache behind; the cache
    # is this script's only input.
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    tmp_path.write_text(
        json.dumps(cache, indent=2, ensure_ascii=False), encoding="utf-8")
    tmp_path.replace(cache_path)
    impl_count = sum(
        1 for qs in cache["queries"].values()
        for q in qs if q.get("implementation_scope")
    )
    print(f"Cache updated: implementation_scope on {impl_count} queries")

    if args.stage1_only:
        print("\n--stage1-only: skipping LLM pass.")
        return 0

    # Stage 2: LLM lint/proofread via rewrite_help.py.
    print("\n=== Stage 2: LLM lint/proofread ===")
    cmd = [
        sys.executable,
        str(SCRIPT_DIR / "rewrite_help.py"),
        "--standard", args.standard,
        "--help-repo", str(help_repo),
    ]
    if args.model:
        cmd += ["--model", args.model]
    # shlex.join quotes arguments containing spaces, so the echoed line can
    # be pasted back into a shell unchanged.
    print(f"Running: {shlex.join(cmd)}")
    return subprocess.call(cmd)
196+
197+
198+
# Run main() only when executed as a script; its int return value becomes
# the process exit status via SystemExit.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)