Skip to content

Commit a9df508

Browse files
Copilotdata-douser
andauthored
fix: address review feedback — sarifClient race, dedup formula comment, handler tests
1. Fix data race in sarifClient() by replacing nil-check with sync.Once 2. Add clarifying comment on overlap scoring formula in sarif_deduplicate_rules 3. Add 3 handler-level tests for sarif_deduplicate_rules (overlap detection, no-overlap, missing input error) Agent-Logs-Url: https://github.com/advanced-security/codeql-development-mcp-server/sessions/7aefb02d-b415-49ed-87da-f03d9f4a2b3d Co-authored-by: data-douser <70299490+data-douser@users.noreply.github.com>
1 parent 71d8452 commit a9df508

File tree

4 files changed

+127
-19
lines changed

4 files changed

+127
-19
lines changed

client/internal/github/client.go

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@ import (
77
"fmt"
88
"net/url"
99
"strconv"
10+
"sync"
1011

1112
ghapi "github.com/cli/go-gh/v2/pkg/api"
1213
)
1314

1415
// Client wraps the go-gh REST client for Code Scanning API calls.
1516
type Client struct {
16-
rest *ghapi.RESTClient
17-
sarifRest *ghapi.RESTClient // lazily initialized; uses Accept: application/sarif+json
17+
rest *ghapi.RESTClient
18+
sarifRest *ghapi.RESTClient // lazily initialized; uses Accept: application/sarif+json
19+
sarifOnce sync.Once
20+
sarifInitErr error
1821
}
1922

2023
// NewClient creates a new GitHub API client using gh auth credentials.
@@ -32,22 +35,21 @@ func NewClient() (*Client, error) {
3235
}
3336

3437
// sarifClient returns or lazily creates a REST client with Accept: application/sarif+json.
38+
// Uses sync.Once to prevent data races from concurrent goroutines.
3539
func (c *Client) sarifClient() (*ghapi.RESTClient, error) {
36-
if c.sarifRest != nil {
37-
return c.sarifRest, nil
38-
}
39-
sarifOpts := ghapi.ClientOptions{
40-
Headers: map[string]string{
41-
"Accept": "application/sarif+json",
42-
"X-GitHub-Api-Version": "2022-11-28",
43-
},
44-
}
45-
client, err := ghapi.NewRESTClient(sarifOpts)
46-
if err != nil {
47-
return nil, fmt.Errorf("create SARIF client: %w", err)
48-
}
49-
c.sarifRest = client
50-
return c.sarifRest, nil
40+
c.sarifOnce.Do(func() {
41+
sarifOpts := ghapi.ClientOptions{
42+
Headers: map[string]string{
43+
"Accept": "application/sarif+json",
44+
"X-GitHub-Api-Version": "2022-11-28",
45+
},
46+
}
47+
c.sarifRest, c.sarifInitErr = ghapi.NewRESTClient(sarifOpts)
48+
if c.sarifInitErr != nil {
49+
c.sarifInitErr = fmt.Errorf("create SARIF client: %w", c.sarifInitErr)
50+
}
51+
})
52+
return c.sarifRest, c.sarifInitErr
5153
}
5254

5355
// ListAnalysesOptions configures the list analyses request.

server/dist/codeql-development-mcp-server.js.map

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/src/tools/sarif-tools.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,11 @@ function registerSarifDeduplicateRulesTool(server: McpServer): void {
495495
}
496496
}
497497

498+
// Overlap scoring: We use the higher of two matching strategies:
499+
// 1. Location-based: `overlaps.length` from full-path structural comparison
500+
// 2. Fingerprint-based: `matchedAIndices.size` from partialFingerprints
501+
// The score is Jaccard-like: matchedAlerts / (totalA + totalB - matchedAlerts).
502+
// We cap matchedAlerts at min(totalA, totalB) so unmatched counts stay non-negative.
498503
const matchedAlerts = Math.max(overlaps.length, matchedAIndices.size);
499504
const minResults = Math.min(resultsA.length, resultsB.length);
500505
// Cap matched alerts at the smaller set size to avoid negative unmatched counts

server/test/src/tools/sarif-tools.test.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,5 +402,106 @@ describe('SARIF Tools', () => {
402402
expect(result.content[0].text).toContain('Invalid SARIF');
403403
});
404404
});
405+
406+
describe('sarif_deduplicate_rules', () => {
407+
it('should detect duplicate rules across two SARIF files with overlapping results', async () => {
408+
// Create SARIF B with same rule and matching codeFlow locations to A.
409+
// Both results must have codeFlows for full-path overlap to detect them.
410+
const sarifB = {
411+
version: '2.1.0',
412+
runs: [{
413+
tool: {
414+
driver: {
415+
name: 'CodeQL',
416+
version: '2.25.1',
417+
rules: [
418+
{ id: 'js/sql-injection-v2', shortDescription: { text: 'SQL injection (v2)' } },
419+
],
420+
},
421+
},
422+
results: [
423+
{
424+
ruleId: 'js/sql-injection-v2',
425+
ruleIndex: 0,
426+
message: { text: 'SQL injection from user input.' },
427+
locations: [{ physicalLocation: { artifactLocation: { uri: 'src/db.js' }, region: { startLine: 42, startColumn: 5, endColumn: 38 } } }],
428+
codeFlows: [{ threadFlows: [{ locations: [
429+
{ location: { physicalLocation: { artifactLocation: { uri: 'src/handler.js' }, region: { startLine: 10 } }, message: { text: 'req.query.id' } } },
430+
{ location: { physicalLocation: { artifactLocation: { uri: 'src/db.js' }, region: { startLine: 42 } }, message: { text: 'query(...)' } } },
431+
] }] }],
432+
},
433+
],
434+
}],
435+
};
436+
const pathB = join(testStorageDir, 'dedup-b.sarif');
437+
writeFileSync(pathB, JSON.stringify(sarifB));
438+
439+
// Use a low threshold since only the codeFlow result will match (1 of 2 in A)
440+
const result = await handlers.sarif_deduplicate_rules({
441+
sarifPathA: testSarifPath,
442+
sarifPathB: pathB,
443+
overlapThreshold: 0.3,
444+
});
445+
const parsed = JSON.parse(result.content[0].text);
446+
447+
expect(parsed.summary.totalRulesA).toBe(2);
448+
expect(parsed.summary.totalRulesB).toBe(1);
449+
expect(parsed.duplicateGroups.length).toBeGreaterThan(0);
450+
451+
// The js/sql-injection (A) vs js/sql-injection-v2 (B) pair should be detected
452+
const sqlPair = parsed.duplicateGroups.find(
453+
(g: any) => g.ruleIdA === 'js/sql-injection' && g.ruleIdB === 'js/sql-injection-v2',
454+
);
455+
expect(sqlPair).toBeDefined();
456+
expect(sqlPair.overlapScore).toBeGreaterThan(0);
457+
expect(sqlPair.matchedAlerts).toBeGreaterThan(0);
458+
expect(sqlPair.totalA).toBe(2);
459+
expect(sqlPair.totalB).toBe(1);
460+
});
461+
462+
it('should return empty groups when no rules overlap', async () => {
463+
// Create SARIF B with completely different rule and locations
464+
const sarifB = {
465+
version: '2.1.0',
466+
runs: [{
467+
tool: {
468+
driver: {
469+
name: 'CodeQL',
470+
version: '2.25.1',
471+
rules: [
472+
{ id: 'py/command-injection', shortDescription: { text: 'Command injection' } },
473+
],
474+
},
475+
},
476+
results: [
477+
{
478+
ruleId: 'py/command-injection',
479+
ruleIndex: 0,
480+
message: { text: 'Command injection.' },
481+
locations: [{ physicalLocation: { artifactLocation: { uri: 'src/run.py' }, region: { startLine: 100, startColumn: 1, endColumn: 20 } } }],
482+
},
483+
],
484+
}],
485+
};
486+
const pathB = join(testStorageDir, 'dedup-none.sarif');
487+
writeFileSync(pathB, JSON.stringify(sarifB));
488+
489+
const result = await handlers.sarif_deduplicate_rules({
490+
sarifPathA: testSarifPath,
491+
sarifPathB: pathB,
492+
});
493+
const parsed = JSON.parse(result.content[0].text);
494+
495+
expect(parsed.summary.overlapThreshold).toBe(0.8);
496+
expect(parsed.duplicateGroups).toHaveLength(0);
497+
});
498+
499+
it('should return error when sarifPathA is missing', async () => {
500+
const result = await handlers.sarif_deduplicate_rules({
501+
sarifPathB: testSarifPath,
502+
});
503+
expect(result.content[0].text).toContain('Either sarifPath or cacheKey is required');
504+
});
505+
});
405506
});
406507
});

0 commit comments

Comments
 (0)