Skip to content

Commit 88cb3b7

Browse files
committed
Stream-count large files & detect ambiguous DB paths
- search-ql-code: use streaming (readline) for totalMatches counting on large files in the early-exit path; eliminates TOCTOU race from prior lstatSync check - cli-tool-registry: resolveDatabasePath now collects all candidate children and throws on ambiguity instead of silently picking the first - Add tests for cross-file totalMatches accuracy under truncation, single- child DB auto-resolve, and multi-child DB ambiguity error
1 parent 87250fa commit 88cb3b7

File tree

8 files changed

+170
-22
lines changed

8 files changed

+170
-22
lines changed

client/integration-tests/primitives/tools/profile_codeql_query_from_logs/multi_query_raw_log/after/query-evaluation-detail.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# CodeQL Evaluator Profile — Predicate Detail
2-
# Generated: 2026-03-10T18:36:19.438Z
32
# Log format: raw
43
# CodeQL version: 2.23.1
54
# Use read_file with line ranges from the JSON response to access individual predicates.

client/integration-tests/primitives/tools/profile_codeql_query_from_logs/single_query_raw_log/after/query-evaluation-detail.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# CodeQL Evaluator Profile — Predicate Detail
2-
# Generated: 2026-03-10T18:36:19.453Z
32
# Log format: raw
43
# CodeQL version: 2.23.1
54
# Use read_file with line ranges from the JSON response to access individual predicates.

server/dist/codeql-development-mcp-server.js

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57013,17 +57013,30 @@ function resolveDatabasePath(dbPath) {
5701357013
}
5701457014
try {
5701557015
const entries = readdirSync2(dbPath);
57016+
const candidates = [];
5701657017
for (const entry of entries) {
5701757018
const candidate = join6(dbPath, entry);
5701857019
try {
5701957020
if (statSync2(candidate).isDirectory() && existsSync4(join6(candidate, "codeql-database.yml"))) {
57020-
logger.info(`Resolved multi-language database directory: ${dbPath} -> ${candidate}`);
57021-
return candidate;
57021+
candidates.push(candidate);
5702257022
}
5702357023
} catch {
5702457024
}
5702557025
}
57026-
} catch {
57026+
if (candidates.length === 1) {
57027+
logger.info(`Resolved database directory: ${dbPath} -> ${candidates[0]}`);
57028+
return candidates[0];
57029+
}
57030+
if (candidates.length > 1) {
57031+
const names = candidates.map((c) => basename2(c)).join(", ");
57032+
throw new Error(
57033+
`Ambiguous database path: ${dbPath} contains multiple databases (${names}). Specify the full path to the desired database subfolder.`
57034+
);
57035+
}
57036+
} catch (err) {
57037+
if (err instanceof Error && err.message.startsWith("Ambiguous database path")) {
57038+
throw err;
57039+
}
5702757040
}
5702857041
return dbPath;
5702957042
}
@@ -63058,9 +63071,21 @@ async function searchQlCode(params) {
6305863071
for (const file of filesToSearch) {
6305963072
if (collectedEnough) {
6306063073
try {
63061-
const content = readFileSync9(file, "utf-8");
63062-
for (const line of content.replace(/\r\n/g, "\n").split("\n")) {
63063-
if (regex.test(line)) totalMatches++;
63074+
const st = lstatSync(file);
63075+
if (!st.isFile()) continue;
63076+
if (st.size > MAX_FILE_SIZE_BYTES) {
63077+
const rl = createInterface3({
63078+
input: createReadStream3(file, { encoding: "utf-8" }),
63079+
crlfDelay: Infinity
63080+
});
63081+
for await (const line of rl) {
63082+
if (regex.test(line)) totalMatches++;
63083+
}
63084+
} else {
63085+
const content = readFileSync9(file, "utf-8");
63086+
for (const line of content.replace(/\r\n/g, "\n").split("\n")) {
63087+
if (regex.test(line)) totalMatches++;
63088+
}
6306463089
}
6306563090
} catch {
6306663091
}

server/dist/codeql-development-mcp-server.js.map

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/src/lib/cli-tool-registry.ts

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,47 @@ export type { CLIExecutionResult } from './cli-executor';
1818
/**
1919
* Resolve a database path that may point to a multi-language database root
2020
* (i.e. a directory that does not contain `codeql-database.yml` itself but
21-
* has a single language subfolder that does). This handles the common case
22-
* where vscode-codeql stores databases in a parent directory with language
21+
* has a language subfolder that does). This handles the common case where
22+
* vscode-codeql stores databases in a parent directory with language
2323
* subfolders like `javascript/`, `python/`, etc.
24+
*
25+
* When multiple candidate children are found, throws an error describing
26+
* the ambiguity so the caller can pick the right one explicitly.
2427
*/
2528
function resolveDatabasePath(dbPath: string): string {
2629
if (existsSync(join(dbPath, 'codeql-database.yml'))) {
2730
return dbPath;
2831
}
2932
try {
3033
const entries = readdirSync(dbPath);
34+
const candidates: string[] = [];
3135
for (const entry of entries) {
3236
const candidate = join(dbPath, entry);
3337
try {
3438
if (statSync(candidate).isDirectory() &&
3539
existsSync(join(candidate, 'codeql-database.yml'))) {
36-
logger.info(`Resolved multi-language database directory: ${dbPath} -> ${candidate}`);
37-
return candidate;
40+
candidates.push(candidate);
3841
}
3942
} catch {
4043
// Skip inaccessible entries
4144
}
4245
}
43-
} catch {
46+
if (candidates.length === 1) {
47+
logger.info(`Resolved database directory: ${dbPath} -> ${candidates[0]}`);
48+
return candidates[0];
49+
}
50+
if (candidates.length > 1) {
51+
const names = candidates.map((c) => basename(c)).join(', ');
52+
throw new Error(
53+
`Ambiguous database path: ${dbPath} contains multiple databases (${names}). ` +
54+
'Specify the full path to the desired database subfolder.'
55+
);
56+
}
57+
} catch (err) {
58+
// Re-throw ambiguity errors
59+
if (err instanceof Error && err.message.startsWith('Ambiguous database path')) {
60+
throw err;
61+
}
4462
// Parent directory not readable — return original path
4563
}
4664
return dbPath;

server/src/tools/codeql/search-ql-code.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -293,11 +293,25 @@ export async function searchQlCode(params: {
293293
for (const file of filesToSearch) {
294294
if (collectedEnough) {
295295
// Still need totalMatches count but skip storing results.
296-
// Read directly and catch errors to avoid TOCTOU race.
296+
// Use streaming for large files to avoid loading into memory.
297297
try {
298-
const content = readFileSync(file, 'utf-8');
299-
for (const line of content.replace(/\r\n/g, '\n').split('\n')) {
300-
if (regex.test(line)) totalMatches++;
298+
const st = lstatSync(file);
299+
if (!st.isFile()) continue;
300+
if (st.size > MAX_FILE_SIZE_BYTES) {
301+
// Stream large files line-by-line for counting
302+
const rl = createInterface({
303+
input: createReadStream(file, { encoding: 'utf-8' }),
304+
crlfDelay: Infinity
305+
});
306+
for await (const line of rl) {
307+
if (regex.test(line)) totalMatches++;
308+
}
309+
} else {
310+
// Small files: read directly (fast path)
311+
const content = readFileSync(file, 'utf-8');
312+
for (const line of content.replace(/\r\n/g, '\n').split('\n')) {
313+
if (regex.test(line)) totalMatches++;
314+
}
301315
}
302316
} catch {
303317
// skip unreadable files

server/test/src/lib/cli-tool-registry.test.ts

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,21 @@
33
*/
44

55
import { describe, it, expect, vi, beforeEach } from 'vitest';
6+
import { mkdirSync, rmSync, writeFileSync } from 'fs';
7+
import { join } from 'path';
68
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp';
79
import { z } from 'zod';
8-
import {
9-
registerCLITool,
10-
defaultCLIResultProcessor,
10+
import {
11+
registerCLITool,
12+
defaultCLIResultProcessor,
1113
createCodeQLSchemas,
1214
createQLTSchemas,
1315
createBQRSResultProcessor,
1416
createDatabaseResultProcessor,
1517
CLIToolDefinition
1618
} from '../../../src/lib/cli-tool-registry';
1719
import { CLIExecutionResult } from '../../../src/lib/cli-executor';
20+
import { createTestTempDir } from '../../utils/temp-dir';
1821

1922
// Mock the CLI executor
2023
vi.mock('../../../src/lib/cli-executor', () => ({
@@ -335,6 +338,76 @@ describe('registerCLITool handler behavior', () => {
335338
);
336339
});
337340

341+
it('should auto-resolve single-child database directory for codeql_resolve_database', async () => {
342+
// Create a parent directory with one database subdirectory
343+
const parentDir = createTestTempDir('db-resolve-single-');
344+
const dbDir = join(parentDir, 'javascript');
345+
mkdirSync(dbDir, { recursive: true });
346+
writeFileSync(join(dbDir, 'codeql-database.yml'), 'primaryLanguage: javascript\n');
347+
348+
const definition: CLIToolDefinition = {
349+
name: 'codeql_resolve_database',
350+
description: 'Resolve database',
351+
command: 'codeql',
352+
subcommand: 'resolve database',
353+
inputSchema: { database: z.string() }
354+
};
355+
356+
registerCLITool(mockServer, definition);
357+
const handler = (mockServer.tool as ReturnType<typeof vi.fn>).mock.calls[0][3];
358+
359+
executeCodeQLCommand.mockResolvedValueOnce({
360+
stdout: '{"languages": ["javascript"]}',
361+
stderr: '',
362+
success: true
363+
});
364+
365+
await handler({ database: parentDir });
366+
367+
// Should resolve to the child directory
368+
expect(executeCodeQLCommand).toHaveBeenCalledWith(
369+
'resolve database',
370+
expect.any(Object),
371+
[dbDir],
372+
undefined
373+
);
374+
375+
// Cleanup
376+
rmSync(parentDir, { recursive: true, force: true });
377+
});
378+
379+
it('should error on ambiguous multi-database directory for codeql_resolve_database', async () => {
380+
// Create a parent directory with two database subdirectories
381+
const parentDir = createTestTempDir('db-resolve-ambiguous-');
382+
for (const lang of ['javascript', 'python']) {
383+
const dbDir = join(parentDir, lang);
384+
mkdirSync(dbDir, { recursive: true });
385+
writeFileSync(join(dbDir, 'codeql-database.yml'), `primaryLanguage: ${lang}\n`);
386+
}
387+
388+
const definition: CLIToolDefinition = {
389+
name: 'codeql_resolve_database',
390+
description: 'Resolve database',
391+
command: 'codeql',
392+
subcommand: 'resolve database',
393+
inputSchema: { database: z.string() }
394+
};
395+
396+
registerCLITool(mockServer, definition);
397+
const handler = (mockServer.tool as ReturnType<typeof vi.fn>).mock.calls[0][3];
398+
399+
const result = await handler({ database: parentDir });
400+
401+
// Should return an error about ambiguous databases
402+
expect(result.isError).toBe(true);
403+
expect(result.content[0].text).toContain('Ambiguous database path');
404+
expect(result.content[0].text).toContain('javascript');
405+
expect(result.content[0].text).toContain('python');
406+
407+
// Cleanup
408+
rmSync(parentDir, { recursive: true, force: true });
409+
});
410+
338411
it('should handle database parameter as positional argument for codeql_database_create', async () => {
339412
const definition: CLIToolDefinition = {
340413
name: 'codeql_database_create',

server/test/src/tools/codeql/search-ql-code.test.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,26 @@ describe('searchQlCode', () => {
174174
expect(result.results).toHaveLength(5);
175175
});
176176

177+
it('should report accurate totalMatches across files when truncated', async () => {
178+
tempDir = createTestTempDir('search-ql-code');
179+
// Create 5 files, each with 4 matches = 20 total matches
180+
for (let i = 0; i < 5; i++) {
181+
const lines = Array.from({ length: 4 }, (_, j) => `match_${i}_${j}`).join('\n');
182+
writeFileSync(join(tempDir, `File${i}.qll`), lines);
183+
}
184+
185+
const result = await searchQlCode({
186+
pattern: 'match_',
187+
paths: [tempDir],
188+
maxResults: 3
189+
});
190+
191+
expect(result.returnedMatches).toBe(3);
192+
expect(result.totalMatches).toBe(20);
193+
expect(result.truncated).toBe(true);
194+
expect(result.filesSearched).toBe(5);
195+
});
196+
177197
it('should not set truncated when within limit', async () => {
178198
const dir = setupTestFiles({
179199
'Test.qll': 'match1\nmatch2\nmatch3\n'

0 commit comments

Comments
 (0)