diff --git a/README.md b/README.md index ebe453af..5739e843 100644 --- a/README.md +++ b/README.md @@ -148,12 +148,34 @@ Useful options: - `--refresh-image` (rebuild local Docker image) - `--refresh` (both cache and image refresh) - `--no-cache-volume` (run without persistent cache) +- `--lock-timeout SECONDS` (wait for shared cache-volume lock) +- `--no-lock` (disable cache-volume serialization; unsafe if shared) > [!TIP] > > This is useful for editor build systems and for AI agents, > because test runs no longer commandeer your active editor window. +You can also add the headless runner as a build system: + +```json +"build_systems": +[ + { + "name": "Test Package (Docker)", + "shell_cmd": "\"${packages}\\UnitTesting\\docker\\ut-run-tests.cmd\" .", + "working_dir": "$project_path", + "file_regex": "^\\s*File \"/root/\\.config/sublime-text/Packages/[^/]+/(.+)\", line ([0-9]+)" + }, + { + "name": "Test Current File (Docker)", + "shell_cmd": "\"${packages}\\UnitTesting\\docker\\ut-run-tests.cmd\" . --file \"${file}\"", + "working_dir": "$project_path", + "file_regex": "^\\s*File \"/root/\\.config/sublime-text/Packages/[^/]+/(.+)\", line ([0-9]+)" + } +] +``` + ## GitHub Actions diff --git a/docker/README.md b/docker/README.md index b3b33e13..c55e549b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -50,6 +50,38 @@ The container entrypoint writes a marker in `/root/.cache/unittesting`. With `-v unittesting-home:/root`, bootstrap/install runs once and later runs only refresh your package files and execute tests. +## Serialized runs + +The shared cache volume contains the Sublime data directory, including +`Packages`, `Lib`, UnitTesting schedules and test output files. Concurrent +runs against the same volume are serialized by default to avoid races while +copying packages, writing schedules and syncing Package Control libraries. + +Use `--lock-timeout SECONDS` to control how long a runner waits for the cache +volume lock. 
Use `--no-lock` only if you know the selected cache volume is not +shared by another runner. + +## Concurrent runs + +You can control concurrency by choosing how many cache volumes you use. The +default single volume serializes all runs. A stable volume per package allows +different packages to run concurrently while still keeping warm caches: + +```sh +ut-run-tests . --cache-volume unittesting-home-gitsavvy +``` + +To maximize concurrency, use a stable volume per checkout directory. For +example, in a POSIX shell: + +```sh +volume="unittesting-home-$(pwd -P | sha256sum | cut -c1-12)" +ut-run-tests . --cache-volume "$volume" +``` + +Runs that choose different volumes do not wait on each other. Runs that choose +the same volume remain serialized. + ## Refresh/update controls (without direct docker commands) Use launcher flags instead of calling `docker` manually: diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 945dde83..05dfa2d6 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -6,6 +6,7 @@ set -e PATH="$HOME/.local/bin:$PATH" BOOTSTRAP_MARKER="$HOME/.cache/unittesting/bootstrap.done" +DEPENDENCY_MARKER_DIR="$HOME/.cache/unittesting/package-dependencies" ensure_git_identity() { # Align local container behavior with typical CI runners where git @@ -49,6 +50,40 @@ ensure_ci_platform_compat() { fi } +package_control_sync_required() { + if [ ! -f "${ST_PACKAGES_DIR%/*}/Installed Packages/Package Control.sublime-package" ]; then + return 0 + fi + + local dependency_fingerprint + dependency_fingerprint="$(package_dependency_fingerprint)" + if [ -z "$dependency_fingerprint" ]; then + return 1 + fi + + [ "$(cat "$DEPENDENCY_MARKER_DIR/$PACKAGE.sha256" 2>/dev/null || true)" != "$dependency_fingerprint" ] +} + +package_dependency_fingerprint() { + local dependencies_file="$ST_PACKAGES_DIR/$PACKAGE/dependencies.json" + if [ ! 
-f "$dependencies_file" ]; then + return 0 + fi + + sha256sum "$dependencies_file" | awk '{ print $1 }' +} + +write_package_dependency_marker() { + local dependency_fingerprint + dependency_fingerprint="$(package_dependency_fingerprint)" + if [ -z "$dependency_fingerprint" ]; then + return + fi + + mkdir -p "$DEPENDENCY_MARKER_DIR" + printf '%s\n' "$dependency_fingerprint" > "$DEPENDENCY_MARKER_DIR/$PACKAGE.sha256" +} + sudo sh -e /etc/init.d/xvfb start ensure_git_identity ensure_ci_platform_compat @@ -72,18 +107,30 @@ if [ -d "$UNITTESTING_SOURCE/sbin" ]; then fi fi +NEEDS_PACKAGE_CONTROL_SYNC=false if [ ! -f "$BOOTSTRAP_MARKER" ]; then # Bootstrap from a neutral cwd to avoid Windows worktree .git indirection # paths breaking git commands inside the Linux container. (cd /tmp && /docker.sh bootstrap skip_package_copy) + NEEDS_PACKAGE_CONTROL_SYNC=true +fi + +# Always refresh checked-out package into Packages/ before syncing +# Package Control libraries, so dependencies.json from the package under test +# is visible to Package Control. 
+/docker.sh copy_tested_package overwrite + +if package_control_sync_required; then + NEEDS_PACKAGE_CONTROL_SYNC=true +fi + +if [ "$NEEDS_PACKAGE_CONTROL_SYNC" = true ]; then /docker.sh install_package_control + write_package_dependency_marker mkdir -p "$(dirname "$BOOTSTRAP_MARKER")" touch "$BOOTSTRAP_MARKER" fi -# Always refresh checked-out package into Packages/ -/docker.sh copy_tested_package overwrite - if [ "$#" -gt 0 ]; then /docker.sh "$@" else diff --git a/docker/run_tests.py b/docker/run_tests.py index 1f26fc19..23ad2418 100644 --- a/docker/run_tests.py +++ b/docker/run_tests.py @@ -11,14 +11,18 @@ import argparse import hashlib +import os import shutil import subprocess import sys +import tempfile +import time from pathlib import Path DEFAULT_IMAGE = "unittesting-local" DEFAULT_CACHE_VOLUME = "unittesting-home" +DEFAULT_LOCK_TIMEOUT = 3600 DOCKER_CONTEXT_HASH_LABEL = "org.sublimetext.unittesting.context-hash" DOCKER_CONTEXT_INPUTS = ( "Dockerfile", @@ -43,8 +47,15 @@ def main(argv: list[str] | None = None) -> int: maybe_build_image(image, refresh=True) if args.cache_volume and args.refresh_cache: - reset_docker_volume(args.cache_volume) - ensure_docker_volume(args.cache_volume) + if should_lock_cache(args): + with CacheVolumeLock(args.cache_volume, args.lock_timeout): + wait_for_cache_volume_idle(args.cache_volume, args.lock_timeout) + remove_stale_runner_container(args.cache_volume) + reset_docker_volume(args.cache_volume) + ensure_docker_volume(args.cache_volume) + else: + reset_docker_volume(args.cache_volume) + ensure_docker_volume(args.cache_volume) print("Refresh complete.") return 0 @@ -72,12 +83,15 @@ def main(argv: list[str] | None = None) -> int: if args.cache_volume: ensure_docker_volume(args.cache_volume) + lock_enabled = should_lock_cache(args) + runner_name = docker_cache_runner_name(args.cache_volume) if lock_enabled else None command = build_docker_run_command( package_root=package_root, unit_testing_root=unit_testing_root, 
package_name=package_name, image=image, cache_volume=args.cache_volume, + container_name=runner_name, scheduler_delay_ms=args.scheduler_delay_ms, coverage=args.coverage, failfast=args.failfast, @@ -96,11 +110,21 @@ def main(argv: list[str] | None = None) -> int: print("Image refresh: enabled") if args.cache_volume: print(f"Cache volume: {args.cache_volume}") + if lock_enabled: + print("Cache lock: enabled") if args.refresh_cache: print("Cache refresh: enabled") if tests_dir and pattern: print(f"Test target: {tests_dir}/{pattern}") + if lock_enabled: + with CacheVolumeLock(args.cache_volume, args.lock_timeout): + wait_for_cache_volume_idle(args.cache_volume, args.lock_timeout) + ensure_runner_container_name_available(args.cache_volume, args.lock_timeout) + return call_docker_run_with_name_retry( + command, args.cache_volume, args.lock_timeout + ) + return subprocess.call(command) @@ -186,6 +210,20 @@ def parse_args(argv: list[str] | None) -> argparse.Namespace: "are re-installed." ), ) + docker_group.add_argument( + "--lock-timeout", + type=float, + default=DEFAULT_LOCK_TIMEOUT, + help=( + "Seconds to wait for another runner using the same cache volume " + f"(default: {DEFAULT_LOCK_TIMEOUT})." 
+ ), + ) + docker_group.add_argument( + "--no-lock", + action="store_true", + help="Disable cache-volume serialization (unsafe for concurrent runs).", + ) args = parser.parse_args(argv) @@ -198,6 +236,9 @@ def parse_args(argv: list[str] | None) -> argparse.Namespace: if args.dry_run and (args.refresh or args.refresh_image or args.refresh_cache): parser.error("--dry-run cannot be combined with --refresh* options") + if args.lock_timeout < 0: + parser.error("--lock-timeout must be greater than or equal to 0") + return args @@ -342,6 +383,7 @@ def build_docker_run_command( package_name: str, image: str, cache_volume: str | None, + container_name: str | None, scheduler_delay_ms: int, coverage: bool, failfast: bool, @@ -352,6 +394,9 @@ def build_docker_run_command( pattern: str | None, ) -> list[str]: command = ["docker", "run", "--rm"] + if container_name: + command.extend(["--name", container_name]) + if sys.stdin.isatty(): command.append("-i") if sys.stdout.isatty(): @@ -393,6 +438,265 @@ def build_docker_run_command( return command +def should_lock_cache(args: argparse.Namespace) -> bool: + return bool(args.cache_volume and not args.no_lock) + + +def call_docker_run_with_name_retry( + command: list[str], cache_volume: str, timeout: float +) -> int: + deadline = time.monotonic() + timeout + + while True: + returncode = subprocess.call(command) + if returncode != 125: + return returncode + + # Docker returns 125 for daemon/CLI errors, including the atomic + # container-name conflict we use as a last line of defense if another + # launcher did not observe the host-side lock. 
+ if docker_container_status(docker_cache_runner_name(cache_volume)) is None: + return returncode + + remaining = deadline - time.monotonic() + if remaining <= 0: + return returncode + + ensure_runner_container_name_available(cache_volume, remaining) + + +def wait_for_cache_volume_idle(cache_volume: str, timeout: float) -> None: + deadline = time.monotonic() + timeout + next_notice = 0.0 + + while True: + container_names = docker_running_container_names_using_volume(cache_volume) + if not container_names: + return + + now = time.monotonic() + if now >= deadline: + names = ", ".join(container_names) + raise SystemExit( + f"Timed out waiting for Docker volume '{cache_volume}' " + f"to become idle. Running containers: {names}" + ) + + if now >= next_notice: + names = ", ".join(container_names) + print( + f"Waiting for Docker volume '{cache_volume}' " + f"to become idle: {names}" + ) + next_notice = now + 5 + + time.sleep(min(1, max(0, deadline - now))) + + +def ensure_runner_container_name_available(cache_volume: str, timeout: float) -> None: + runner_name = docker_cache_runner_name(cache_volume) + deadline = time.monotonic() + timeout + next_notice = 0.0 + + while True: + status = docker_container_status(runner_name) + if status is None: + return + + if status in ("created", "exited", "dead"): + remove_docker_container(runner_name) + return + + now = time.monotonic() + if now >= deadline: + raise SystemExit( + f"Timed out waiting for Docker container '{runner_name}' " + f"to finish. Current status: {status}" + ) + + if now >= next_notice: + print( + f"Waiting for Docker container '{runner_name}' " + f"to finish. 
Current status: {status}" + ) + next_notice = now + 5 + + time.sleep(min(1, max(0, deadline - now))) + + +def remove_stale_runner_container(cache_volume: str) -> None: + runner_name = docker_cache_runner_name(cache_volume) + status = docker_container_status(runner_name) + if status in ("created", "exited", "dead"): + remove_docker_container(runner_name) + + +def docker_cache_runner_name(cache_volume: str) -> str: + digest = hashlib.sha256(cache_volume.encode("utf-8")).hexdigest()[:16] + return f"unittesting-runner-{digest}" + + +def docker_running_container_names_using_volume(cache_volume: str) -> list[str]: + result = subprocess.run( + [ + "docker", + "ps", + "--filter", + f"volume={cache_volume}", + "--format", + "{{.Names}}", + ], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + ) + if result.returncode != 0: + return [] + + return [name for name in result.stdout.splitlines() if name] + + +def docker_container_status(container_name: str) -> str | None: + result = subprocess.run( + [ + "docker", + "container", + "inspect", + container_name, + "--format", + "{{.State.Status}}", + ], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + ) + if result.returncode != 0: + return None + + return result.stdout.strip() or None + + +def remove_docker_container(container_name: str) -> None: + run_checked(["docker", "container", "rm", container_name]) + + +class CacheVolumeLock: + def __init__(self, cache_volume: str, timeout: float) -> None: + self.cache_volume = cache_volume + self.timeout = timeout + self.path = cache_lock_file_path(cache_volume) + self.file = None + + def __enter__(self) -> "CacheVolumeLock": + self.path.parent.mkdir(parents=True, exist_ok=True) + self.file = self.path.open("a+", encoding="utf-8") + + deadline = time.monotonic() + self.timeout + next_notice = 0.0 + + while True: + if try_lock_file(self.file): + self.write_lock_info() + return self + + now = time.monotonic() + if now >= deadline: + raise SystemExit( + 
f"Timed out waiting for UnitTesting Docker cache lock: {self.path}" + ) + + if now >= next_notice: + print( + f"Waiting for UnitTesting Docker cache lock for " + f"volume '{self.cache_volume}'..." + ) + next_notice = now + 5 + + time.sleep(min(1, max(0, deadline - now))) + + def __exit__(self, exc_type, exc_value, traceback) -> None: + if self.file is None: + return + + unlock_file(self.file) + self.file.close() + self.file = None + + def write_lock_info(self) -> None: + assert self.file is not None + self.file.seek(0) + self.file.truncate() + self.file.write(f"volume={self.cache_volume}\n") + self.file.write(f"pid={os.getpid()}\n") + self.file.write(f"time={time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}\n") + self.file.flush() + + +def cache_lock_file_path(cache_volume: str) -> Path: + digest = hashlib.sha256(cache_volume.encode("utf-8")).hexdigest()[:16] + return Path(tempfile.gettempdir()) / "unittesting-docker-locks" / f"{digest}.lock" + + +def try_lock_file(lock_file) -> bool: + if os.name == "nt": + return try_lock_file_windows(lock_file) + + return try_lock_file_posix(lock_file) + + +def unlock_file(lock_file) -> None: + if os.name == "nt": + unlock_file_windows(lock_file) + else: + unlock_file_posix(lock_file) + + +def try_lock_file_windows(lock_file) -> bool: + import msvcrt + + ensure_lock_byte(lock_file) + lock_file.seek(0) + try: + msvcrt.locking(lock_file.fileno(), msvcrt.LK_NBLCK, 1) + except OSError: + return False + + return True + + +def unlock_file_windows(lock_file) -> None: + import msvcrt + + lock_file.seek(0) + msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1) + + +def try_lock_file_posix(lock_file) -> bool: + import fcntl + + try: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError: + return False + + return True + + +def unlock_file_posix(lock_file) -> None: + import fcntl + + fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) + + +def ensure_lock_byte(lock_file) -> None: + lock_file.seek(0, os.SEEK_END) + 
if lock_file.tell() != 0: + return + + lock_file.write("\0") + lock_file.flush() + + def run_checked(command: list[str]) -> None: result = subprocess.run(command) if result.returncode != 0: