diff --git a/src/borg/archiver/help_cmd.py b/src/borg/archiver/help_cmd.py index ba10f407a6..1f0fe06f1f 100644 --- a/src/borg/archiver/help_cmd.py +++ b/src/borg/archiver/help_cmd.py @@ -314,6 +314,7 @@ class HelpMixIn: - user: exact match on the username who created the archive - host: exact match on the hostname where the archive was created - tags: match on the archive tags + - date: match on the archive creation timestamp In case of a name pattern match, it uses pattern styles similar to the ones described by ``borg help patterns``: @@ -328,6 +329,28 @@ class HelpMixIn: Full regular expression support. This is very powerful, but can also get rather complicated. + Date patterns, selector ``date:`` + Match archives by creation timestamp. Supported forms are: + + - ``YYYY``: 1 year + - ``YYYY-MM``: 1 month + - ``YYYY-MM-DD``: 1 day + - ``YYYY-MM-DDTHH``: 1 hour + - ``YYYY-MM-DDTHH:MM``: 1 minute + - ``YYYY-MM-DDTHH:MM:SS``: 1 second + - ``@1735732800``: 1 second + - ``YYYY-MM-DDTHH:MM:SS.ffffff``: exact timestamp + - ``@1735732800.123456``: exact timestamp + + Date and time patterns match the interval implied by their precision, including + the start and excluding the end. Fractional-second patterns accept 1 to 6 + digits and match exactly. + + Date and time patterns may include a timezone suffix: ``Z``, ``+HH:MM``, + ``-HH:MM``, or ``[Region/City]``. Patterns without a timezone are interpreted + in the local timezone. Unix timestamp patterns are UTC and do not accept a + timezone suffix. 
+ Examples:: # name match, id: style @@ -349,7 +372,12 @@ class HelpMixIn: borg delete -a 'host:kenny-pc' # tags match - borg delete -a 'tags:TAG1' -a 'tags:TAG2'\n\n""" + borg delete -a 'tags:TAG1' -a 'tags:TAG2' + + # archive creation date match + borg delete -a 'date:2025-01' + borg delete -a 'date:2025-01-01T14:30Z' + borg delete -a 'date:2025-01-01T09:30[America/New_York]'\n\n""" ) helptext["placeholders"] = textwrap.dedent( """ diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index e54394cdc0..6b3081809e 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -1,6 +1,7 @@ import os import re -from datetime import UTC, datetime, timedelta +from datetime import UTC, datetime, timedelta, timezone +from zoneinfo import ZoneInfo def parse_timestamp(timestamp, tzinfo=UTC): @@ -198,3 +199,150 @@ def isoformat(self): def archive_ts_now(): """return tz-aware datetime obj for current time for usage as archive timestamp""" return datetime.now(UTC) # utc time / utc timezone + + +class DatePatternError(ValueError): + """Raised when a date: archive pattern cannot be parsed.""" + + +def date_match_exact(dt: datetime): + """Return predicate matching archives whose timestamp equals dt.""" + dt_utc = dt.astimezone(UTC) + return lambda ts: ts.astimezone(UTC) == dt_utc + + +def date_match_interval(start: datetime, end: datetime): + """Return predicate matching archives in the start-inclusive, end-exclusive interval.""" + start_utc = start.astimezone(UTC) + end_utc = end.astimezone(UTC) + return lambda ts: start_utc <= ts.astimezone(UTC) < end_utc + + +def parse_date_pattern_tz(tzstr: str): + """Parse a date: pattern timezone suffix.""" + if not tzstr: + return None + if tzstr == "Z": + return UTC + if tzstr[0] in "+-": + sign = 1 if tzstr[0] == "+" else -1 + try: + hh, mm = map(int, tzstr[1:].split(":")) + if not (0 <= hh <= 23 and 0 <= mm < 60): + raise ValueError + except ValueError: + raise DatePatternError("invalid UTC offset format") + 
total_minutes = sign * (hh * 60 + mm) + if not (-12 * 60 <= total_minutes <= 14 * 60): + raise DatePatternError("UTC offset outside ISO-8601 bounds") + return timezone(timedelta(minutes=total_minutes)) + if tzstr.startswith("[") and tzstr.endswith("]"): + try: + return ZoneInfo(tzstr[1:-1]) + except Exception: + raise DatePatternError("invalid timezone format") + raise DatePatternError("invalid timezone format") + + +DATE_PATTERN_RE = r""" + ^ + (?: + @(?P\d+)(?:\.(?P\d{1,6}))? + | + (?P\d{4}) + (?: + -(?P\d{2}) + (?: + -(?P\d{2}) + (?: + T(?P\d{2}) + (?: + :(?P\d{2}) + (?: + :(?P\d{2})(?:\.(?P\d{1,6}))? + )? + )? + )? + )? + )? + ) + (?PZ|[+\-]\d\d:\d\d|\[[^]]+\])? + $ +""" + + +def build_date_pattern_datetime(groups: dict, tz) -> datetime: + """Build the earliest datetime represented by a date: pattern.""" + second = 0 + microsecond = 0 + if groups.get("second"): + second = int(groups["second"]) + if groups.get("fraction"): + microsecond = int((groups["fraction"] + "000000")[:6]) + try: + return datetime( + year=int(groups["year"]), + month=int(groups.get("month") or 1), + day=int(groups.get("day") or 1), + hour=int(groups.get("hour") or 0), + minute=int(groups.get("minute") or 0), + second=second, + microsecond=microsecond, + tzinfo=tz, + ) + except ValueError as exc: + raise DatePatternError(str(exc)) + + +def parse_date_pattern_interval(expr: str) -> tuple[datetime, datetime]: + """Parse a static date: pattern into the interval it represents.""" + match = re.match(DATE_PATTERN_RE, expr, re.VERBOSE) + if not match: + raise DatePatternError(f"unrecognised date: {expr!r}") + + groups = match.groupdict() + tz = parse_date_pattern_tz(groups["tz"]) + + if groups["epoch"] and groups["tz"]: + raise DatePatternError("Unix timestamps must not have timezone suffixes") + + try: + if groups["epoch"]: + if groups["epoch_fraction"]: + start = _EPOCH + timedelta( + seconds=int(groups["epoch"]), microseconds=int((groups["epoch_fraction"] + "000000")[:6]) + ) + return start, 
start + start = _EPOCH + timedelta(seconds=int(groups["epoch"])) + return start, start + timedelta(seconds=1) + + start = build_date_pattern_datetime(groups, tz) + if groups["second"]: + if groups["fraction"]: + return start, start + return start, start + timedelta(seconds=1) + if groups["minute"]: + return start, start + timedelta(minutes=1) + if groups["hour"]: + return start, start + timedelta(hours=1) + if groups["day"]: + return start, start + timedelta(days=1) + if groups["month"]: + return start, offset_n_months(start, 1) + return start, offset_n_months(start, 12) + except (ValueError, OverflowError) as exc: + raise DatePatternError(str(exc)) + + +def compile_date_pattern(expr: str): + """ + Compile a date: archive match expression into a timestamp predicate. + + Supported expressions are static calendar timestamps from year to fractional-second precision, + optional timezone suffixes (Z, +/-HH:MM, or [Region/City]), and Unix epoch timestamps prefixed with @. + """ + expr = expr.strip() + start, end = parse_date_pattern_interval(expr) + if start == end: + return date_match_exact(start) + return date_match_interval(start, end) diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 7a84a343a6..4881cf4780 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -15,7 +15,13 @@ from .constants import * # NOQA from .helpers.datastruct import StableDict from .helpers.parseformat import bin_to_hex, hex_to_bin -from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now +from .helpers.time import ( + parse_timestamp, + calculate_relative_offset, + archive_ts_now, + compile_date_pattern, + DatePatternError, +) from .helpers.errors import Error, CommandError from .crypto.low_level import IntegrityError as IntegrityErrorBase from .item import ArchiveItem @@ -231,6 +237,13 @@ def _matching_info_tuples(self, match_patterns, match_end, *, deleted=False): elif match.startswith("host:"): wanted_host = match.removeprefix("host:") 
import calendar
import time
from datetime import datetime, timezone

import pytest

from ...constants import *  # NOQA
from ...helpers.errors import CommandError
from ...platform import is_win32
from . import cmd, create_src_archive, generate_archiver_tests, RK_ENCRYPTION


def pytest_generate_tests(metafunc):
    """Parametrize every test in this module over the local, remote and binary archivers."""
    generate_archiver_tests(metafunc, kinds="local,remote,binary")


# Each fixture table holds (archive_name, timestamp) pairs.
YEAR_ARCHIVES = [
    ("archive-year-start", "2025-01-01T00:00:00+00:00"),
    ("archive-year-same", "2025-12-31T23:59:59+00:00"),
    ("archive-year-diff", "2024-12-31T23:59:59+00:00"),
]

MONTH_ARCHIVES = [
    ("archive-mon-start", "2025-02-01T00:00:00+00:00"),
    ("archive-mon-same", "2025-02-28T23:59:59+00:00"),
    ("archive-mon-diff", "2025-01-31T23:59:59+00:00"),
]

DAY_ARCHIVES = [
    ("archive-day-start", "2025-01-02T00:00:00+00:00"),
    ("archive-day-same", "2025-01-02T23:59:59+00:00"),
    ("archive-day-diff", "2025-01-01T23:59:59+00:00"),
]

HOUR_ARCHIVES = [
    ("archive-hour-start", "2025-01-01T14:00:00+00:00"),
    ("archive-hour-same", "2025-01-01T14:59:59+00:00"),
    ("archive-hour-diff", "2025-01-01T13:59:59+00:00"),
]

MINUTE_ARCHIVES = [
    ("archive-min-start", "2025-01-01T13:31:00+00:00"),
    ("archive-min-same", "2025-01-01T13:31:59+00:00"),
    ("archive-min-diff", "2025-01-01T13:30:59+00:00"),
]

SECOND_ARCHIVES = [
    ("archive-sec-target", "2025-01-01T13:30:45+00:00"),
    ("archive-sec-before", "2025-01-01T13:30:44+00:00"),
    ("archive-sec-after", "2025-01-01T13:30:46+00:00"),
]


def _repo_with_archives(archiver, archives):
    """Create a fresh repository and one source archive per (name, timestamp) pair, in order."""
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    for archive_name, archive_ts in archives:
        create_src_archive(archiver, archive_name, ts=archive_ts)


def _list_by_date(archiver, pattern):
    """Return the repo-list output restricted to archives matching ``date:<pattern>``."""
    return cmd(archiver, "repo-list", "-v", f"--match-archives=date:{pattern}", exit_code=0)


def _check_listing(listing, present=(), absent=()):
    """Assert that every name in *present* and no name in *absent* occurs in *listing*."""
    for archive_name in present:
        assert archive_name in listing
    for archive_name in absent:
        assert archive_name not in listing


def test_match_archives_year(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, YEAR_ARCHIVES)

    # the 2024 filter must only select the archive from the previous year
    _check_listing(
        _list_by_date(archiver, "2024Z"),
        present=["archive-year-diff"],
        absent=["archive-year-start", "archive-year-same"],
    )
    # the 2025 filter must select both the first and the last second of 2025
    _check_listing(
        _list_by_date(archiver, "2025Z"),
        present=["archive-year-start", "archive-year-same"],
        absent=["archive-year-diff"],
    )


def test_match_archives_month(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, MONTH_ARCHIVES)

    # January selects nothing but the January archive
    _check_listing(
        _list_by_date(archiver, "2025-01Z"),
        present=["archive-mon-diff"],
        absent=["archive-mon-start", "archive-mon-same"],
    )
    # February selects both the first and the last second of February
    _check_listing(
        _list_by_date(archiver, "2025-02Z"),
        present=["archive-mon-start", "archive-mon-same"],
        absent=["archive-mon-diff"],
    )


def test_match_archives_day(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, DAY_ARCHIVES)

    # 2025-01-01 selects nothing but the archive from that day
    _check_listing(
        _list_by_date(archiver, "2025-01-01Z"),
        present=["archive-day-diff"],
        absent=["archive-day-start", "archive-day-same"],
    )
    # 2025-01-02 selects both the first and the last second of that day
    _check_listing(
        _list_by_date(archiver, "2025-01-02Z"),
        present=["archive-day-start", "archive-day-same"],
        absent=["archive-day-diff"],
    )


def test_match_archives_hour(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, HOUR_ARCHIVES)

    # the 13:00 hour selects nothing but the archive from 13:59:59
    _check_listing(
        _list_by_date(archiver, "2025-01-01T13Z"),
        present=["archive-hour-diff"],
        absent=["archive-hour-start", "archive-hour-same"],
    )
    # the 14:00 hour selects both the beginning and the end of the hour
    _check_listing(
        _list_by_date(archiver, "2025-01-01T14Z"),
        present=["archive-hour-start", "archive-hour-same"],
        absent=["archive-hour-diff"],
    )


def test_match_archives_minute(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, MINUTE_ARCHIVES)

    # minute 13:30 selects nothing but the archive from 13:30:59
    _check_listing(
        _list_by_date(archiver, "2025-01-01T13:30Z"),
        present=["archive-min-diff"],
        absent=["archive-min-start", "archive-min-same"],
    )
    # minute 13:31 selects both the beginning and the end of the minute
    _check_listing(
        _list_by_date(archiver, "2025-01-01T13:31Z"),
        present=["archive-min-start", "archive-min-same"],
        absent=["archive-min-diff"],
    )


def test_match_archives_second(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, SECOND_ARCHIVES)

    # only the archive created in exactly that second is selected
    _check_listing(
        _list_by_date(archiver, "2025-01-01T13:30:45Z"),
        present=["archive-sec-target"],
        absent=["archive-sec-before", "archive-sec-after"],
    )


def test_match_archives_fractional_second(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(
        archiver,
        [
            ("archive-fraction-target", "2025-01-01T13:30:45.123456+00:00"),
            ("archive-fraction-other", "2025-01-01T13:30:45.123457+00:00"),
        ],
    )

    _check_listing(
        _list_by_date(archiver, "2025-01-01T13:30:45.123456Z"),
        present=["archive-fraction-target"],
        absent=["archive-fraction-other"],
    )


def test_unix_timestamps(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(
        archiver,
        [
            ("archive-sec-before", "2025-01-01T13:30:44+00:00"),
            ("archive-sec-target", "2025-01-01T13:30:45+00:00"),
            ("archive-sec-after", "2025-01-01T13:30:46+00:00"),
        ],
    )
    dt_target = datetime(2025, 1, 1, 13, 30, 45, tzinfo=timezone.utc)
    epoch_seconds = calendar.timegm(dt_target.utctimetuple())

    _check_listing(
        _list_by_date(archiver, f"@{epoch_seconds}"),
        present=["archive-sec-target"],
        absent=["archive-sec-before", "archive-sec-after"],
    )


def test_fractional_unix_timestamps(archivers, request):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(
        archiver,
        [
            ("archive-fraction-target", "2025-01-01T13:30:45.123456+00:00"),
            ("archive-fraction-other", "2025-01-01T13:30:45.123457+00:00"),
        ],
    )
    dt_target = datetime(2025, 1, 1, 13, 30, 45, 123456, tzinfo=timezone.utc)
    epoch_seconds = calendar.timegm(dt_target.utctimetuple())

    _check_listing(
        _list_by_date(archiver, f"@{epoch_seconds}.{dt_target.microsecond:06d}"),
        present=["archive-fraction-target"],
        absent=["archive-fraction-other"],
    )


TIMEZONE_ARCHIVES = [("archive-la", "2025-01-01T12:01:00-08:00"), ("archive-utc", "2025-01-02T12:01:00+00:00")]


@pytest.mark.parametrize("timezone_variant", ["2025-01-01T12:01:00-08:00", "2025-01-01T12:01:00[America/Los_Angeles]"])
def test_match_la_equivalents(archivers, request, timezone_variant):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, TIMEZONE_ARCHIVES)

    _check_listing(_list_by_date(archiver, timezone_variant), present=["archive-la"], absent=["archive-utc"])


@pytest.mark.parametrize(
    "timezone_variant", ["2025-01-02T12:01:00+00:00", "2025-01-02T12:01:00Z", "2025-01-02T12:01:00[Etc/UTC]"]
)
def test_match_utc_equivalents(archivers, request, timezone_variant):
    archiver = request.getfixturevalue(archivers)
    _repo_with_archives(archiver, TIMEZONE_ARCHIVES)

    _check_listing(_list_by_date(archiver, timezone_variant), present=["archive-utc"], absent=["archive-la"])
HOUR_TZ_ARCHIVES = [
    ("archive-hour-diff", "2025-01-01T09:59:00Z"),
    ("archive-hour-start", "2025-01-01T10:00:00Z"),
    ("archive-hour-same", "2025-01-01T10:59:59Z"),
]


def test_match_hour_from_different_tz(archivers, request):
    """
    Test that the date filter works for hours with archives created in a different timezone.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    for name, ts in HOUR_TZ_ARCHIVES:
        create_src_archive(archiver, name, ts=ts)

    # We're filtering "local 11:00" in +01:00 zone, which is 10:00-10:59:59 UTC
    out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T11+01:00", exit_code=0)
    assert "archive-hour-start" in out
    assert "archive-hour-same" in out
    assert "archive-hour-diff" not in out


LOCAL_TZ_ARCHIVES = [
    ("archive-local-in", "2025-01-15T18:30:00Z"),  # 13:30 in America/New_York (EST, UTC-5)
    ("archive-local-out", "2025-01-15T17:30:00Z"),  # 12:30 in America/New_York
]


@pytest.mark.skipif(is_win32, reason="time.tzset() is not available on Windows")
def test_match_bare_pattern_uses_local_timezone(archivers, request, monkeypatch):
    """A pattern without a timezone suffix is interpreted in the local timezone."""
    archiver = request.getfixturevalue(archivers)
    try:
        # Scope the TZ override so it is undone when the with-block exits, even on failure.
        with monkeypatch.context() as scoped:
            scoped.setenv("TZ", "America/New_York")
            time.tzset()
            cmd(archiver, "repo-create", RK_ENCRYPTION)
            for name, ts in LOCAL_TZ_ARCHIVES:
                create_src_archive(archiver, name, ts=ts)

            # bare 13:30 is "local" EST (UTC-5) or 18:30 UTC, matching only archive-local-in
            out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-15T13:30", exit_code=0)
            assert "archive-local-in" in out
            assert "archive-local-out" not in out
    finally:
        # Restoring the environment variable alone does not reset the C library's timezone
        # state; re-run tzset() so later tests do not keep running in America/New_York.
        time.tzset()


def _assert_date_pattern_rejected(archiver, expr):
    """Create a repo and assert that listing with ``date:<expr>`` raises a CommandError naming *expr*."""
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    with pytest.raises(CommandError) as excinfo:
        cmd(archiver, "repo-list", "-v", f"--match-archives=date:{expr}")

    msg = str(excinfo.value)
    assert "Invalid date pattern" in msg
    assert expr in msg


@pytest.mark.parametrize(
    "invalid_expr",
    [
        "2025-01-01T00:00:00+14:01",  # beyond +14:00 (ISO 8601 boundary)
        "2025-01-01T00:00:00-12:01",  # beyond -12:00 (ISO 8601 boundary)
        "2025-01-01T00:00:00+09:99",  # invalid minutes
        "2025-01-01T00:00:00[garbage]",  # invalid region
        "2025-01-01T00:00:00[Not/AZone]",  # structured but nonexistent
    ],
)
def test_invalid_timezones_rejected(archivers, request, invalid_expr):
    """
    Test that invalid timezone expressions are rejected.
    """
    _assert_date_pattern_rejected(request.getfixturevalue(archivers), invalid_expr)


def test_unix_timestamp_rejects_timezone(archivers, request):
    """A Unix timestamp pattern followed by a timezone suffix is rejected."""
    _assert_date_pattern_rejected(request.getfixturevalue(archivers), "@1735732800Z")


@pytest.mark.parametrize(
    "invalid_expr",
    [
        "9999",  # year interval end overflows datetime.max
        "9999-12",  # month interval end overflows datetime.max
        "9999-12-31T23",  # hour interval end overflows datetime.max
        "@253402300799",  # ~year 9999 epoch, interval end overflows
        "@99999999999999999999",  # epoch too large for C int
    ],
)
def test_out_of_range_rejected(archivers, request, invalid_expr):
    """Out-of-range patterns produce a clean CommandError, not a traceback."""
    _assert_date_pattern_rejected(request.getfixturevalue(archivers), invalid_expr)


@pytest.mark.parametrize("invalid_expr", ["2025-01-01T00:00:00.1234567Z", "@1735732800.1234567"])
def test_fractional_precision_rejected(archivers, request, invalid_expr):
    """Fractions with more than 6 digits are rejected."""
    _assert_date_pattern_rejected(request.getfixturevalue(archivers), invalid_expr)