diff --git a/docs/conf.rst b/docs/conf.rst index 36f311e..ae8dc63 100644 --- a/docs/conf.rst +++ b/docs/conf.rst @@ -12,3 +12,8 @@ The extension provides the following configuration: Number of recent revisions to return by default when calling ``load_extra('recentupdate')`` without an explicit ``count`` parameter. + +.. autoconfval:: recentupdate_group_by + + Group revisions by time period. When set, revisions are grouped by + UTC time period and author. diff --git a/docs/usage.rst b/docs/usage.rst index f9d6291..94503be 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -26,6 +26,18 @@ following parameters are available: (default ``False``). See also :example:`Recent Updates to Current Document`. +.. role:: py(code) + :language: Python + +``group_by`` + Group revisions by time period. Revisions are grouped by UTC time + period and author. + + Defaults to :confval:`recentupdate_group_by`. Available values: + :data.render:`{{ load_extra('env').config.values['recentupdate_group_by'].valid_types | autoconfval_types | join(', ') }}`. + + See also :example:`Grouped Recent Updates`. + Each item returned is a :py:class:`~sphinxnotes.recentupdate.Revision` object: .. autoclass:: sphinxnotes.recentupdate.Revision @@ -91,6 +103,19 @@ Examples ``{{ r.date }}`` — {{ r.message[0] }} {% endfor %} +.. example:: Grouped Recent Updates + + .. 
data.render:: + + Recent updates grouped by month: + + {% for r in load_extra('recentupdate', count=10, group_by='month') %} + ``📅 {{ r.date.strftime('%Y-%m') }}`` + {% for msg in r.message[:3] %} + {{ msg }} + {% endfor %} + {% endfor %} + ``sphinxnotes-render`` ====================== diff --git a/src/sphinxnotes/recentupdate/__init__.py b/src/sphinxnotes/recentupdate/__init__.py index 4e25017..c6253a4 100644 --- a/src/sphinxnotes/recentupdate/__init__.py +++ b/src/sphinxnotes/recentupdate/__init__.py @@ -9,16 +9,19 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, override +from typing import TYPE_CHECKING, ClassVar, Iterator, override from datetime import datetime, timezone from dataclasses import dataclass +from collections import OrderedDict from os import path from pathlib import Path +from itertools import islice from git import Repo from sphinx.util import logging from sphinx.util.matching import Matcher +from sphinx.config import ENUM from sphinxnotes.render import ( extra_context, @@ -63,15 +66,79 @@ class Revision: removed_docs: list[str] +def get_time_period_key(dt: datetime, group_by: str) -> datetime: + """Return the start of the time period for grouping.""" + if group_by == 'day': + return dt.replace(hour=0, minute=0, second=0, microsecond=0) + elif group_by == 'month': + return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + elif group_by == 'year': + return dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0) + return dt + + +def compact_revision(revs: list[Revision]) -> Revision: + if len(revs) == 1: + return revs[0] + + messages = [] + for rev in reversed(revs): + messages.extend(rev.message) + + added, changed, removed = set(), set(), set() + for rev in reversed(revs): + added.update(rev.added_docs) + changed.update(rev.changed_docs) + removed.update(rev.removed_docs) + + # Compute the net effect of all commits in this group: + # If a file was added then deleted, the net effect 
is removal. + # If a file was added then modified, the net effect is addition. + # If a file was modified then deleted, the net effect is removal. + # FIXME: If a file is removed and then re-added, it is still reported as removed. + added -= removed + changed -= removed + changed -= added + + return Revision( + message=messages, + author=revs[0].author, + date=revs[0].date, + added_docs=sorted(added), + changed_docs=sorted(changed), + removed_docs=sorted(removed), + ) + + +def group_revisions( + groups: OrderedDict[tuple[str, datetime], list[Revision]], + rev: Revision, + group_by: str, +) -> None: + """Add revision to groups.""" + key = (rev.author, get_time_period_key(rev.date, group_by)) + groups.setdefault(key, []).append(rev) + + +def compact_groups( + groups: OrderedDict[tuple[str, datetime], list[Revision]], +) -> list[Revision]: + """Compact grouped revisions into a list of Revision.""" + merged = [] + for (author, period_date), revs in groups.items(): + rev = compact_revision(revs) + rev.author, rev.date = author, period_date + merged.append(rev) + return merged + + def get_git_revisions( repo: Repo, env: BuildEnvironment, - count: int, path: str, current_doc: str | None = None, -) -> list[Revision]: - revs: list[Revision] = [] - +) -> Iterator[Revision]: + """Yield Revision objects from git commits.""" for cur in repo.iter_commits(paths=path): matches = [x in cur.message for x in env.config.recentupdate_exclude_commit] if any(matches): @@ -120,23 +187,14 @@ def get_git_revisions( logger.debug(f'Skip commit {cur.hexsha}: no changes to {current_doc}') continue - revs.append( - Revision( - message=str(cur.message).splitlines(), - author=str(cur.author or ''), - date=datetime.fromtimestamp(cur.authored_date, tz=timezone.utc), - changed_docs=m, - added_docs=a, - removed_docs=d, - ) + yield Revision( + message=str(cur.message).splitlines(), + author=str(cur.author or ''), + date=datetime.fromtimestamp(cur.authored_date, tz=timezone.utc), + changed_docs=m, + added_docs=a, + removed_docs=d, 
) - if len(revs) >= count: - break - - logger.info( - f'[recentupdate] Intend to get recent {count} commits, eventually get {len(revs)}' - ) - return revs def path2docname(repo: Repo, env: BuildEnvironment, file: str) -> str | None: @@ -176,11 +234,28 @@ def generate( count: int = 0, path: str = '.', current_doc: bool = False, + group_by: str = '', ) -> Any: - if count <= 0: - count = req.env.config.recentupdate_count + count = count or req.env.config.recentupdate_count + group_by = group_by or req.env.config.recentupdate_group_by docname = req.env.docname if current_doc else None - return get_git_revisions(self.repo, req.env, count, path, docname) + + git_revs = get_git_revisions(self.repo, req.env, path, docname) + + if group_by: + groups = OrderedDict() + for rev in git_revs: + group_revisions(groups, rev, group_by) + if len(groups) >= count: + break + revs = compact_groups(groups) + else: + revs = list(islice(git_revs, count)) + logger.info( + f'[recentupdate] Expect {count} revisions, finally get {len(revs)}, group by {group_by}' + ) + + return revs def setup(app: Sphinx): @@ -193,7 +268,9 @@ def setup(app: Sphinx): app.add_config_value( 'recentupdate_exclude_commit', ['skip-recentupdate'], 'env', types=list[str] ) - app.add_config_value('recentupdate_count', 10, 'env', types=int) + app.add_config_value( + 'recentupdate_group_by', None, 'env', types=ENUM(None, 'day', 'month', 'year') + ) return meta.post_setup(app) diff --git a/tests/test_group_revisions.py b/tests/test_group_revisions.py new file mode 100644 index 0000000..3646378 --- /dev/null +++ b/tests/test_group_revisions.py @@ -0,0 +1,122 @@ +import unittest +from collections import OrderedDict +from datetime import datetime, timezone + +from sphinxnotes.recentupdate import ( + Revision, + get_time_period_key, + compact_revision, + group_revisions, + compact_groups, +) + + +def _make_rev( + message: list[str], + author: str, + date: datetime, + added: list[str] | None = None, + changed: list[str] | None 
= None, + removed: list[str] | None = None, +) -> Revision: + return Revision( + message=message, + author=author, + date=date, + added_docs=added or [], + changed_docs=changed or [], + removed_docs=removed or [], + ) + + +def _group_and_compact(revs: list[Revision], group_by: str) -> list[Revision]: + groups: OrderedDict[tuple[str, datetime], list[Revision]] = OrderedDict() + for rev in revs: + group_revisions(groups, rev, group_by) + return compact_groups(groups) + + +class TestGetTimePeriodKey(unittest.TestCase): + def test_day(self): + dt = datetime(2024, 3, 15, 14, 30, 45, tzinfo=timezone.utc) + result = get_time_period_key(dt, 'day') + self.assertEqual(result, datetime(2024, 3, 15, 0, 0, 0, tzinfo=timezone.utc)) + + def test_month(self): + dt = datetime(2024, 3, 15, 14, 30, 45, tzinfo=timezone.utc) + result = get_time_period_key(dt, 'month') + self.assertEqual(result, datetime(2024, 3, 1, 0, 0, 0, tzinfo=timezone.utc)) + + def test_year(self): + dt = datetime(2024, 3, 15, 14, 30, 45, tzinfo=timezone.utc) + result = get_time_period_key(dt, 'year') + self.assertEqual(result, datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc)) + + +class TestCompactRevision(unittest.TestCase): + def test_single_revision(self): + rev = _make_rev(['msg'], 'alice', datetime(2024, 1, 1, tzinfo=timezone.utc)) + result = compact_revision([rev]) + self.assertEqual(result.message, ['msg']) + + def test_merge_messages_and_files(self): + rev1 = _make_rev( + ['commit1'], + 'alice', + datetime(2024, 1, 1, 10, 0, tzinfo=timezone.utc), + added=['doc1'], + ) + rev2 = _make_rev( + ['commit2'], + 'alice', + datetime(2024, 1, 1, 15, 0, tzinfo=timezone.utc), + removed=['doc1'], + ) + result = compact_revision([rev2, rev1]) + self.assertEqual(result.message, ['commit1', 'commit2']) + self.assertEqual(result.added_docs, []) + self.assertEqual(result.removed_docs, ['doc1']) + + +class TestGroupRevisions(unittest.TestCase): + def test_same_author_same_day_groups(self): + rev1 = _make_rev( + ['c1'], 
'alice', datetime(2024, 1, 1, 10, 0, tzinfo=timezone.utc) + ) + rev2 = _make_rev( + ['c2'], 'alice', datetime(2024, 1, 1, 15, 0, tzinfo=timezone.utc) + ) + result = _group_and_compact([rev2, rev1], 'day') + self.assertEqual(len(result), 1) + self.assertEqual(result[0].date, datetime(2024, 1, 1, tzinfo=timezone.utc)) + + def test_different_author_not_grouped(self): + rev1 = _make_rev( + ['c1'], 'alice', datetime(2024, 1, 1, 10, 0, tzinfo=timezone.utc) + ) + rev2 = _make_rev( + ['c2'], 'bob', datetime(2024, 1, 1, 15, 0, tzinfo=timezone.utc) + ) + result = _group_and_compact([rev2, rev1], 'day') + self.assertEqual(len(result), 2) + + def test_different_day_not_grouped(self): + rev1 = _make_rev( + ['c1'], 'alice', datetime(2024, 1, 1, 10, 0, tzinfo=timezone.utc) + ) + rev2 = _make_rev( + ['c2'], 'alice', datetime(2024, 1, 2, 10, 0, tzinfo=timezone.utc) + ) + result = _group_and_compact([rev1, rev2], 'day') + self.assertEqual(len(result), 2) + + def test_merge_by_month(self): + rev1 = _make_rev(['c1'], 'alice', datetime(2024, 1, 5, tzinfo=timezone.utc)) + rev2 = _make_rev(['c2'], 'alice', datetime(2024, 1, 20, tzinfo=timezone.utc)) + result = _group_and_compact([rev1, rev2], 'month') + self.assertEqual(len(result), 1) + self.assertEqual(result[0].date, datetime(2024, 1, 1, tzinfo=timezone.utc)) + + +if __name__ == '__main__': + unittest.main()