Skip to content

Commit 4a28cc8

Browse files
diff --sort-by: enhanced sorting, fixes #8998
Use borg diff --sort-by=spec1,spec2,spec3 for enhanced sorting. Keep the legacy --sort behaviour (sort by path) for compatibility, but deprecate it. Co-authored-by: Daniel Rudolf <[email protected]> This is a port of #9005 to the master branch.
1 parent 6b0badc commit 4a28cc8

File tree

4 files changed

+227
-5
lines changed

4 files changed

+227
-5
lines changed

docs/usage/diff.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,11 @@ Examples
1616
{"path": "file4", "changes": [{"type": "added", "size": 0}]}
1717
{"path": "file3", "changes": [{"type": "removed", "size": 0}]}
1818

19+
20+
# Use --sort-by with a comma-separated list of fields; stable sorts are applied from the last field to the first, so the first field is the primary sort key.
21+
# Here: primary by net size change descending, tie-breaker by path ascending
22+
$ borg diff --sort-by=">size_diff,path" archive1 archive2
23+
+17 B -5 B [-rw-r--r-- -> -rwxr-xr-x] file1
24+
removed 0 B file3
25+
added 0 B file4
26+
+135 B -252 B file2

src/borg/archiver/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def print_file_status(self, status, path):
177177
def preprocess_args(self, args):
178178
deprecations = [
179179
# ('--old', '--new' or None, 'Warning: "--old" has been deprecated. Use "--new" instead.'),
180+
("--sort", None, 'Warning: "--sort" is deprecated. Use "--sort-by=path" instead.')
180181
]
181182
for i, arg in enumerate(args[:]):
182183
for old_name, new_name, warning in deprecations:

src/borg/archiver/diff_cmd.py

Lines changed: 174 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,15 @@
77
from ._common import with_repository, build_matcher, Highlander
88
from ..archive import Archive
99
from ..constants import * # NOQA
10-
from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, PathSpec, BorgJsonEncoder
10+
from ..helpers import (
11+
BaseFormatter,
12+
DiffFormatter,
13+
archivename_validator,
14+
PathSpec,
15+
BorgJsonEncoder,
16+
IncludePatternNeverMatchedWarning,
17+
remove_surrogates,
18+
)
1119
from ..manifest import Manifest
1220
from ..logger import create_logger
1321

@@ -87,11 +95,81 @@ def print_text_output(diff, formatter):
8795
diffs_iter = Archive.compare_archives_iter(
8896
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
8997
)
90-
# Conversion to string and filtering for diff.equal to save memory if sorting
98+
# Filter out equal items early (keep as generator; listify only if sorting)
9199
diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))
92100

93-
if args.sort:
94-
diffs = sorted(diffs, key=lambda diff: diff.path)
101+
# Enhanced sorting support: --sort-by takes precedence; legacy --sort sorts by path
102+
sort_specs = []
103+
if getattr(args, "sort_by", None):
104+
# Comma-separated list
105+
for spec in str(args.sort_by).split(","):
106+
spec = spec.strip()
107+
if spec:
108+
sort_specs.append(spec)
109+
elif getattr(args, "sort", False):
110+
sort_specs = ["path"]
111+
112+
def key_for(field: str, d: "ItemDiff"):
    """Return the sort key of diff entry ``d`` for a single sort field.

    ``field`` may carry a leading ``<`` or ``>`` direction marker; the marker
    is ignored here because the caller handles the sort direction itself.

    Supported fields:

    - ``path`` (also ``None`` / empty): the item path, passed through
      ``remove_surrogates``.
    - ``size_added`` / ``size_removed`` / ``size_diff``: derived from the
      ``"content"`` entry of ``d.changes()``; all 0 when there is no such entry.
    - ``ctime_diff`` / ``mtime_diff``: timestamp of ``d._item2`` minus the one
      of ``d._item1``; a missing item or missing timestamp counts as 0.
    - ``size``: ``d._item2.get_size()``, or 0 when that item is missing or
      flagged as deleted.
    - ``user``, ``group``, ``uid``, ``gid``, ``ctime``, ``mtime``: read from
      ``d._item2`` if it is truthy, else from ``d._item1``, with per-field
      defaults.

    Raises:
        ValueError: if the field name is none of the above.
    """
    # A leading "<" or ">" only encodes the direction; drop it to get the bare name.
    name = field[1:] if field and field[0] in ("<", ">") else field

    # path
    if name in (None, "", "path"):
        return remove_surrogates(d.path)

    # size_* values are computed from the content change, if there is one
    if name in ("size_diff", "size_added", "size_removed"):
        grown, shrunk = 0, 0
        content = d.changes().get("content")
        if content is not None:
            details = content.to_dict()
            kind = details.get("type")
            if kind == "modified":
                grown = details.get("added", 0)
                shrunk = details.get("removed", 0)
            elif kind and kind.startswith("added"):
                # added items may report their size under "size" instead of "added"
                grown = details.get("added", details.get("size", 0))
            elif kind and kind.startswith("removed"):
                # removed items may report their size under "size" instead of "removed"
                shrunk = details.get("removed", details.get("size", 0))
        if name == "size_added":
            return grown
        if name == "size_removed":
            return shrunk
        return grown - shrunk

    # timestamp differences (archive2 - archive1)
    if name in ("ctime_diff", "mtime_diff"):
        older = getattr(d, "_item1", None)
        newer = getattr(d, "_item2", None)
        stamp = name.partition("_")[0]  # "ctime" or "mtime"
        newer_value = 0 if newer is None else newer.get(stamp, 0)
        older_value = 0 if older is None else older.get(stamp, 0)
        return newer_value - older_value

    # size of the item in archive2
    if name == "size":
        newer = getattr(d, "_item2", None)
        if newer is not None and not newer.get("deleted"):
            return newer.get_size()
        return 0

    # plain attributes, preferring the archive2 item over the archive1 item
    current = getattr(d, "_item2", None) or getattr(d, "_item1", None)
    fallbacks = {"user": "", "group": "", "uid": -1, "gid": -1, "ctime": 0, "mtime": 0}
    if name not in fallbacks:
        raise ValueError(f"Invalid field name: {name}")
    fallback = fallbacks[name]
    return fallback if current is None else current.get(name, fallback)
163+
164+
if sort_specs:
165+
diffs = list(diffs)
166+
# Apply stable sorts from last to first
167+
for spec in reversed(sort_specs):
168+
desc = False
169+
field = spec
170+
if field and field[0] in ("<", ">"):
171+
desc = field[0] == ">"
172+
diffs.sort(key=lambda di: key_for(field, di), reverse=desc)
95173

96174
formatter = DiffFormatter(format, args.content_only)
97175
for diff in diffs:
@@ -149,7 +227,87 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
149227
"""
150228
)
151229
+ DiffFormatter.keys_help()
230+
+ textwrap.dedent(
231+
"""
232+
233+
What is compared
234+
+++++++++++++++++
235+
For each matching item in both archives, Borg reports:
236+
237+
- Content changes: total added/removed bytes within files. If chunker parameters are comparable,
238+
Borg compares chunk IDs quickly; otherwise, it compares the content.
239+
- Metadata changes: user, group, mode, and other metadata shown inline like
240+
"[old_mode -> new_mode]" for mode changes. Use ``--content-only`` to suppress metadata changes.
241+
- Added/removed items: printed as "added SIZE path" or "removed SIZE path".
242+
243+
Output formats
244+
++++++++++++++
245+
The default (text) output shows one line per changed path, e.g.::
246+
247+
+135 B -252 B [ -rw-r--r-- -> -rwxr-xr-x ] path/to/file
248+
249+
JSON Lines output (``--json-lines``) prints one JSON object per changed path, e.g.::
250+
251+
{"path": "PATH", "changes": [
252+
{"type": "modified", "added": BYTES, "removed": BYTES},
253+
{"type": "mode", "old_mode": "-rw-r--r--", "new_mode": "-rwxr-xr-x"},
254+
{"type": "added", "size": SIZE},
255+
{"type": "removed", "size": SIZE}
256+
]}
257+
258+
Sorting
259+
++++++++
260+
Use ``--sort-by FIELDS`` where FIELDS is a comma-separated list of fields.
261+
Sorts are applied stably from last to first in the given list. Prepend ">" for
262+
descending, "<" (or no prefix) for ascending, for example ``--sort-by=">size_added,path"``.
263+
Supported fields include:
264+
265+
- path: the item path
266+
- size_added: total bytes added for the item content
267+
- size_removed: total bytes removed for the item content
268+
- size_diff: size_added - size_removed (net content change)
269+
- size: size of the item as stored in ARCHIVE2 (0 for removed items)
270+
- user, group, uid, gid, ctime, mtime: taken from the item state in ARCHIVE2 when present
271+
- ctime_diff, mtime_diff: timestamp difference (archive2 - archive1)
272+
273+
The ``--sort`` option is deprecated and only sorts by path.
274+
275+
Performance considerations
276+
++++++++++++++++++++++++++
277+
For archives created with Borg 1.1 or newer, diff automatically detects whether
278+
the archives were created with the same chunker parameters. If so, only chunk IDs
279+
are compared, which is very fast.
280+
"""
281+
)
152282
)
283+
284+
def diff_sort_spec_validator(s):
    """Validate and normalize a ``--sort-by`` argument value.

    ``s`` is a comma-separated list of sort specs. Each spec is a field name,
    optionally prefixed with ``>`` or ``<``. Whitespace around specs and empty
    specs are dropped.

    Returns:
        The remaining specs joined with ``","``.

    Raises:
        argparse.ArgumentTypeError: if ``s`` is not a string, contains no
            specs, or names a field that is not supported.
    """
    if not isinstance(s, str):
        raise argparse.ArgumentTypeError("unsupported sort field (not a string)")

    known_fields = {
        "path",
        "size_added",
        "size_removed",
        "size_diff",
        "size",
        "user",
        "group",
        "uid",
        "gid",
        "ctime",
        "mtime",
        "ctime_diff",
        "mtime_diff",
    }

    # Trim each comma-separated piece and keep only the non-empty ones.
    specs = []
    for piece in s.split(","):
        piece = piece.strip()
        if piece:
            specs.append(piece)
    if not specs:
        raise argparse.ArgumentTypeError("unsupported sort field: empty spec")

    for spec in specs:
        # Only the bare field name (without a direction marker) is checked.
        name = spec[1:] if spec.startswith((">", "<")) else spec
        if name not in known_fields:
            raise argparse.ArgumentTypeError(f"unsupported sort field: {name}")
    return ",".join(specs)
310+
153311
subparser = subparsers.add_parser(
154312
"diff",
155313
parents=[common_parser],
@@ -172,7 +330,12 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
172330
action="store_true",
173331
help="override the check of chunker parameters",
174332
)
175-
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
333+
subparser.add_argument(
334+
"--sort",
335+
dest="sort",
336+
action="store_true",
337+
help="Sort the output lines by file path (deprecated, use --sort-by=path).",
338+
)
176339
subparser.add_argument(
177340
"--format",
178341
metavar="FORMAT",
@@ -181,6 +344,12 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
181344
help='specify format for differences between archives (default: "{change} {path}{NL}")',
182345
)
183346
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines.")
347+
subparser.add_argument(
348+
"--sort-by",
349+
dest="sort_by",
350+
type=diff_sort_spec_validator,
351+
help="Sort output by comma-separated fields (e.g., '>size_added,path').",
352+
)
184353
subparser.add_argument(
185354
"--content-only",
186355
action="store_true",

src/borg/testsuite/archiver/diff_cmd_test.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,3 +404,47 @@ def test_hard_link_deletion_and_replacement(archivers, request):
404404
assert_line_exists(lines, "added.*B.*input/a/hardlink")
405405
# But the b/hardlink file was not modified at all.
406406
assert_line_not_exists(lines, ".*input/b/hardlink")
407+
408+
409+
def test_sort_by_path_equivalence(archivers, request):
    """``diff --sort-by=path`` must list the changed items in ascending path order."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # First archive: two files that get removed later and two that get changed later.
    first_state = (("a_file_removed", 8), ("f_file_removed", 16), ("c_file_changed", 32), ("e_file_changed", 64))
    for name, size in first_state:
        create_regular_file(archiver.input_path, name, size=size)
    cmd(archiver, "create", "test0", "input")

    # Second archive: drop everything, then re-create the changed files with new sizes
    # and add two new files.
    for stale in (
        "input/a_file_removed",
        "input/f_file_removed",
        "input/c_file_changed",
        "input/e_file_changed",
    ):
        os.unlink(stale)
    second_state = (("c_file_changed", 512), ("e_file_changed", 1024), ("b_file_added", 128), ("d_file_added", 256))
    for name, size in second_state:
        create_regular_file(archiver.input_path, name, size=size)
    cmd(archiver, "create", "test1", "input")

    # The new sorter, when sorting by path, should behave like the legacy --sort option.
    output = cmd(archiver, "diff", "test0", "test1", "--sort-by=path", "--content-only")
    expected = ["a_file_removed", "b_file_added", "c_file_changed", "d_file_added", "e_file_changed", "f_file_removed"]
    assert isinstance(output, str)
    lines = output.splitlines()
    assert len(lines) == len(expected)
    # Line i must mention the i-th expected file name.
    for name, line in zip(expected, lines):
        assert name in line
438+
439+
440+
def test_sort_by_invalid_field_is_rejected(archivers, request):
    """``diff --sort-by`` with an unknown field name must exit with ``EXIT_ERROR``."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # Two archives of the same file with different sizes, so there is something to diff.
    for size, archive_name in ((1, "a1"), (2, "a2")):
        create_regular_file(archiver.input_path, "file", size=size)
        cmd(archiver, "create", archive_name, "input")

    # An unsupported field should cause an argument parsing error.
    cmd(archiver, "diff", "a1", "a2", "--sort-by=not_a_field", exit_code=EXIT_ERROR)

0 commit comments

Comments
 (0)