Skip to content

Commit 1a92256

Browse files
committed
range-diff: add early size check to prevent long delays
When range-diff is given very large ranges (e.g., 250,000+ commits), it takes an extremely long time in read_patches() before eventually hitting memory or overflow issues. This provides a poor user experience as the command appears to hang. Add an early size check using 'git rev-list --count' to quickly count commits in both ranges before attempting to read all patches. This check typically completes in ~1 second even for ranges with hundreds of thousands of commits, compared to read_patches() which times out after many seconds. The limit of 10,000 total commits is generous for real-world use cases (even large refactoring series rarely exceed 1,000 commits) while preventing impractical operations that would require gigabytes of memory for the cost matrix. When the limit is exceeded, range-diff now immediately returns a clear error message rather than appearing to hang. The existing check in get_correspondences() is retained as defense in depth. This improves the user experience from: - Wait 5+ seconds (or longer) for read_patches() to process - Eventually crash or hang To: - Get an immediate error message in ~1 second - Clear explanation of the problem Signed-off-by: Paulo Casaretto <[email protected]> Signed-off-by: pcasaretto <[email protected]>
1 parent c44beea commit 1a92256

File tree

1 file changed

+64
-4
lines changed

1 file changed

+64
-4
lines changed

range-diff.c

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@
2121
#include "apply.h"
2222
#include "revision.h"
2323

24+
/*
25+
* Maximum total patches (a->nr + b->nr) for range-diff to prevent
26+
* unreasonable memory usage. This allows up to 10,000 total patches
27+
* which uses ~400MB for the cost matrix.
28+
*/
29+
#define MAX_RANGE_DIFF_TOTAL_PATCHES 10000
30+
2431
struct patch_util {
2532
/* For the search for an exact match */
2633
struct hashmap_entry e;
@@ -324,13 +331,18 @@ static int diffsize(const char *a, const char *b)
324331
return COST_MAX;
325332
}
326333

327-
static void get_correspondences(struct string_list *a, struct string_list *b,
334+
static int get_correspondences(struct string_list *a, struct string_list *b,
328335
int creation_factor)
329336
{
330337
int n = a->nr + b->nr;
331338
int *cost, c, *a2b, *b2a;
332339
int i, j;
333340

341+
/* Check for unreasonably large ranges */
342+
if (n > MAX_RANGE_DIFF_TOTAL_PATCHES)
343+
return error(_("ranges too large for range-diff: %d total patches (max: %d)"),
344+
n, MAX_RANGE_DIFF_TOTAL_PATCHES);
345+
334346
ALLOC_ARRAY(cost, st_mult(n, n));
335347
ALLOC_ARRAY(a2b, n);
336348
ALLOC_ARRAY(b2a, n);
@@ -383,6 +395,8 @@ static void get_correspondences(struct string_list *a, struct string_list *b,
383395
free(cost);
384396
free(a2b);
385397
free(b2a);
398+
399+
return 0;
386400
}
387401

388402
static void output_pair_header(struct diff_options *diffopt,
@@ -492,6 +506,33 @@ static const char *output_prefix_cb(struct diff_options *opt UNUSED, void *data)
492506
return data;
493507
}
494508

509+
static int count_range_commits(const char *range)
510+
{
511+
struct child_process cp = CHILD_PROCESS_INIT;
512+
struct strbuf buf = STRBUF_INIT;
513+
int count = -1;
514+
515+
strvec_pushl(&cp.args, "rev-list", "--count", range, NULL);
516+
cp.git_cmd = 1;
517+
cp.no_stdin = 1;
518+
cp.out = -1;
519+
520+
if (start_command(&cp))
521+
return -1;
522+
523+
if (strbuf_read(&buf, cp.out, 0) >= 0) {
524+
char *end;
525+
count = strtol(buf.buf, &end, 10);
526+
if (*end && *end != '\n')
527+
count = -1;
528+
}
529+
530+
finish_command(&cp);
531+
strbuf_release(&buf);
532+
533+
return count;
534+
}
535+
495536
static void output(struct string_list *a, struct string_list *b,
496537
struct range_diff_options *range_diff_opts)
497538
{
@@ -575,6 +616,7 @@ int show_range_diff(const char *range1, const char *range2,
575616
struct range_diff_options *range_diff_opts)
576617
{
577618
int res = 0;
619+
int count1, count2, total;
578620

579621
struct string_list branch1 = STRING_LIST_INIT_DUP;
580622
struct string_list branch2 = STRING_LIST_INIT_DUP;
@@ -583,16 +625,34 @@ int show_range_diff(const char *range1, const char *range2,
583625
if (range_diff_opts->left_only && range_diff_opts->right_only)
584626
res = error(_("options '%s' and '%s' cannot be used together"), "--left-only", "--right-only");
585627

628+
/* Check range sizes early before reading patches */
629+
if (!res) {
630+
count1 = count_range_commits(range1);
631+
count2 = count_range_commits(range2);
632+
633+
if (count1 < 0)
634+
res = error(_("could not count commits in range '%s'"), range1);
635+
else if (count2 < 0)
636+
res = error(_("could not count commits in range '%s'"), range2);
637+
else {
638+
total = count1 + count2;
639+
if (total > MAX_RANGE_DIFF_TOTAL_PATCHES)
640+
res = error(_("ranges too large for range-diff: %d total commits (max: %d)"),
641+
total, MAX_RANGE_DIFF_TOTAL_PATCHES);
642+
}
643+
}
644+
586645
if (!res && read_patches(range1, &branch1, range_diff_opts->other_arg, include_merges))
587646
res = error(_("could not parse log for '%s'"), range1);
588647
if (!res && read_patches(range2, &branch2, range_diff_opts->other_arg, include_merges))
589648
res = error(_("could not parse log for '%s'"), range2);
590649

591650
if (!res) {
592651
find_exact_matches(&branch1, &branch2);
593-
get_correspondences(&branch1, &branch2,
594-
range_diff_opts->creation_factor);
595-
output(&branch1, &branch2, range_diff_opts);
652+
res = get_correspondences(&branch1, &branch2,
653+
range_diff_opts->creation_factor);
654+
if (!res)
655+
output(&branch1, &branch2, range_diff_opts);
596656
}
597657

598658
string_list_clear(&branch1, 1);

0 commit comments

Comments
 (0)