From c173b32c940b6784f5dadba2a4bc594b93e69e8b Mon Sep 17 00:00:00 2001
From: Andrew Kern <adkern@uoregon.edu>
Date: Thu, 26 Jun 2025 14:39:34 -0700
Subject: [PATCH] Fix: genetic_relatedness to allow single sample set with
 self-comparisons

Previously, genetic_relatedness would fail with TSK_ERR_INSUFFICIENT_SAMPLE_SETS
when given a single sample set and indexes referring to that set, e.g.:
  ts.genetic_relatedness([[0]], indexes=[(0,0)])

This was because the C API requires at least k=2 sample sets for k-way statistics,
even when indexes only reference a single set for self-comparison.

The fix is more general: it removes the restriction that there be at
least k sample sets for k-way stats, because this wasn't a useful or
consistently applied restriction.

Closes #3055
---
 c/tests/test_stats.c            | 20 +----------------
 c/tskit/trees.c                 |  2 +-
 python/CHANGELOG.rst            |  6 +++++
 python/tests/test_lowlevel.py   |  8 ++-----
 python/tests/test_tree_stats.py | 40 +++++++++++++++++++++++++++++++++
 5 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/c/tests/test_stats.c b/c/tests/test_stats.c
index 74aa961aa4..b722d5e951 100644
--- a/c/tests/test_stats.c
+++ b/c/tests/test_stats.c
@@ -881,9 +881,6 @@ verify_two_way_stat_func_errors(
     ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
         options | TSK_STAT_SITE, &result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
-    ret = method(ts, 1, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
-        options | TSK_STAT_SITE, &result);
-    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
 
     ret = method(ts, 2, sample_set_sizes, samples, 0, set_indexes, 0, NULL,
         options | TSK_STAT_SITE, &result);
@@ -938,12 +935,6 @@ verify_three_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method
     ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
         TSK_STAT_SITE, &result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
-    ret = method(ts, 1, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
-        TSK_STAT_SITE, &result);
-    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
-    ret = method(ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
-        TSK_STAT_SITE, &result);
-    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
 
     ret = method(ts, 3, sample_set_sizes, samples, 0, set_indexes, 0, NULL,
         TSK_STAT_SITE, &result);
@@ -972,15 +963,6 @@ verify_four_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method *
     ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
         TSK_STAT_SITE, &result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
-    ret = method(ts, 1, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
-        TSK_STAT_SITE, &result);
-    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
-    ret = method(ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
-        TSK_STAT_SITE, &result);
-    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
-    ret = method(ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
-        TSK_STAT_SITE, &result);
-    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
 
     ret = method(ts, 4, sample_set_sizes, samples, 0, set_indexes, 0, NULL,
         TSK_STAT_SITE, &result);
@@ -3684,7 +3666,7 @@ test_two_locus_stat_input_errors(void)
         NULL, 0, result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);
 
-    num_sample_sets = 1;
+    num_sample_sets = 0;
     num_index_tuples = 1;
     ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
         num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,
diff --git a/c/tskit/trees.c b/c/tskit/trees.c
index a536b08910..443a881f4c 100644
--- a/c/tskit/trees.c
+++ b/c/tskit/trees.c
@@ -4515,7 +4515,7 @@ check_sample_stat_inputs(tsk_size_t num_sample_sets, tsk_size_t tuple_size,
 {
     int ret = 0;
 
-    if (num_sample_sets < tuple_size) {
+    if (num_sample_sets < 1) {
         ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
         goto out;
     }
diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst
index dfc6807aa9..8817be1891 100644
--- a/python/CHANGELOG.rst
+++ b/python/CHANGELOG.rst
@@ -47,6 +47,12 @@
   when using ``TableCollection.assert_equals`` and ``Table.assert_equals``.
   (:user:`benjeffery`, :pr:`3246`, :issue:`3244`)
 
+- k-way statistics no longer require k sample sets, allowing in particular
+  "self" comparisons for `TreeSequence.genetic_relatedness`. This changes the
+  error code returned in some situations.
+  (:user:`andrewkern`, :user:`petrelharp`, :pr:`3235`, :issue:`3055`)
+  
+
 **Breaking changes** 
 
 - ``ltrim``, ``rtrim``, ``trim`` and ``shift`` raise an error if used on a tree sequence
diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py
index 8df0b49724..042c15cd7a 100644
--- a/python/tests/test_lowlevel.py
+++ b/python/tests/test_lowlevel.py
@@ -1871,17 +1871,13 @@ def test_ld_matrix_multipop(self, stat_method_name):
             stat_method(
                 bad_ss_sizes, bad_ss, indexes, None, None, row_pos, col_pos, "branch"
             )
-        with pytest.raises(
-            _tskit.LibraryError, match="TSK_ERR_INSUFFICIENT_SAMPLE_SETS"
-        ):
+        with pytest.raises(_tskit.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"):
             bad_ss = np.array([], dtype=np.int32)
             bad_ss_sizes = np.array([0], dtype=np.uint32)
             stat_method(
                 bad_ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, "site"
             )
-        with pytest.raises(
-            _tskit.LibraryError, match="TSK_ERR_INSUFFICIENT_SAMPLE_SETS"
-        ):
+        with pytest.raises(_tskit.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"):
             bad_ss = np.array([], dtype=np.int32)
             bad_ss_sizes = np.array([0], dtype=np.uint32)
             stat_method(
diff --git a/python/tests/test_tree_stats.py b/python/tests/test_tree_stats.py
index d7b2d280a0..417f0230ae 100644
--- a/python/tests/test_tree_stats.py
+++ b/python/tests/test_tree_stats.py
@@ -2307,6 +2307,46 @@ def test_shapes(self, proportion):
         else:
             assert x.shape == (2,)
 
+    def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
+        if self.mode is None:
+            return
+        # Test for issue #3055 - self-comparisons with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Single sample set with self-comparison
+        result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode=self.mode)
+        result_shape = (ts.num_nodes, 1) if self.mode == "node" else (1,)
+        assert result.shape == result_shape
+        # Should work for multiple samples in single set too
+        result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode=self.mode)
+        assert result.shape == result_shape
+        # Test with multiple self-comparisons
+        result = ts.genetic_relatedness(
+            [[0, 1], [2, 3]], indexes=[(0, 0), (1, 1)], mode=self.mode
+        )
+        result_shape = (ts.num_nodes, 2) if self.mode == "node" else (2,)
+        assert result.shape == result_shape
+
+    def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
+        if self.mode is None:
+            return
+        # Test that invalid indexes raise ValueError with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Index out of bounds (only have 1 sample set, but trying to access index 1)
+        with pytest.raises(
+            exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
+        ):
+            ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode=self.mode)
+        # Negative index
+        with pytest.raises(
+            exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
+        ):
+            ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode=self.mode)
+        # Both indexes out of bounds
+        with pytest.raises(
+            exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
+        ):
+            ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode=self.mode)
+
 
 class TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
     mode = "branch"