Skip to content

Commit 34d6826

Browse files
andrewkernpetrelharp
authored andcommitted
Fix: genetic_relatedness to allow single sample set with self-comparisons
Previously, genetic_relatedness would fail with TSK_ERR_INSUFFICIENT_SAMPLE_SETS when given a single sample set and indexes referring to that set, e.g.: ts.genetic_relatedness([[0]], indexes=[(0,0)]) This was because the C API requires at least k=2 sample sets for k-way statistics, even when indexes only reference a single set for self-comparison. The fix is more general: it removes the restriction that there be at least k sample sets for k-way stats, because this wasn't a useful or consistently applied restriction. Closes #3055
1 parent 1d8f453 commit 34d6826

File tree

5 files changed

+51
-26
lines changed

5 files changed

+51
-26
lines changed

c/tests/test_stats.c

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -881,9 +881,6 @@ verify_two_way_stat_func_errors(
881881
ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
882882
options | TSK_STAT_SITE, &result);
883883
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
884-
ret = method(ts, 1, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
885-
options | TSK_STAT_SITE, &result);
886-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
887884

888885
ret = method(ts, 2, sample_set_sizes, samples, 0, set_indexes, 0, NULL,
889886
options | TSK_STAT_SITE, &result);
@@ -938,12 +935,6 @@ verify_three_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method
938935
ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
939936
TSK_STAT_SITE, &result);
940937
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
941-
ret = method(ts, 1, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
942-
TSK_STAT_SITE, &result);
943-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
944-
ret = method(ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
945-
TSK_STAT_SITE, &result);
946-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
947938

948939
ret = method(ts, 3, sample_set_sizes, samples, 0, set_indexes, 0, NULL,
949940
TSK_STAT_SITE, &result);
@@ -972,15 +963,6 @@ verify_four_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method *
972963
ret = method(ts, 0, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
973964
TSK_STAT_SITE, &result);
974965
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
975-
ret = method(ts, 1, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
976-
TSK_STAT_SITE, &result);
977-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
978-
ret = method(ts, 2, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
979-
TSK_STAT_SITE, &result);
980-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
981-
ret = method(ts, 3, sample_set_sizes, samples, 1, set_indexes, 0, NULL,
982-
TSK_STAT_SITE, &result);
983-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
984966

985967
ret = method(ts, 4, sample_set_sizes, samples, 0, set_indexes, 0, NULL,
986968
TSK_STAT_SITE, &result);
@@ -3684,7 +3666,7 @@ test_two_locus_stat_input_errors(void)
36843666
NULL, 0, result);
36853667
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);
36863668

3687-
num_sample_sets = 1;
3669+
num_sample_sets = 0;
36883670
num_index_tuples = 1;
36893671
ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
36903672
num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,

c/tskit/trees.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4515,7 +4515,7 @@ check_sample_stat_inputs(tsk_size_t num_sample_sets, tsk_size_t tuple_size,
45154515
{
45164516
int ret = 0;
45174517

4518-
if (num_sample_sets < tuple_size) {
4518+
if (num_sample_sets < 1) {
45194519
ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
45204520
goto out;
45214521
}
@@ -4624,6 +4624,7 @@ tsk_treeseq_genetic_relatedness(const tsk_treeseq_t *self, tsk_size_t num_sample
46244624
const double *windows, tsk_flags_t options, double *result)
46254625
{
46264626
int ret = 0;
4627+
// we want to allow self comparisons
46274628
ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);
46284629
if (ret != 0) {
46294630
goto out;

python/CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@
4747
when using ``TableCollection.assert_equals`` and ``Table.assert_equals``.
4848
(:user:`benjeffery`, :pr:`3246`, :issue:`3244`)
4949

50+
- k-way statistics no longer require k sample sets, allowing in particular
51+
"self" comparisons for `TreeSequence.genetic_relatedness`. This changes the
52+
error code returned in some situations.
53+
(:user:`andrewkern`, :user:`petrelharp`, :pr:`3235`, :issue:`3055`)
54+
55+
5056
**Breaking changes**
5157

5258
- ``ltrim``, ``rtrim``, ``trim`` and ``shift`` raise an error if used on a tree sequence

python/tests/test_lowlevel.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,17 +1871,13 @@ def test_ld_matrix_multipop(self, stat_method_name):
18711871
stat_method(
18721872
bad_ss_sizes, bad_ss, indexes, None, None, row_pos, col_pos, "branch"
18731873
)
1874-
with pytest.raises(
1875-
_tskit.LibraryError, match="TSK_ERR_INSUFFICIENT_SAMPLE_SETS"
1876-
):
1874+
with pytest.raises(_tskit.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"):
18771875
bad_ss = np.array([], dtype=np.int32)
18781876
bad_ss_sizes = np.array([0], dtype=np.uint32)
18791877
stat_method(
18801878
bad_ss_sizes, bad_ss, indexes, row_sites, col_sites, None, None, "site"
18811879
)
1882-
with pytest.raises(
1883-
_tskit.LibraryError, match="TSK_ERR_INSUFFICIENT_SAMPLE_SETS"
1884-
):
1880+
with pytest.raises(_tskit.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"):
18851881
bad_ss = np.array([], dtype=np.int32)
18861882
bad_ss_sizes = np.array([0], dtype=np.uint32)
18871883
stat_method(

python/tests/test_tree_stats.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2307,6 +2307,46 @@ def test_shapes(self, proportion):
23072307
else:
23082308
assert x.shape == (2,)
23092309

2310+
def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
2311+
if self.mode is None:
2312+
return
2313+
# Test for issue #3055 - self-comparisons with single sample set
2314+
ts = ts_12_highrecomb_fixture
2315+
# Single sample set with self-comparison
2316+
result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode=self.mode)
2317+
result_shape = (ts.num_nodes, 1) if self.mode == "node" else (1,)
2318+
assert result.shape == result_shape
2319+
# Should work for multiple samples in single set too
2320+
result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode=self.mode)
2321+
assert result.shape == result_shape
2322+
# Test with multiple self-comparisons
2323+
result = ts.genetic_relatedness(
2324+
[[0, 1], [2, 3]], indexes=[(0, 0), (1, 1)], mode=self.mode
2325+
)
2326+
result_shape = (ts.num_nodes, 2) if self.mode == "node" else (2,)
2327+
assert result.shape == result_shape
2328+
2329+
def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
2330+
if self.mode is None:
2331+
return
2332+
# Test that invalid indexes raise ValueError with single sample set
2333+
ts = ts_12_highrecomb_fixture
2334+
# Index out of bounds (only have 1 sample set, but trying to access index 1)
2335+
with pytest.raises(
2336+
exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
2337+
):
2338+
ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode=self.mode)
2339+
# Negative index
2340+
with pytest.raises(
2341+
exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
2342+
):
2343+
ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode=self.mode)
2344+
# Both indexes out of bounds
2345+
with pytest.raises(
2346+
exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
2347+
):
2348+
ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode=self.mode)
2349+
23102350

23112351
class TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
23122352
mode = "branch"

0 commit comments

Comments
 (0)