Skip to content

Commit 57ad49f

Browse files
committed
removed restriction on having >1 sample set for relatedness
1 parent cd830f7 commit 57ad49f

File tree

3 files changed

+42
-95
lines changed

3 files changed

+42
-95
lines changed

c/tskit/trees.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4470,12 +4470,12 @@ tsk_treeseq_pi2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
44704470
***********************************/
44714471

44724472
static int
4473-
check_sample_stat_inputs(tsk_size_t num_sample_sets, tsk_size_t tuple_size,
4474-
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples)
4473+
check_sample_stat_inputs(tsk_size_t num_sample_sets, tsk_size_t min_num_sample_sets,
4474+
tsk_size_t tuple_size, tsk_size_t num_index_tuples, const tsk_id_t *index_tuples)
44754475
{
44764476
int ret = 0;
44774477

4478-
if (num_sample_sets < tuple_size) {
4478+
if (num_sample_sets < min_num_sample_sets) {
44794479
ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
44804480
goto out;
44814481
}
@@ -4520,7 +4520,7 @@ tsk_treeseq_divergence(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
45204520
const double *windows, tsk_flags_t options, double *result)
45214521
{
45224522
int ret = 0;
4523-
ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);
4523+
ret = check_sample_stat_inputs(num_sample_sets, 2, 2, num_index_tuples, index_tuples);
45244524
if (ret != 0) {
45254525
goto out;
45264526
}
@@ -4584,7 +4584,8 @@ tsk_treeseq_genetic_relatedness(const tsk_treeseq_t *self, tsk_size_t num_sample
45844584
const double *windows, tsk_flags_t options, double *result)
45854585
{
45864586
int ret = 0;
4587-
ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);
4587+
// we want to allow self comparisons
4588+
ret = check_sample_stat_inputs(num_sample_sets, 1, 2, num_index_tuples, index_tuples);
45884589
if (ret != 0) {
45894590
goto out;
45904591
}
@@ -4729,7 +4730,7 @@ tsk_treeseq_Y2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
47294730
const double *windows, tsk_flags_t options, double *result)
47304731
{
47314732
int ret = 0;
4732-
ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);
4733+
ret = check_sample_stat_inputs(num_sample_sets, 2, 2, num_index_tuples, index_tuples);
47334734
if (ret != 0) {
47344735
goto out;
47354736
}
@@ -4770,7 +4771,7 @@ tsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
47704771
const double *windows, tsk_flags_t options, double *result)
47714772
{
47724773
int ret = 0;
4773-
ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);
4774+
ret = check_sample_stat_inputs(num_sample_sets, 2, 2, num_index_tuples, index_tuples);
47744775
if (ret != 0) {
47754776
goto out;
47764777
}
@@ -4816,7 +4817,7 @@ tsk_treeseq_Y3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
48164817
const double *windows, tsk_flags_t options, double *result)
48174818
{
48184819
int ret = 0;
4819-
ret = check_sample_stat_inputs(num_sample_sets, 3, num_index_tuples, index_tuples);
4820+
ret = check_sample_stat_inputs(num_sample_sets, 3, 3, num_index_tuples, index_tuples);
48204821
if (ret != 0) {
48214822
goto out;
48224823
}
@@ -4859,7 +4860,7 @@ tsk_treeseq_f3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
48594860
const double *windows, tsk_flags_t options, double *result)
48604861
{
48614862
int ret = 0;
4862-
ret = check_sample_stat_inputs(num_sample_sets, 3, num_index_tuples, index_tuples);
4863+
ret = check_sample_stat_inputs(num_sample_sets, 3, 3, num_index_tuples, index_tuples);
48634864
if (ret != 0) {
48644865
goto out;
48654866
}
@@ -4908,7 +4909,7 @@ tsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
49084909
const double *windows, tsk_flags_t options, double *result)
49094910
{
49104911
int ret = 0;
4911-
ret = check_sample_stat_inputs(num_sample_sets, 4, num_index_tuples, index_tuples);
4912+
ret = check_sample_stat_inputs(num_sample_sets, 4, 4, num_index_tuples, index_tuples);
49124913
if (ret != 0) {
49134914
goto out;
49144915
}

python/tests/test_tree_stats.py

Lines changed: 31 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2307,32 +2307,49 @@ def test_shapes(self, proportion):
23072307
else:
23082308
assert x.shape == (2,)
23092309

2310-
2311-
class TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
2312-
mode = "branch"
2313-
23142310
def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
2311+
if self.mode is None:
2312+
return
23152313
# Test for issue #3055 - self-comparisons with single sample set
23162314
ts = ts_12_highrecomb_fixture
23172315
# Single sample set with self-comparison
2318-
result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="branch")
2319-
assert result.shape == (1,)
2316+
result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode=self.mode)
2317+
result_shape = (ts.num_nodes, 1) if self.mode == "node" else (1,)
2318+
assert result.shape == result_shape
23202319
# Should work for multiple samples in single set too
2321-
result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="branch")
2322-
assert result.shape == (1,)
2320+
result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode=self.mode)
2321+
assert result.shape == result_shape
2322+
# Test with multiple self-comparisons
2323+
result = ts.genetic_relatedness(
2324+
[[0, 1], [2, 3]], indexes=[(0, 0), (1, 1)], mode=self.mode
2325+
)
2326+
result_shape = (ts.num_nodes, 2) if self.mode == "node" else (2,)
2327+
assert result.shape == result_shape
23232328

23242329
def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
2330+
if self.mode is None:
2331+
return
23252332
# Test that invalid indexes raise ValueError with single sample set
23262333
ts = ts_12_highrecomb_fixture
23272334
# Index out of bounds (only have 1 sample set, but trying to access index 1)
2328-
with pytest.raises(ValueError, match="Index out of bounds"):
2329-
ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode="branch")
2335+
with pytest.raises(
2336+
exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
2337+
):
2338+
ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode=self.mode)
23302339
# Negative index
2331-
with pytest.raises(ValueError, match="Index out of bounds"):
2332-
ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode="branch")
2340+
with pytest.raises(
2341+
exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
2342+
):
2343+
ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode=self.mode)
23332344
# Both indexes out of bounds
2334-
with pytest.raises(ValueError, match="Index out of bounds"):
2335-
ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode="branch")
2345+
with pytest.raises(
2346+
exceptions.LibraryError, match="TSK_ERR_BAD_SAMPLE_SET_INDEX"
2347+
):
2348+
ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode=self.mode)
2349+
2350+
2351+
class TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
2352+
mode = "branch"
23362353

23372354
@pytest.mark.parametrize("polarised", [True, False])
23382355
def test_simple_tree_noncentred(self, polarised):
@@ -2388,61 +2405,10 @@ def test_simple_tree_noncentred(self, polarised):
23882405
class TestNodeGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
23892406
mode = "node"
23902407

2391-
def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
2392-
# Test for issue #3055 - self-comparisons with single sample set
2393-
ts = ts_12_highrecomb_fixture
2394-
# Single sample set with self-comparison
2395-
result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="node")
2396-
assert result.shape == (ts.num_nodes, 1)
2397-
# Should work for multiple samples in single set too
2398-
result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="node")
2399-
assert result.shape == (ts.num_nodes, 1)
2400-
2401-
def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
2402-
# Test that invalid indexes raise ValueError with single sample set
2403-
ts = ts_12_highrecomb_fixture
2404-
# Index out of bounds (only have 1 sample set, but trying to access index 1)
2405-
with pytest.raises(ValueError, match="Index out of bounds"):
2406-
ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode="node")
2407-
# Negative index
2408-
with pytest.raises(ValueError, match="Index out of bounds"):
2409-
ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode="node")
2410-
# Both indexes out of bounds
2411-
with pytest.raises(ValueError, match="Index out of bounds"):
2412-
ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode="node")
2413-
24142408

24152409
class TestSiteGeneticRelatedness(TestGeneticRelatedness, MutatedTopologyExamplesMixin):
24162410
mode = "site"
24172411

2418-
def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
2419-
# Test for issue #3055 - self-comparisons with single sample set
2420-
ts = ts_12_highrecomb_fixture
2421-
# Single sample set with self-comparison
2422-
result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="site")
2423-
assert result.shape == (1,)
2424-
# Should work for multiple samples in single set too
2425-
result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="site")
2426-
assert result.shape == (1,)
2427-
# Test with multiple self-comparisons
2428-
result = ts.genetic_relatedness(
2429-
[[0], [1]], indexes=[(0, 0), (1, 1)], mode="site"
2430-
)
2431-
assert result.shape == (2,)
2432-
2433-
def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
2434-
# Test that invalid indexes raise ValueError with single sample set
2435-
ts = ts_12_highrecomb_fixture
2436-
# Index out of bounds (only have 1 sample set, but trying to access index 1)
2437-
with pytest.raises(ValueError, match="Index out of bounds"):
2438-
ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode="site")
2439-
# Negative index
2440-
with pytest.raises(ValueError, match="Index out of bounds"):
2441-
ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode="site")
2442-
# Both indexes out of bounds
2443-
with pytest.raises(ValueError, match="Index out of bounds"):
2444-
ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode="site")
2445-
24462412
def test_match_K_c0(self):
24472413
# This test checks that ts.genetic_relatedness() matches K_c0
24482414
# from Speed & Balding (2014) https://www.nature.com/articles/nrg3821

python/tskit/trees.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8106,7 +8106,6 @@ def __k_way_sample_set_stat(
81068106
span_normalise=True,
81078107
polarised=False,
81088108
centre=True,
8109-
allow_self_comparisons=False,
81108109
):
81118110
sample_set_sizes = np.array(
81128111
[len(sample_set) for sample_set in sample_sets], dtype=np.uint32
@@ -8133,24 +8132,6 @@ def __k_way_sample_set_stat(
81338132
"Indexes must be convertable to a 2D numpy array with {} "
81348133
"columns".format(k)
81358134
)
8136-
# For genetic_relatedness, we allow self-comparisons with a single sample set
8137-
if allow_self_comparisons and len(sample_sets) < k:
8138-
# Check that all indexes are valid
8139-
if np.any(indexes >= len(sample_sets)) or np.any(indexes < 0):
8140-
raise ValueError("Index out of bounds")
8141-
# Find which sample sets we actually need
8142-
unique_indexes = np.unique(indexes)
8143-
if np.max(unique_indexes) < len(sample_sets):
8144-
# we need to pad with dummy sets to satisfy the C side
8145-
# requirement of having at least k sets
8146-
sample_sets = list(sample_sets)
8147-
sample_set_sizes = list(sample_set_sizes)
8148-
while len(sample_sets) < k:
8149-
# Add a dummy sample set that won't be used
8150-
sample_sets.append(np.array([sample_sets[0][0]], dtype=np.int32))
8151-
sample_set_sizes.append(1)
8152-
sample_set_sizes = np.array(sample_set_sizes, dtype=np.uint32)
8153-
flattened = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)
81548135
stat = self.__run_windowed_stat(
81558136
windows,
81568137
ll_method,
@@ -8721,7 +8702,6 @@ def genetic_relatedness(
87218702
span_normalise=span_normalise,
87228703
polarised=polarised,
87238704
centre=centre,
8724-
allow_self_comparisons=True,
87258705
)
87268706
if proportion:
87278707
# TODO this should be done in C also

0 commit comments

Comments
 (0)