Skip to content

Commit 53f20ad

Browse files
committed
Tidy up
1 parent 7564875 commit 53f20ad

File tree

1 file changed

+38
-37
lines changed

1 file changed

+38
-37
lines changed

python/tests/test_imputation.py

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
from io import StringIO
1+
"""
2+
Tests for genotype imputation (forward and Baum-Welsh algorithms).
3+
"""
4+
import io
25

36
import numpy as np
47
import pandas as pd
58

69
import tskit
710

811

9-
"""
10-
A tree sequence containing 3 diploid individuals with 5 sites and 5 mutations
11-
(one per site). The first 2 individuals are used as reference panel,
12-
the last one is the target individual.
13-
"""
12+
# A tree sequence containing 3 diploid individuals with 5 sites and 5 mutations
13+
# (one per site). The first 2 individuals are used as reference panel,
14+
# the last one is the target individual.
15+
1416
_toy_ts_nodes_text = """\
1517
id is_sample time population individual metadata
1618
0 1 0.000000 0 0
@@ -81,41 +83,40 @@ def get_toy_ts():
8183
Returns the toy tree sequence in text format above.
8284
"""
8385
ts = tskit.load_text(
84-
nodes=StringIO(_toy_ts_nodes_text),
85-
edges=StringIO(_toy_ts_edges_text),
86-
sites=StringIO(_toy_ts_sites_text),
87-
mutations=StringIO(_toy_ts_mutations_text),
88-
individuals=StringIO(_toy_ts_individuals_text),
86+
nodes=io.StringIO(_toy_ts_nodes_text),
87+
edges=io.StringIO(_toy_ts_edges_text),
88+
sites=io.StringIO(_toy_ts_sites_text),
89+
mutations=io.StringIO(_toy_ts_mutations_text),
90+
individuals=io.StringIO(_toy_ts_individuals_text),
8991
strict=False,
9092
)
9193
return ts
9294

9395

94-
"""
95-
BEAGLE 4.1 was run on the toy data set above using default parameters.
96-
The following are the forward probability matrices and backward probability
97-
matrices calculated when imputing into the third individual above. There are
98-
two sets of matrices, one for each haplotype.
99-
100-
Notes about calculations:
101-
n = number of haplotypes in ref. panel
102-
M = number of markers
103-
m = index of marker (site)
104-
h = index of haplotype in ref. panel
105-
106-
In forward probability matrix,
107-
fwd[m][h] = emission prob., if m = 0 (first marker)
108-
fwd[m][h] = emission prob. * (scale * fwd[m - 1][h] + shift), otherwise
109-
where scale = (1 - switch prob.)/sum of fwd[m - 1],
110-
and shift = switch prob./n.
111-
112-
In backward probability matrix,
113-
bwd[m][h] = 1, if m = M - 1 (last marker) // DON'T SEE THIS IN BEAGLE
114-
unadj. bwd[m][h] = emission prob. / n
115-
bwd[m][h] = (unadj. bwd[m][h] + shift) * scale, otherwise
116-
where scale = (1 - switch prob.)/sum of unadj. bwd[m],
117-
and shift = switch prob./n.
118-
"""
96+
# BEAGLE 4.1 was run on the toy data set above using default parameters.
97+
# The following are the forward probability matrices and backward probability
98+
# matrices calculated when imputing into the third individual above. There are
99+
# two sets of matrices, one for each haplotype.
100+
#
101+
# Notes about calculations:
102+
# n = number of haplotypes in ref. panel
103+
# M = number of markers
104+
# m = index of marker (site)
105+
# h = index of haplotype in ref. panel
106+
#
107+
# In forward probability matrix,
108+
# fwd[m][h] = emission prob., if m = 0 (first marker)
109+
# fwd[m][h] = emission prob. * (scale * fwd[m - 1][h] + shift), otherwise
110+
# where scale = (1 - switch prob.)/sum of fwd[m - 1],
111+
# and shift = switch prob./n.
112+
#
113+
# In backward probability matrix,
114+
# bwd[m][h] = 1, if m = M - 1 (last marker) // DON'T SEE THIS IN BEAGLE
115+
# unadj. bwd[m][h] = emission prob. / n
116+
# bwd[m][h] = (unadj. bwd[m][h] + shift) * scale, otherwise
117+
# where scale = (1 - switch prob.)/sum of unadj. bwd[m],
118+
# and shift = switch prob./n.
119+
119120
_fwd_matrix_text_1 = """
120121
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
121122
0,0,0.000000,1.000000,0.999900,0.000100,1,0,0.000000,1.000000,0.000100,0.000100
@@ -201,7 +202,7 @@ def convert_to_pd_df(matrix_text):
201202
"""
202203
Converts a matrix in text to a Pandas dataframe and returns it.
203204
"""
204-
df = pd.read_csv(StringIO(matrix_text))
205+
df = pd.read_csv(io.StringIO(matrix_text))
205206
for i in np.arange(df.shape[0]):
206207
# Check that switch and non-switch probabilities sum to 1
207208
assert df.probRec[i] + df.probNoRec[i] == 1 or np.isnan(df.probRec[i])

0 commit comments

Comments
 (0)