6
6
import hvplot .pandas
7
7
import holoviews as hv
8
8
import pandas as pd
9
- import holoviews . operation . datashader as hd
9
+
10
10
import tskit
11
11
import utils
12
- import bokeh . models as bkm
12
+
13
13
import pathlib
14
14
import functools
15
+ import model
16
+ import pages
17
+
15
18
16
19
logger = logging .Logger (__file__ )
17
20
18
21
# Usage: panel serve app.py --args /path/to/trees-file
19
22
path = pathlib .Path (sys .argv [1 ])
20
- trees_file = path .name
21
- logger .warning (f"Loading { path } " )
22
- ts = tskit .load (path )
23
- ti = utils .TreeInfo (ts , 1 )
24
-
25
- # NOTE using "warning" here so that we can get some output
26
- # from them. Will need to do this better at some point,
27
- # with configurable output levels.
28
- logger .warning (f"Computing mutations data frame" )
29
- df_mutations = ti .mutations_data ()
30
- logger .warning (f"Computing edges data frame" )
31
- df_edges = ti .edges_data ()
32
- logger .warning (f"Computing Trees data frame" )
33
- df_trees = ti .trees_data ()
34
- # TODO REMOVE
35
- logger .warning (f"Done" )
36
-
37
- # Global plot settings
38
- plot_width = 1000
39
- plot_height = 600
40
- threshold = 1000 # max number of points to overlay on a plot
41
-
42
-
43
- def filter_points (points , x_range , y_range ):
44
- if x_range and y_range :
45
- return points [x_range , y_range ]
46
- return points
47
-
48
-
49
- def hover_points (points , threshold = 5000 ):
50
- ### Return points to interact with via hover if there are fewer than threshold
51
- if len (points ) > threshold :
52
- return points .iloc [:0 ]
53
- return points
54
-
55
-
56
- def shaded_points (points , threshold = 5000 ):
57
- ### Return points to datashade if there are more than threshold
58
- if len (points ) > threshold :
59
- return points
60
- return points .iloc [:0 ]
61
-
62
-
63
- def make_hist_on_axis (dimension , points , num_bins = 30 ):
64
- ### Make histogram function for a specified axis of a scatter plot
65
- def compute_hist (x_range , y_range ):
66
- filtered_points = filter_points (points , x_range , y_range )
67
- hist = hv .operation .histogram (
68
- filtered_points , dimension = dimension , num_bins = num_bins , normed = "height"
69
- )
70
- return hist
71
-
72
- return compute_hist
73
-
74
-
75
- def make_hist (data , title , bins_range , log_y = True , plot_width = 800 ):
76
- ### Make histogram from given count data
77
- count , bins = np .histogram (data , bins = bins_range )
78
- ylabel = "log(Count)" if log_y else "Count"
79
- np .seterr (divide = "ignore" )
80
- if log_y :
81
- count = np .log10 (count )
82
- count [count == - np .inf ] = 0
83
- histogram = hv .Histogram ((count , bins )).opts (
84
- title = title , ylabel = ylabel , tools = ["hover" ]
85
- )
86
- histogram = histogram .opts (shared_axes = False , width = round (plot_width / 2 ))
87
- return histogram
88
-
89
-
90
- def make_hist_matplotlib (data , title , num_bins = "auto" , log_y = True , xlim = (None , None )):
91
- ### Make histogram from given count data using parameters suitable for the matplotlib backend
92
- # TODO return something sensible when data contains one row
93
- if xlim [1 ] is not None :
94
- data = data [data < xlim [1 ]]
95
- if xlim [0 ] is not None :
96
- data = data [data > xlim [0 ]]
97
- count , bins = np .histogram (data , bins = num_bins )
98
- ylabel = "log(Count)" if log_y else "Count"
99
- np .seterr (divide = "ignore" )
100
- if log_y :
101
- count = np .log10 (count )
102
- count [count == - np .inf ] = 0
103
- return hv .Histogram ((count , bins )).opts (title = title , ylabel = ylabel )
104
-
105
-
106
- def make_hist_panel (log_y ):
107
- ### Make row of histograms for holoviews panel
108
- overall_site_hist = make_hist (
109
- ti .sites_num_mutations ,
110
- "Mutations per site" ,
111
- range (29 ),
112
- log_y = log_y ,
113
- plot_width = plot_width ,
114
- )
115
- overall_node_hist = make_hist (
116
- ti .nodes_num_mutations ,
117
- "Mutations per node" ,
118
- range (10 ),
119
- log_y = log_y ,
120
- plot_width = plot_width ,
121
- )
122
- return pn .Row (overall_site_hist , overall_node_hist )
123
-
124
-
125
- def page1 ():
126
- return pn .pane .HTML (ts )
127
- # hv_layout
128
-
129
-
130
- def page2 ():
131
- hv .extension ("bokeh" )
132
- plot_width = 1000
133
- log_y_checkbox = pn .widgets .Checkbox (
134
- name = "Log y-axis of Mutations per site/node plots" , value = False
135
- )
136
-
137
- points = df_mutations .hvplot .scatter (
138
- x = "position" ,
139
- y = "time" ,
140
- hover_cols = ["position" , "time" , "mutation_node" , "node_flag" ],
141
- ).opts (width = plot_width , height = plot_height )
142
-
143
- range_stream = hv .streams .RangeXY (source = points )
144
- streams = [range_stream ]
145
-
146
- filtered = points .apply (filter_points , streams = streams )
147
- time_hist = hv .DynamicMap (
148
- make_hist_on_axis (dimension = "time" , points = points , num_bins = 10 ), streams = streams
149
- )
150
- site_hist = hv .DynamicMap (
151
- make_hist_on_axis (dimension = "position" , points = points , num_bins = 10 ),
152
- streams = streams ,
153
- )
154
- hover = filtered .apply (hover_points , threshold = threshold )
155
- shaded = hd .datashade (filtered , width = 400 , height = 400 , streams = streams )
156
-
157
- main = (shaded * hover ).opts (
158
- hv .opts .Points (tools = ["hover" ], alpha = 0.1 , hover_alpha = 0.2 , size = 10 )
159
- )
160
-
161
- hist_panel = pn .bind (make_hist_panel , log_y = log_y_checkbox )
162
-
163
- plot_options = pn .Column (
164
- pn .pane .Markdown ("## Plot Options" ),
165
- log_y_checkbox ,
166
- )
167
-
168
- return pn .Column (main << time_hist << site_hist , hist_panel , plot_options )
169
-
170
-
171
- def page3 ():
172
- hv .extension ("bokeh" )
173
- df_edges ["parent_time_right" ] = df_edges ["parent_time" ]
174
- lines = hv .Segments (
175
- df_edges , kdims = ["left" , "parent_time" , "right" , "parent_time_right" ]
176
- )
177
- range_stream = hv .streams .RangeXY (source = lines )
178
- streams = [range_stream ]
179
- filtered = lines .apply (filter_points , streams = streams )
180
- hover = filtered .apply (hover_points , threshold = threshold )
181
- shaded = hd .datashade (filtered , streams = streams )
182
- hover_tool = bkm .HoverTool (
183
- tooltips = [
184
- ("child" , "@child" ),
185
- ("parent" , "@parent" ),
186
- ("span" , "@span" ),
187
- ("branch_length" , "@branch_length" ),
188
- ]
189
- )
190
- main = (shaded * hover ).opts (
191
- hv .opts .Segments (
192
- tools = [hover_tool ],
193
- width = plot_width ,
194
- height = plot_height ,
195
- xlabel = "Position" ,
196
- ylabel = "Time" ,
197
- )
198
- )
199
-
200
- return pn .Column (main )
201
-
202
-
203
- def page4 ():
204
- hv .extension ("bokeh" )
205
- node_id_input = pn .widgets .TextInput (value = "" , name = "Node ID" )
206
- df_edges ["parent_time_right" ] = df_edges ["parent_time" ]
207
- tabulator = pn .widgets .Tabulator (show_index = False )
208
-
209
- def plot_data (node_id ):
210
- if len (node_id ) > 0 :
211
- filtered_df = df_edges [df_edges ["child" ] == int (node_id )]
212
- segments = hv .Segments (
213
- filtered_df ,
214
- kdims = ["left" , "parent_time" , "right" , "parent_time_right" ],
215
- vdims = ["child" , "parent" , "span" , "branch_length" ],
216
- )
217
- hover_tool = bkm .HoverTool (
218
- tooltips = [
219
- ("child" , "@child" ),
220
- ("parent" , "@parent" ),
221
- ("span" , "@span" ),
222
- ("branch_length" , "@branch_length" ),
223
- ]
224
- )
225
- segments = segments .opts (
226
- width = plot_width ,
227
- height = plot_height ,
228
- tools = [hover_tool ],
229
- xlabel = "Position" ,
230
- ylabel = "Time" ,
231
- )
232
-
233
- filtered_df = filtered_df .drop (columns = ["parent_time_right" ])
234
- tabulator .value = filtered_df
235
-
236
- return segments
237
- else :
238
- return pn .pane .Markdown ("Please enter a Node ID." )
239
-
240
- dynamic_plot = pn .bind (plot_data , node_id = node_id_input )
241
-
242
- return pn .Column (node_id_input , dynamic_plot , tabulator )
243
-
244
-
245
- def page5 ():
246
- hv .extension ("matplotlib" )
247
- bins = min (50 , int (np .sqrt (len (df_trees ))))
248
-
249
- sites_hist_func = functools .partial (
250
- make_hist_matplotlib ,
251
- df_trees .num_sites ,
252
- "Sites per tree" ,
253
- num_bins = bins ,
254
- log_y = True ,
255
- )
256
-
257
- log_y_checkbox = pn .widgets .Checkbox (name = "log y-axis" , value = True )
258
-
259
- sites_hist_panel = pn .bind (
260
- sites_hist_func ,
261
- log_y = log_y_checkbox ,
262
- )
263
-
264
- spans = df_trees .right - df_trees .left
265
-
266
- spans_hist_func = functools .partial (
267
- make_hist_matplotlib ,
268
- spans ,
269
- "Genomic span per tree" ,
270
- num_bins = bins ,
271
- log_y = True ,
272
- )
273
-
274
- spans_hist_panel = pn .bind (
275
- spans_hist_func ,
276
- log_y = log_y_checkbox ,
277
- )
278
-
279
- muts_hist_func = functools .partial (
280
- make_hist_matplotlib ,
281
- df_trees .num_mutations ,
282
- "Mutations per tree" ,
283
- num_bins = bins ,
284
- log_y = True ,
285
- )
286
-
287
- muts_hist_panel = pn .bind (
288
- muts_hist_func ,
289
- log_y = log_y_checkbox ,
290
- )
291
-
292
- tbl_hist_func = functools .partial (
293
- make_hist_matplotlib ,
294
- df_trees .total_branch_length ,
295
- "Total branch length per tree" ,
296
- num_bins = bins ,
297
- log_y = True ,
298
- )
299
-
300
- tbl_hist_panel = pn .bind (
301
- tbl_hist_func ,
302
- log_y = log_y_checkbox ,
303
- )
304
-
305
- mean_arity_hist_func = functools .partial (
306
- make_hist_matplotlib ,
307
- df_trees .mean_internal_arity ,
308
- f"Mean arity per tree \n (not yet implemented)" ,
309
- num_bins = bins ,
310
- log_y = True ,
311
- )
312
-
313
- mean_arity_hist_panel = pn .bind (
314
- mean_arity_hist_func ,
315
- log_y = log_y_checkbox ,
316
- )
317
-
318
- max_arity_hist_func = functools .partial (
319
- make_hist_matplotlib ,
320
- df_trees .max_internal_arity ,
321
- "Max arity per tree" ,
322
- num_bins = bins ,
323
- log_y = True ,
324
- )
325
-
326
- max_arity_hist_panel = pn .bind (
327
- max_arity_hist_func ,
328
- log_y = log_y_checkbox ,
329
- )
330
-
331
- plot_options = pn .Column (
332
- pn .pane .Markdown ("# Plot Options" ),
333
- log_y_checkbox ,
334
- )
335
-
336
- hist_panel = pn .Column (
337
- pn .Row (
338
- sites_hist_panel ,
339
- spans_hist_panel ,
340
- muts_hist_panel ,
341
- ),
342
- pn .Row (tbl_hist_panel , mean_arity_hist_panel , max_arity_hist_panel ),
343
- )
344
-
345
- return pn .Column (hist_panel , plot_options )
23
+ tsm = model .TSModel (tskit .load (path ), path .name )
346
24
347
25
348
26
pn .extension (sizing_mode = "stretch_width" )
349
27
pn .extension ("tabulator" )
350
28
351
29
pages = {
352
- "Overview" : page1 ,
353
- "Mutations" : page2 ,
354
- "Edges" : page3 ,
355
- "Edge Explorer" : page4 ,
356
- "Trees" : page5 ,
30
+ "Overview" : pages . overview ,
31
+ "Mutations" : pages . mutations ,
32
+ "Edges" : pages . edges ,
33
+ "Edge Explorer" : pages . edge_explorer ,
34
+ "Trees" : pages . trees ,
357
35
}
358
36
359
37
360
38
def show (page ):
361
- return pages [page ]()
39
+ return pages [page ](tsm )
362
40
363
41
364
42
starting_page = pn .state .session_args .get ("page" , [b"Overview" ])[0 ].decode ()
@@ -380,7 +58,7 @@ def show(page):
380
58
"header_background" : ACCENT_COLOR ,
381
59
}
382
60
pn .template .FastListTemplate (
383
- title = f" { trees_file } " ,
61
+ title = tsm . name ,
384
62
sidebar = [page ],
385
63
main = [ishow ],
386
64
** DEFAULT_PARAMS ,
0 commit comments