Skip to content

Commit d405315

Browse files
Merge pull request #58 from benjeffery/refactor
Refactor to separate data model from plots, separate pages into files
2 parents be37415 + a34e95d commit d405315

File tree

13 files changed: +784 additions, -549 deletions

app.py

Lines changed: 13 additions & 335 deletions
Original file line numberDiff line numberDiff line change
@@ -6,359 +6,37 @@
66
import hvplot.pandas
77
import holoviews as hv
88
import pandas as pd
9-
import holoviews.operation.datashader as hd
9+
1010
import tskit
1111
import utils
12-
import bokeh.models as bkm
12+
1313
import pathlib
1414
import functools
15+
import model
16+
import pages
17+
1518

1619
logger = logging.Logger(__file__)
1720

1821
# Usage: panel serve app.py --args /path/to/trees-file
1922
path = pathlib.Path(sys.argv[1])
20-
trees_file = path.name
21-
logger.warning(f"Loading {path}")
22-
ts = tskit.load(path)
23-
ti = utils.TreeInfo(ts, 1)
24-
25-
# NOTE using "warning" here so that we can get some output
26-
# from them. Will need to do this better at some point,
27-
# with configurable output levels.
28-
logger.warning(f"Computing mutations data frame")
29-
df_mutations = ti.mutations_data()
30-
logger.warning(f"Computing edges data frame")
31-
df_edges = ti.edges_data()
32-
logger.warning(f"Computing Trees data frame")
33-
df_trees = ti.trees_data()
34-
# TODO REMOVE
35-
logger.warning(f"Done")
36-
37-
# Global plot settings
38-
plot_width = 1000
39-
plot_height = 600
40-
threshold = 1000 # max number of points to overlay on a plot
41-
42-
43-
def filter_points(points, x_range, y_range):
44-
if x_range and y_range:
45-
return points[x_range, y_range]
46-
return points
47-
48-
49-
def hover_points(points, threshold=5000):
50-
### Return points to interact with via hover if there are fewer than threshold
51-
if len(points) > threshold:
52-
return points.iloc[:0]
53-
return points
54-
55-
56-
def shaded_points(points, threshold=5000):
57-
### Return points to datashade if there are more than threshold
58-
if len(points) > threshold:
59-
return points
60-
return points.iloc[:0]
61-
62-
63-
def make_hist_on_axis(dimension, points, num_bins=30):
64-
### Make histogram function for a specified axis of a scatter plot
65-
def compute_hist(x_range, y_range):
66-
filtered_points = filter_points(points, x_range, y_range)
67-
hist = hv.operation.histogram(
68-
filtered_points, dimension=dimension, num_bins=num_bins, normed="height"
69-
)
70-
return hist
71-
72-
return compute_hist
73-
74-
75-
def make_hist(data, title, bins_range, log_y=True, plot_width=800):
76-
### Make histogram from given count data
77-
count, bins = np.histogram(data, bins=bins_range)
78-
ylabel = "log(Count)" if log_y else "Count"
79-
np.seterr(divide="ignore")
80-
if log_y:
81-
count = np.log10(count)
82-
count[count == -np.inf] = 0
83-
histogram = hv.Histogram((count, bins)).opts(
84-
title=title, ylabel=ylabel, tools=["hover"]
85-
)
86-
histogram = histogram.opts(shared_axes=False, width=round(plot_width / 2))
87-
return histogram
88-
89-
90-
def make_hist_matplotlib(data, title, num_bins="auto", log_y=True, xlim=(None, None)):
91-
### Make histogram from given count data using parameters suitable for the matplotlib backend
92-
# TODO return something sensible when data contains one row
93-
if xlim[1] is not None:
94-
data = data[data < xlim[1]]
95-
if xlim[0] is not None:
96-
data = data[data > xlim[0]]
97-
count, bins = np.histogram(data, bins=num_bins)
98-
ylabel = "log(Count)" if log_y else "Count"
99-
np.seterr(divide="ignore")
100-
if log_y:
101-
count = np.log10(count)
102-
count[count == -np.inf] = 0
103-
return hv.Histogram((count, bins)).opts(title=title, ylabel=ylabel)
104-
105-
106-
def make_hist_panel(log_y):
107-
### Make row of histograms for holoviews panel
108-
overall_site_hist = make_hist(
109-
ti.sites_num_mutations,
110-
"Mutations per site",
111-
range(29),
112-
log_y=log_y,
113-
plot_width=plot_width,
114-
)
115-
overall_node_hist = make_hist(
116-
ti.nodes_num_mutations,
117-
"Mutations per node",
118-
range(10),
119-
log_y=log_y,
120-
plot_width=plot_width,
121-
)
122-
return pn.Row(overall_site_hist, overall_node_hist)
123-
124-
125-
def page1():
126-
return pn.pane.HTML(ts)
127-
# hv_layout
128-
129-
130-
def page2():
131-
hv.extension("bokeh")
132-
plot_width = 1000
133-
log_y_checkbox = pn.widgets.Checkbox(
134-
name="Log y-axis of Mutations per site/node plots", value=False
135-
)
136-
137-
points = df_mutations.hvplot.scatter(
138-
x="position",
139-
y="time",
140-
hover_cols=["position", "time", "mutation_node", "node_flag"],
141-
).opts(width=plot_width, height=plot_height)
142-
143-
range_stream = hv.streams.RangeXY(source=points)
144-
streams = [range_stream]
145-
146-
filtered = points.apply(filter_points, streams=streams)
147-
time_hist = hv.DynamicMap(
148-
make_hist_on_axis(dimension="time", points=points, num_bins=10), streams=streams
149-
)
150-
site_hist = hv.DynamicMap(
151-
make_hist_on_axis(dimension="position", points=points, num_bins=10),
152-
streams=streams,
153-
)
154-
hover = filtered.apply(hover_points, threshold=threshold)
155-
shaded = hd.datashade(filtered, width=400, height=400, streams=streams)
156-
157-
main = (shaded * hover).opts(
158-
hv.opts.Points(tools=["hover"], alpha=0.1, hover_alpha=0.2, size=10)
159-
)
160-
161-
hist_panel = pn.bind(make_hist_panel, log_y=log_y_checkbox)
162-
163-
plot_options = pn.Column(
164-
pn.pane.Markdown("## Plot Options"),
165-
log_y_checkbox,
166-
)
167-
168-
return pn.Column(main << time_hist << site_hist, hist_panel, plot_options)
169-
170-
171-
def page3():
172-
hv.extension("bokeh")
173-
df_edges["parent_time_right"] = df_edges["parent_time"]
174-
lines = hv.Segments(
175-
df_edges, kdims=["left", "parent_time", "right", "parent_time_right"]
176-
)
177-
range_stream = hv.streams.RangeXY(source=lines)
178-
streams = [range_stream]
179-
filtered = lines.apply(filter_points, streams=streams)
180-
hover = filtered.apply(hover_points, threshold=threshold)
181-
shaded = hd.datashade(filtered, streams=streams)
182-
hover_tool = bkm.HoverTool(
183-
tooltips=[
184-
("child", "@child"),
185-
("parent", "@parent"),
186-
("span", "@span"),
187-
("branch_length", "@branch_length"),
188-
]
189-
)
190-
main = (shaded * hover).opts(
191-
hv.opts.Segments(
192-
tools=[hover_tool],
193-
width=plot_width,
194-
height=plot_height,
195-
xlabel="Position",
196-
ylabel="Time",
197-
)
198-
)
199-
200-
return pn.Column(main)
201-
202-
203-
def page4():
204-
hv.extension("bokeh")
205-
node_id_input = pn.widgets.TextInput(value="", name="Node ID")
206-
df_edges["parent_time_right"] = df_edges["parent_time"]
207-
tabulator = pn.widgets.Tabulator(show_index=False)
208-
209-
def plot_data(node_id):
210-
if len(node_id) > 0:
211-
filtered_df = df_edges[df_edges["child"] == int(node_id)]
212-
segments = hv.Segments(
213-
filtered_df,
214-
kdims=["left", "parent_time", "right", "parent_time_right"],
215-
vdims=["child", "parent", "span", "branch_length"],
216-
)
217-
hover_tool = bkm.HoverTool(
218-
tooltips=[
219-
("child", "@child"),
220-
("parent", "@parent"),
221-
("span", "@span"),
222-
("branch_length", "@branch_length"),
223-
]
224-
)
225-
segments = segments.opts(
226-
width=plot_width,
227-
height=plot_height,
228-
tools=[hover_tool],
229-
xlabel="Position",
230-
ylabel="Time",
231-
)
232-
233-
filtered_df = filtered_df.drop(columns=["parent_time_right"])
234-
tabulator.value = filtered_df
235-
236-
return segments
237-
else:
238-
return pn.pane.Markdown("Please enter a Node ID.")
239-
240-
dynamic_plot = pn.bind(plot_data, node_id=node_id_input)
241-
242-
return pn.Column(node_id_input, dynamic_plot, tabulator)
243-
244-
245-
def page5():
246-
hv.extension("matplotlib")
247-
bins = min(50, int(np.sqrt(len(df_trees))))
248-
249-
sites_hist_func = functools.partial(
250-
make_hist_matplotlib,
251-
df_trees.num_sites,
252-
"Sites per tree",
253-
num_bins=bins,
254-
log_y=True,
255-
)
256-
257-
log_y_checkbox = pn.widgets.Checkbox(name="log y-axis", value=True)
258-
259-
sites_hist_panel = pn.bind(
260-
sites_hist_func,
261-
log_y=log_y_checkbox,
262-
)
263-
264-
spans = df_trees.right - df_trees.left
265-
266-
spans_hist_func = functools.partial(
267-
make_hist_matplotlib,
268-
spans,
269-
"Genomic span per tree",
270-
num_bins=bins,
271-
log_y=True,
272-
)
273-
274-
spans_hist_panel = pn.bind(
275-
spans_hist_func,
276-
log_y=log_y_checkbox,
277-
)
278-
279-
muts_hist_func = functools.partial(
280-
make_hist_matplotlib,
281-
df_trees.num_mutations,
282-
"Mutations per tree",
283-
num_bins=bins,
284-
log_y=True,
285-
)
286-
287-
muts_hist_panel = pn.bind(
288-
muts_hist_func,
289-
log_y=log_y_checkbox,
290-
)
291-
292-
tbl_hist_func = functools.partial(
293-
make_hist_matplotlib,
294-
df_trees.total_branch_length,
295-
"Total branch length per tree",
296-
num_bins=bins,
297-
log_y=True,
298-
)
299-
300-
tbl_hist_panel = pn.bind(
301-
tbl_hist_func,
302-
log_y=log_y_checkbox,
303-
)
304-
305-
mean_arity_hist_func = functools.partial(
306-
make_hist_matplotlib,
307-
df_trees.mean_internal_arity,
308-
f"Mean arity per tree \n(not yet implemented)",
309-
num_bins=bins,
310-
log_y=True,
311-
)
312-
313-
mean_arity_hist_panel = pn.bind(
314-
mean_arity_hist_func,
315-
log_y=log_y_checkbox,
316-
)
317-
318-
max_arity_hist_func = functools.partial(
319-
make_hist_matplotlib,
320-
df_trees.max_internal_arity,
321-
"Max arity per tree",
322-
num_bins=bins,
323-
log_y=True,
324-
)
325-
326-
max_arity_hist_panel = pn.bind(
327-
max_arity_hist_func,
328-
log_y=log_y_checkbox,
329-
)
330-
331-
plot_options = pn.Column(
332-
pn.pane.Markdown("# Plot Options"),
333-
log_y_checkbox,
334-
)
335-
336-
hist_panel = pn.Column(
337-
pn.Row(
338-
sites_hist_panel,
339-
spans_hist_panel,
340-
muts_hist_panel,
341-
),
342-
pn.Row(tbl_hist_panel, mean_arity_hist_panel, max_arity_hist_panel),
343-
)
344-
345-
return pn.Column(hist_panel, plot_options)
23+
tsm = model.TSModel(tskit.load(path), path.name)
34624

34725

34826
pn.extension(sizing_mode="stretch_width")
34927
pn.extension("tabulator")
35028

35129
pages = {
352-
"Overview": page1,
353-
"Mutations": page2,
354-
"Edges": page3,
355-
"Edge Explorer": page4,
356-
"Trees": page5,
30+
"Overview": pages.overview,
31+
"Mutations": pages.mutations,
32+
"Edges": pages.edges,
33+
"Edge Explorer": pages.edge_explorer,
34+
"Trees": pages.trees,
35735
}
35836

35937

36038
def show(page):
361-
return pages[page]()
39+
return pages[page](tsm)
36240

36341

36442
starting_page = pn.state.session_args.get("page", [b"Overview"])[0].decode()
@@ -380,7 +58,7 @@ def show(page):
38058
"header_background": ACCENT_COLOR,
38159
}
38260
pn.template.FastListTemplate(
383-
title=f"{trees_file}",
61+
title=tsm.name,
38462
sidebar=[page],
38563
main=[ishow],
38664
**DEFAULT_PARAMS,

config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Global plot settings
2+
PLOT_WIDTH = 1000
3+
PLOT_HEIGHT = 600
4+
THRESHOLD = 1000 # max number of points to overlay on a plot

Comments: 0 commit comments