tskit-dev · hyanwong · May 30, 2025 · jeromekelleher · Jun 2, 2025
diff --git a/docs/Makefile b/docs/Makefile
@@ -19,3 +19,5 @@ dist:
 clean:
 	rm -fR $(BUILDDIR)
 	rm -rf _static/example_data.vcz/ancestral_state
+	rm -rf notebook-simulation*
+
diff --git a/docs/usage.md b/docs/usage.md
@@ -320,10 +320,9 @@ import sys
 import os
 import subprocess
 
-from Bio import bgzf
-import numpy as np
-
+import bio2zarr.tskit as ts2z
 import msprime
+import numpy as np
 import tsinfer
 
 if getattr(builtins, "__IPYTHON__", False):  # if running IPython: e.g. in a notebook
@@ -343,27 +342,16 @@ ts = msprime.sim_ancestry(
     random_seed=6,
 )
 ts = msprime.sim_mutations(ts, rate=1e-8, random_seed=7)
-ts.dump(name + "-source.trees")
+ts.dump(f"{name}-source.trees")
 print(
     f"Simulated {ts.num_samples} samples over {seq_len/1e6} Mb:",
     f"{ts.num_trees} trees and {ts.num_sites} sites"
 )
 
-# Convert to a zarr file: this should be easier once a tskit2zarr utility is made, see
-# https://github.com/sgkit-dev/bio2zarr/issues/232
-np.save(f"{name}-AA.npy", [s.ancestral_state for s in ts.sites()])
-vcf_name = f"{name}.vcf.gz"
-with bgzf.open(vcf_name, "wt") as f:
-    ts.write_vcf(f)
-subprocess.run(["tabix", vcf_name])
-ret = subprocess.run(
-    [python, "-m", "bio2zarr", "vcf2zarr", "convert", "--force", vcf_name, f"{name}.vcz"],
-    stderr = subprocess.DEVNULL if name == "notebook-simulation" else None,
-)
+ts2z.convert(f"{name}-source.trees", f"{name}.vcz")
 assert os.path.exists(f"{name}.vcz")
 
-if ret.returncode == 0:
-    print(f"Converted to {name}.vcz")
+print(f"Converted to {name}.vcz")
 ```
 
 Here we first run a simulation then we create a vcf file and convert it to .vcz format.
@@ -431,8 +419,8 @@ Once we have our `.vcz` file created, running the inference is straightforward.
 
 ```{code-cell} ipython3
 # Infer & save a ts from the notebook simulation.
-ancestral_states = np.load(f"{name}-AA.npy")
-vdata = tsinfer.VariantData(f"{name}.vcz", ancestral_states)
+vcf_zarr = zarr.load(f"{name}.vcz")
+vdata = tsinfer.VariantData(f"{name}.vcz", ancestral_state=vcf_zarr["variant_allele"][:, 0])
 tsinfer.infer(vdata, progress_monitor=True, num_threads=4).dump(name + ".trees")
 ```
 

diff --git a/requirements/CI-docs/requirements.txt b/requirements/CI-docs/requirements.txt
@@ -5,10 +5,11 @@ humanize==4.12.1
 lmdb==1.6.2
 tqdm==4.67.1
 daiquiri==3.3.0
+tskit==0.6.4
 msprime==1.3.3
 sgkit[vcf]==0.9.0
 ipywidgets==8.1.5
 Bio==1.7.1
-bio2zarr==0.1.4
+bio2zarr==0.1.6
 sphinx-book-theme #Unpinned to allow easy updating.
-pyfaidx==0.8.1.3
+pyfaidx==0.8.1.3
diff --git a/requirements/CI-tests-complete/requirements.txt b/requirements/CI-tests-complete/requirements.txt
@@ -10,6 +10,6 @@ pytest==8.3.5
 pytest-cov==6.0.0
 seaborn==0.13.2
 sgkit[vcf]==0.9.0
-tskit==0.6.0
+tskit==0.6.4
 tqdm==4.67.1
 twine==6.1.0
diff --git a/requirements/CI-tests-conda/requirements.txt b/requirements/CI-tests-conda/requirements.txt
@@ -7,4 +7,4 @@ daiquiri==3.0.0 # Pinned as conda package not updating
 matplotlib==3.9.4
 seaborn==0.13.2
 colorama==0.4.6
-tskit==0.6.0
+tskit==0.6.4
diff --git a/requirements/development.txt b/requirements/development.txt
@@ -7,7 +7,8 @@ tqdm
 humanize
 daiquiri
 msprime >= 1.0.0
-tskit >= 0.5.3
+tskit >= 0.6.4
+bio2zarr >= 0.1.6
 lmdb
 pre-commit
 pytest