Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ dist:
clean:
rm -fR $(BUILDDIR)
rm -rf _static/example_data.vcz/ancestral_state
rm -rf notebook-simulation*

26 changes: 7 additions & 19 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,9 @@ import sys
import os
import subprocess

from Bio import bgzf
import numpy as np

import bio2zarr.tskit as ts2z
import msprime
import numpy as np
import tsinfer

if getattr(builtins, "__IPYTHON__", False): # if running IPython: e.g. in a notebook
Expand All @@ -343,27 +342,16 @@ ts = msprime.sim_ancestry(
random_seed=6,
)
ts = msprime.sim_mutations(ts, rate=1e-8, random_seed=7)
ts.dump(name + "-source.trees")
ts.dump(f"{name}-source.trees")
print(
f"Simulated {ts.num_samples} samples over {seq_len/1e6} Mb:",
f"{ts.num_trees} trees and {ts.num_sites} sites"
)

# Convert to a zarr file: this should be easier once a tskit2zarr utility is made, see
# https://github.com/sgkit-dev/bio2zarr/issues/232
np.save(f"{name}-AA.npy", [s.ancestral_state for s in ts.sites()])
vcf_name = f"{name}.vcf.gz"
with bgzf.open(vcf_name, "wt") as f:
ts.write_vcf(f)
subprocess.run(["tabix", vcf_name])
ret = subprocess.run(
[python, "-m", "bio2zarr", "vcf2zarr", "convert", "--force", vcf_name, f"{name}.vcz"],
stderr = subprocess.DEVNULL if name == "notebook-simulation" else None,
)
ts2z.convert(f"{name}-source.trees", f"{name}.vcz")
assert os.path.exists(f"{name}.vcz")

if ret.returncode == 0:
print(f"Converted to {name}.vcz")
print(f"Converted to {name}.vcz")
```

Here we first run a simulation then we create a vcf file and convert it to .vcz format.
Expand Down Expand Up @@ -431,8 +419,8 @@ Once we have our `.vcz` file created, running the inference is straightforward.

```{code-cell} ipython3
# Infer & save a ts from the notebook simulation.
ancestral_states = np.load(f"{name}-AA.npy")
vdata = tsinfer.VariantData(f"{name}.vcz", ancestral_states)
vcf_zarr = zarr.load(f"{name}.vcz")
vdata = tsinfer.VariantData(f"{name}.vcz", ancestral_state=vcf_zarr["variant_allele"][:, 0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, this is ugly. sgkit-dev/bio2zarr#387

tsinfer.infer(vdata, progress_monitor=True, num_threads=4).dump(name + ".trees")
```

Expand Down
5 changes: 3 additions & 2 deletions requirements/CI-docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ humanize==4.12.1
lmdb==1.6.2
tqdm==4.67.1
daiquiri==3.3.0
tskit==0.6.4
msprime==1.3.3
sgkit[vcf]==0.9.0
ipywidgets==8.1.5
Bio==1.7.1
bio2zarr==0.1.4
bio2zarr==0.1.6
sphinx-book-theme #Unpinned to allow easy updating.
pyfaidx==0.8.1.3
pyfaidx==0.8.1.3
2 changes: 1 addition & 1 deletion requirements/CI-tests-complete/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ pytest==8.3.5
pytest-cov==6.0.0
seaborn==0.13.2
sgkit[vcf]==0.9.0
tskit==0.6.0
tskit==0.6.4
tqdm==4.67.1
twine==6.1.0
2 changes: 1 addition & 1 deletion requirements/CI-tests-conda/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ daiquiri==3.0.0 # Pinned as conda package not updating
matplotlib==3.9.4
seaborn==0.13.2
colorama==0.4.6
tskit==0.6.0
tskit==0.6.4
3 changes: 2 additions & 1 deletion requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ tqdm
humanize
daiquiri
msprime >= 1.0.0
tskit >= 0.5.3
tskit >= 0.6.4
bio2zarr >= 0.1.6
lmdb
pre-commit
pytest
Expand Down
Loading