Commit 14d1422

Merge pull request #120 from J35P312/master
Tiddit 3.9.0
2 parents 9bb201f + 5106380 · commit 14d1422

13 files changed: +502 -174 lines changed

CMakeLists.txt

Lines changed: 0 additions & 37 deletions
This file was deleted.

Dockerfile

Lines changed: 9 additions & 37 deletions
@@ -1,55 +1,27 @@
-FROM python:3.8-slim
-
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y \
-    autoconf \
-    automake \
-    build-essential \
-    cmake \
-    libbz2-dev \
-    libcurl4-gnutls-dev \
-    liblzma-dev \
-    libncurses5-dev \
-    libssl-dev \
-    make \
-    unzip \
-    wget \
-    zlib1g-dev && \
-    apt-get clean && \
-    apt-get purge && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+FROM condaforge/mambaforge:24.9.2-0

 WORKDIR /app

-## Install samtools for cram processing
-RUN wget --no-verbose https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 && \
-    bunzip2 samtools-1.10.tar.bz2 && \
-    tar -xf samtools-1.10.tar && \
-    cd samtools-1.10 && \
-    ./configure && \
-    make all all-htslib && \
-    make install install-htslib && \
-    rm /app/samtools-1.10.tar
-
 ## Set TIDDIT version
-ARG TIDDIT_VERSION=2.12.1
+ARG TIDDIT_VERSION=3.9.0

 ## Add some info
 LABEL base_image="python:3.8-slim"
 LABEL software="TIDDIT.py"
 LABEL software.version=${TIDDIT_VERSION}

 ## Download and extract
+RUN conda install conda-forge::unzip
+RUN conda install -c conda-forge pip gcc joblib
+RUN conda install -c bioconda numpy cython pysam bwa
+
 RUN wget https://github.com/SciLifeLab/TIDDIT/archive/TIDDIT-${TIDDIT_VERSION}.zip && \
     unzip TIDDIT-${TIDDIT_VERSION}.zip && \
     rm TIDDIT-${TIDDIT_VERSION}.zip

 ## Install
-RUN cd TIDDIT-TIDDIT-${TIDDIT_VERSION} && \
-    ./INSTALL.sh && \
-    chmod +x /app/TIDDIT-TIDDIT-${TIDDIT_VERSION}/TIDDIT.py && \
-    ln -s /app/TIDDIT-TIDDIT-${TIDDIT_VERSION}/TIDDIT.py /usr/local/bin
+RUN cd TIDDIT-TIDDIT-${TIDDIT_VERSION} && \
+    pip install -e .

-ENTRYPOINT ["TIDDIT.py"]
+ENTRYPOINT ["tiddit"]
 CMD ["--help"]

README.md

Lines changed: 4 additions & 4 deletions
@@ -1,7 +1,7 @@
 DESCRIPTION
 ==============
 TIDDIT: Is a tool to used to identify chromosomal rearrangements using Mate Pair or Paired End sequencing data. TIDDIT identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions, using supplementary alignments as well as discordant pairs.
-TIDDIT searches for discordant reads and splti reads (supplementary alignments). The supplementary alignments are assembled and aligned using a fermikit-like workflow.
+TIDDIT searches for discordant reads and split reads (supplementary alignments). TIDDIT also performs local assembly using a custom local de novo assembler.
 Next all signals (contigs, split-reads, and discordant pairs) are clustered using DBSCAN. The resulting clusters are filtered and annotated, and reported as SV depending on the statistics.
 TIDDIT has two analysis modules. The sv mode, which is used to search for structural variants. And the cov mode that analyse the read depth of a bam file and generates a coverage report.
 On a 30X human genome, the TIDDIT SV module typically completetes within 5 hours, and requires less than 10Gb ram.
@@ -27,11 +27,11 @@ cd tiddit
 pip install -e .
 ```

-Next install fermi2, ropebwt2, and bwa, I recommend using conda:
+Next install bwa, I recommend using conda:

-conda install fermi2 ropebwt2 bwa
+conda install bwa

-You may also compile bwa, fermi2, and ropebwt2 yourself. Remember to add executables to path, or provide path through the command line parameters.
+You may also compile bwa yourself. Remember to add executables to path, or provide path through the command line parameters.
 ```
 tiddit --help
 tiddit --sv --help

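The updated description keeps the overall pipeline: SV signals (discordant pairs, split reads, and now contigs from the built-in local assembler) are clustered with DBSCAN and then filtered into calls. Purely as an illustration of that clustering step (TIDDIT ships its own implementation in tiddit/DBSCAN.py rather than scikit-learn, and the coordinates below are made up), a minimal sketch:

```
# Illustration only: DBSCAN-style clustering of candidate breakpoint coordinates.
# TIDDIT uses its own implementation (tiddit/DBSCAN.py); data and eps are made up.
import numpy as np
from sklearn.cluster import DBSCAN

signals = np.array([
    [100100, 500200],   # (position on chrA, position on chrB) of a discordant pair
    [100150, 500260],
    [100190, 500230],   # three nearby signals supporting one SV
    [750000, 910000],   # an isolated signal, labelled as noise (-1)
])

labels = DBSCAN(eps=500, min_samples=3).fit_predict(signals)
print(labels)   # e.g. [ 0  0  0 -1]
```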
Singularity

Lines changed: 0 additions & 36 deletions
This file was deleted.

setup.py

Lines changed: 3 additions & 1 deletion
@@ -18,14 +18,16 @@
 		"tiddit/tiddit_cluster.pyx",
 		"tiddit/tiddit_coverage_analysis.pyx",
 		"tiddit/tiddit_gc.pyx",
+		"tiddit/silverfish.pyx",
+		"tiddit/graphlib.pyx",
 		"tiddit/tiddit_variant.pyx",
 		"tiddit/tiddit_contig_analysis.pyx"])
 else:
 	ext_modules = []

 setup(
 	name = 'tiddit',
-	version = '3.8.0',
+	version = '3.9.0',


 	url = "https://github.com/SciLifeLab/TIDDIT",

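For context, the two new Cython modules enter the build through the same extension list as the existing ones. A minimal sketch of that pattern (not the project's full setup.py, which carries more metadata and an ext_modules = [] fallback branch):

```
# Minimal sketch of the Cython build pattern; the real setup.py has more options.
from setuptools import setup
from Cython.Build import cythonize

setup(
    name="tiddit",
    version="3.9.0",
    ext_modules=cythonize([
        "tiddit/silverfish.pyx",   # new: local de novo assembler
        "tiddit/graphlib.pyx",     # new: k-mer graph used by the assembler
    ]),
)
```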
tiddit/DBSCAN.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ def x_coordinate_clustering(data,epsilon,m):

 		distances=[]
 		current=data[i,:]
-		points=data[i+1:i+m,:]
+		points=data[i+1:i+m+1,:]

 		#print points
 		distances=[]

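The one-line change in DBSCAN.py is an off-by-one fix: Python slice stops are exclusive, so data[i+1:i+m,:] only takes the next m-1 rows, while the min-pts parameter m is meant to cover the next m candidate points. A small sketch of the difference on a toy array:

```
# Slice stops are exclusive, hence the +1 in the fix (toy data, not TIDDIT's).
import numpy as np

data = np.arange(20).reshape(10, 2)
i, m = 2, 3

old = data[i + 1 : i + m, :]       # rows 3..4 -> only m-1 = 2 neighbours
new = data[i + 1 : i + m + 1, :]   # rows 3..5 -> the intended m = 3 neighbours

print(old.shape[0], new.shape[0])  # 2 3
```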
tiddit/__main__.py

Lines changed: 23 additions & 18 deletions
@@ -18,7 +18,7 @@
 import tiddit.tiddit_gc as tiddit_gc

 def main():
-    version="3.8.0"
+    version="3.9.0"
     parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
     parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
     parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
@@ -27,9 +27,13 @@ def main():
     if args.sv == True:

         parser = argparse.ArgumentParser("""tiddit --sv --bam inputfile [-o prefix] --ref ref.fasta""")
+        #required parameters
         parser.add_argument('--sv' , help="call structural variation", required=False, action="store_true")
         parser.add_argument('--force_overwrite' , help="force the analysis and overwrite any data in the output folder", required=False, action="store_true")
         parser.add_argument('--bam', type=str,required=True, help="coordinate sorted bam file(required)")
+        parser.add_argument('--ref', type=str, help="reference fasta",required=True)
+
+        #related to sv calling
         parser.add_argument('-o', type=str,default="output", help="output prefix(default=output)")
         parser.add_argument('-i', type=int, help="paired reads maximum allowed insert size. Pairs aligning on the same chr at a distance higher than this are considered candidates for SV (default= 99.9th percentile of insert size)")
         parser.add_argument('-d', type=str,help="expected reads orientations, possible values \"innie\" (-> <-) or \"outtie\" (<- ->). Default: major orientation within the dataset")
@@ -42,19 +46,28 @@
         parser.add_argument('-c', type=float, help="average coverage, overwrites the estimated average coverage (useful for exome or panel data)")
         parser.add_argument('-l', type=int,default=3, help="min-pts parameter (default=3),must be set >= 2")
         parser.add_argument('-s', type=int,default=25000000, help="Number of reads to sample when computing library statistics(default=25000000)")
-        parser.add_argument('-z', type=int,default=50, help="minimum variant size (default=50), variants smaller than this will not be printed ( z < 10 is not recomended)")
         parser.add_argument('--force_ploidy',action="store_true", help="force the ploidy to be set to -n across the entire genome (i.e skip coverage normalisation of chromosomes)")
         parser.add_argument('--n_mask',type=float,default=0.5, help="exclude regions from coverage calculation if they contain more than this fraction of N (default = 0.5)")
-        parser.add_argument('--ref', type=str, help="reference fasta",required=True)
-        parser.add_argument('--bwa', type=str,default="bwa", help="path to bwa executable file(default=bwa)")
-        parser.add_argument('--fermi2', type=str,default="fermi2", help="path to fermi2 executable file (default=fermi2)")
-        parser.add_argument('--ropebwt2', type=str , default="ropebwt2", help="path to ropebwt2 executable file (default=ropebwt2)")
-        parser.add_argument('--skip_assembly', action="store_true", help="Skip running local assembly, tiddit will perform worse, but wont require fermi2, bwa, ropebwt and bwa indexed ref")
-        #parser.add_argument('--skip_index', action="store_true", help="Do not generate the csi index")
+
+        #stuff related to filtering
         parser.add_argument('--p_ratio', type=float,default=0.1, help="minimum discordant pair/normal pair ratio at the breakpoint junction(default=0.1)")
         parser.add_argument('--r_ratio', type=float,default=0.1, help="minimum split read/coverage ratio at the breakpoint junction(default=0.1)")
         parser.add_argument('--max_coverage', type=float,default=4, help="filter call if X times higher than chromosome average coverage (default=4)")
         parser.add_argument('--min_contig', type=int,default=10000, help="Skip calling on small contigs (default < 10000 bp)")
+        parser.add_argument('-z', type=int,default=50, help="minimum variant size (default=50), variants smaller than this will not be printed ( z < 10 is not recomended)")
+
+        #assembly related stuff
+        parser.add_argument('--skip_assembly', action="store_true", help="Skip running local assembly, tiddit will perform worse, but wont require bwa and bwa indexed ref, and will complete quicker")
+        parser.add_argument('--bwa', type=str,default="bwa", help="path to bwa executable file(default=bwa)")
+        parser.add_argument('--min_clip', type=int,default=4, help="Minimum clip reads to initiate local assembly of a region(default=4)")
+        parser.add_argument('--padding', type=int,default=4, help="Extend the local assembly by this number of bases (default=200bp)")
+        parser.add_argument('--min_pts_clips', type=int,default=3, help="min-pts parameter for the clustering of candidates for local assembly (default=3)")
+        parser.add_argument('--max_assembly_reads', type=int,default=100000, help="Skip assembly of regions containing too many reads (default=10000 reads)")
+        parser.add_argument('--max_local_assembly_region', type=int,default=2000, help="maximum size of the clip read cluster for being considered a local assembly candidate (default=2000 bp)")
+        parser.add_argument('--min_anchor_len', type=int,default=60, help="minimum mapped bases to be considered a clip read (default=60 bp)")
+        parser.add_argument('--min_clip_len', type=int,default=25, help="minimum clipped bases to be considered a clip read (default=25 bp)")
+        parser.add_argument('--min_contig_len', type=int,default=200, help="minimum contig length for SV analysis (default=200 bp)")
+        parser.add_argument('-k', type=int,default=91, help="kmer lenght used by the local assembler (default=91 bp)")
         args= parser.parse_args()

         if args.l < 2:
@@ -63,15 +76,7 @@

         if not args.skip_assembly:
             if not os.path.isfile(args.bwa) and not shutil.which(args.bwa):
-                print("error, BWA executable missing, add BWA to path, or specify using --bwa")
-                quit()
-
-            if not os.path.isfile(args.fermi2) and not shutil.which(args.fermi2):
-                print("error, fermi2 executable missing, add fermi2 to path, or specify using --fermi2")
-                quit()
-
-            if not os.path.isfile(args.ropebwt2) and not shutil.which(args.ropebwt2):
-                print("error, ropebwt2 executable missing, add ropebwt2 to path, or specify using --ropebwt2")
+                print("error, BWA executable missing, add BWA to path, or specify using --bwa, or skip local assembly (--skip_assembly)")
                 quit()

             if not glob.glob("{}*.bwt*".format(args.ref)):
@@ -150,7 +155,7 @@


         t=time.time()
-        coverage_data=tiddit_signal.main(bam_file_name,args.ref,prefix,min_mapq,max_ins_len,sample_id,args.threads,args.min_contig,False)
+        coverage_data=tiddit_signal.main(bam_file_name,args.ref,prefix,min_mapq,max_ins_len,sample_id,args.threads,args.min_contig,False,args.min_anchor_len,args.min_clip_len)
         print("extracted signals in:")
         print(t-time.time())

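The reorganised parser mostly regroups existing options and adds the assembly-related ones (--min_clip, --padding, -k, and so on). A standalone toy parser, not tiddit's own, just to show how a couple of the new flags and their defaults land in args:

```
# Toy parser mirroring a few of the new assembly options; not tiddit's parser.
import argparse

p = argparse.ArgumentParser("tiddit --sv")
p.add_argument("--skip_assembly", action="store_true")
p.add_argument("--min_clip", type=int, default=4)
p.add_argument("-k", type=int, default=91)

args = p.parse_args(["--min_clip", "6"])
print(args.skip_assembly, args.min_clip, args.k)   # False 6 91
```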
tiddit/graphlib.pyx

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
+class graph:
+
+    def __init__(self):
+        self.predecessors={}
+        self.sucessors={}
+        self.kmers={}
+        self.vertices={}
+        self.vertice_set=set([])
+        self.in_branch_points=set([])
+        self.out_branch_points=set([])
+        self.starting_points=set([])
+        self.end_points=set([])
+
+    #add node to the graph
+    def add_kmer(self,kmer,read):
+
+        if not kmer in self.kmers:
+            self.kmers[kmer]=set([])
+            self.predecessors[kmer]=set([])
+            self.sucessors[kmer]=set([])
+            self.starting_points.add(kmer)
+            self.end_points.add(kmer)
+
+        self.kmers[kmer].add(read)
+
+    #add vertices between nodes
+    def add_vertice(self,kmer1,kmer2,read):
+
+        self.add_kmer(kmer1,read)
+        self.add_kmer(kmer2,read)
+
+        if not kmer1 in self.vertices:
+            self.vertices[kmer1]={}
+
+        if kmer1 in self.end_points:
+            self.end_points.remove(kmer1)
+
+        if not kmer2 in self.vertices[kmer1]:
+            self.vertices[kmer1][kmer2]=set([])
+
+        if kmer2 in self.starting_points:
+            self.starting_points.remove(kmer2)
+
+        self.vertices[kmer1][kmer2].add(read)
+
+        self.vertice_set.add((kmer1,kmer2))
+
+        self.sucessors[kmer1].add(kmer2)
+        if len(self.sucessors[kmer1]) > 1:
+            self.in_branch_points.add(kmer1)
+
+        self.predecessors[kmer2].add(kmer1)
+        if len(self.predecessors[kmer2]) > 1:
+            self.out_branch_points.add(kmer2)
+
+    def delete_vertice(self,kmer1,kmer2):
+
+        if kmer1 in self.vertices:
+            if kmer2 in self.vertices[kmer1]:
+                del self.vertices[kmer1][kmer2]
+
+            if len(self.vertices[kmer1]) == 0:
+                del self.vertices[kmer1]
+
+        if kmer1 in self.in_branch_points:
+            if len(self.sucessors[kmer1]) < 3:
+                self.in_branch_points.remove(kmer1)
+
+        if kmer1 in self.sucessors:
+            if kmer2 in self.sucessors[kmer1]:
+                self.sucessors[kmer1].remove(kmer2)
+
+            if not self.sucessors[kmer1]:
+                self.end_points.add(kmer1)
+
+        if kmer2 in self.out_branch_points:
+            if len(self.predecessors[kmer2]) < 3:
+                self.out_branch_points.remove(kmer2)
+
+        if kmer1 in self.predecessors[kmer2]:
+            self.predecessors[kmer2].remove(kmer1)
+
+        if not len(self.predecessors[kmer2]):
+            self.starting_points.add(kmer2)
+
+        if (kmer1,kmer2) in self.vertice_set:
+            self.vertice_set.remove((kmer1,kmer2))
+
+    def delete_kmer(self,kmer):
+        if kmer in self.kmers:
+            del self.kmers[kmer]
+
+            sucessors=set([])
+            for sucessor in self.sucessors[kmer]:
+                sucessors.add(sucessor)
+
+            predecessors=set([])
+            for predecessor in self.predecessors[kmer]:
+                predecessors.add(predecessor)
+
+            for predecessor in predecessors:
+                self.delete_vertice(predecessor,kmer)
+
+            for sucessor in sucessors:
+                self.delete_vertice(kmer,sucessor)

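The new graphlib.pyx is plain-Python-compatible Cython, so the class can be exercised directly. A hypothetical usage sketch (the import path assumes the extension builds as tiddit.graphlib, the read name and k-mer size are invented, and note that the attribute is spelled sucessors in the source):

```
# Hypothetical usage of the new k-mer graph: add consecutive k-mers of a read
# as edges, then inspect the resulting de Bruijn-style structure.
from tiddit.graphlib import graph   # assumed import path once the extension is built

g = graph()
seq, k, read = "ACGTACGAT", 4, "read_1"   # toy values; the assembler's CLI default is k=91

for i in range(len(seq) - k):
    g.add_vertice(seq[i:i + k], seq[i + 1:i + k + 1], read)

print(g.starting_points)     # k-mers with no predecessor: {'ACGT'}
print(g.end_points)          # k-mers with no successor: {'CGAT'}
print(g.sucessors["ACGT"])   # {'CGTA'}

g.delete_kmer("GTAC")        # removing a k-mer also removes its edges
print(g.end_points)          # now also contains 'CGTA' (and 'GTAC')
```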