Skip to content

Commit 71f8ffd

Browse files
authored
Merge pull request #90 from J35P312/master
TIDDIT 3.3.0
2 parents 71f34d1 + bf3324e commit 71f8ffd

File tree

8 files changed

+455
-390
lines changed

8 files changed

+455
-390
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ INSTALLATION
1010
==============
1111
TIDDIT requires python3 (>= 3.8), cython, pysam, and Numpy.
1212

13+
1314
By default, tiddit will require bwa, fermi2 and ropebwt2 for local assembly; local assembly may be disabled through the "--skip_assembly" parameter.
1415

1516
Installation
@@ -74,6 +75,7 @@ TIDDIT may be fine-tuned by altering these optional parameters:
7475
-d expected reads orientations, possible values "innie" (-> <-) or "outtie" (<- ->). Default: major orientation within the dataset
7576
-p Minimum number of supporting pairs in order to call a variant (default 3)
7677
-r Minimum number of supporting split reads to call a variant (default 3)
78+
--threads Number of threads (default 1)
7779
-q Minimum mapping quality to consider an alignment (default 5)
7880
-n the ploidy of the organism,(default = 2)
7981
-e clustering distance parameter, discordant pairs closer than this distance are considered to belong to the same variant (default = sqrt(insert-size*2)*12)

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121
setup(
2222
name = 'tiddit',
23-
version = '3.2.1',
23+
version = '3.3.0',
24+
2425
url = "https://github.com/SciLifeLab/TIDDIT",
2526
author = "Jesper Eisfeldt",
2627
author_email= "[email protected]",

tiddit/__main__.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import tiddit.tiddit_contig_analysis as tiddit_contig_analysis
1818

1919
def main():
20-
version="3.2.1"
20+
version="3.3.0"
2121
parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
2222
parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
2323
parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
@@ -32,6 +32,7 @@ def main():
3232
parser.add_argument('-i', type=int, help="paired reads maximum allowed insert size. Pairs aligning on the same chr at a distance higher than this are considered candidates for SV (default= 99.9th percentile of insert size)")
3333
parser.add_argument('-d', type=str,help="expected reads orientations, possible values \"innie\" (-> <-) or \"outtie\" (<- ->). Default: major orientation within the dataset")
3434
parser.add_argument('-p', type=int,default=3, help="Minimum number of supporting pairs in order to call a variant (default 3)")
35+
parser.add_argument('--threads', type=int,default=1, help="Number of threads (default=1)")
3536
parser.add_argument('-r', type=int,default=3, help="Minimum number of supporting split reads to call a variant (default 3)")
3637
parser.add_argument('-q', type=int,default=5, help="Minimum mapping quality to consider an alignment (default 5)")
3738
parser.add_argument('-n', type=int,default=2, help="the ploidy of the organism,(default = 2)")
@@ -90,6 +91,7 @@ def main():
9091

9192
bam_file_name=args.bam
9293
samfile = pysam.AlignmentFile(bam_file_name, "r",reference_filename=args.ref)
94+
9395
bam_header=samfile.header
9496
samfile.close()
9597

@@ -110,14 +112,16 @@ def main():
110112
contigs.append(contig["SN"])
111113
contig_number[contig["SN"]]=i
112114
contig_length[ contig["SN"] ]=contig["LN"]
113-
i+=0
115+
i+=1
114116

115117
prefix=args.o
116118
try:
117119
os.mkdir( "{}_tiddit".format(prefix) )
118120
os.mkdir("{}_tiddit/clips".format(prefix) )
119121
except:
120122
print("Folder already exists")
123+
124+
pysam.index("-c","-m","6","-@",str(args.threads),bam_file_name,"{}_tiddit/{}.csi".format(args.o,sample_id))
121125

122126
min_mapq=args.q
123127
max_ins_len=100000
@@ -131,7 +135,7 @@ def main():
131135

132136

133137
t=time.time()
134-
coverage_data=tiddit_signal.main(bam_file_name,args.ref,prefix,min_mapq,max_ins_len,sample_id)
138+
coverage_data=tiddit_signal.main(bam_file_name,args.ref,prefix,min_mapq,max_ins_len,sample_id,args.threads,args.min_contig)
135139
print("extracted signals in:")
136140
print(t-time.time())
137141

@@ -163,7 +167,6 @@ def main():
163167
f.write(vcf_header+"\n")
164168

165169
t=time.time()
166-
#print(sv_clusters)
167170
variants=tiddit_variant.main(bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len)
168171
print("analyzed clusters in")
169172
print(time.time()-t)
@@ -203,7 +206,12 @@ def main():
203206
t=time.time()
204207
if read.mapq >= args.q:
205208
n_reads+=1
206-
coverage_data[read.reference_name]=tiddit_coverage.update_coverage(read,args.z,coverage_data[read.reference_name],args.q,end_bin_size[read.reference_name])
209+
210+
read_position=read.reference_start
211+
read_end=read.reference_end
212+
read_reference_name=read.reference_name
213+
214+
coverage_data[read_reference_name]=tiddit_coverage.update_coverage(read_position,read_end,args.z,coverage_data[read_reference_name],end_bin_size[read_reference_name])
207215

208216
if args.w:
209217
tiddit_coverage.print_coverage(coverage_data,bam_header,args.z,"wig",args.o +".wig")

tiddit/tiddit_contig_analysis.pyx

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,14 @@ def read_contigs(aligned_contigs,prefix,sample_id,min_size):
2828
continue
2929

3030
if read.has_tag("SA") and not (read.is_supplementary or read.is_secondary):
31-
split_contigs=tiddit_signal.SA_analysis(read,-2,split_contigs,"SA")
31+
split=tiddit_signal.SA_analysis(read,-2,"SA",read.reference_name)
32+
33+
if split:
34+
if not split[2] in split_contigs[split[0]][split[1]]:
35+
split_contigs[split[0]][split[1]][split[2]]=[]
36+
split_contigs[split[0]][split[1]][split[2]]+=split[3:]
37+
38+
3239
elif read.has_tag("XA") and not (read.is_supplementary or read.is_secondary):
3340
XA=read.get_tag("XA")
3441
if XA.count(";") == 1:
@@ -44,7 +51,12 @@ def read_contigs(aligned_contigs,prefix,sample_id,min_size):
4451
XA=",".join(xa_list)
4552

4653
read.set_tag("XA",XA)
47-
split_contigs=tiddit_signal.SA_analysis(read,-2,split_contigs,"XA")
54+
split=tiddit_signal.SA_analysis(read,-2,"XA",read.reference_name)
55+
56+
if split:
57+
if not split[2] in split_contigs[split[0]][split[1]]:
58+
split_contigs[split[0]][split[1]][split[2]]=[]
59+
split_contigs[split[0]][split[1]][split[2]]+=split[3:]
4860

4961
elif not (read.is_supplementary or read.is_secondary) and len(read.cigartuples) > 2:
5062

@@ -114,9 +126,9 @@ def main(prefix,sample_id,library,contigs,coverage_data,args):
114126
f.close()
115127
del clips
116128

117-
os.system("{} -dNCr {}_tiddit/clips.fa | {} assemble -l 81 - > {}_tiddit/clips.fa.assembly.mag".format(args.ropebwt2,prefix,args.fermi2,prefix))
129+
os.system("{} -dNCr {}_tiddit/clips.fa | {} assemble -t {} -l 81 - > {}_tiddit/clips.fa.assembly.mag".format(args.ropebwt2,prefix,args.fermi2,args.threads,prefix))
118130
os.system("{} simplify -COS -d 0.8 {}_tiddit/clips.fa.assembly.mag 1> {}_tiddit/clips.fa.assembly.clean.mag 2> /dev/null".format(args.fermi2,prefix,prefix))
119-
os.system("{} mem -x intractg {} {}_tiddit/clips.fa.assembly.clean.mag 1> {}_tiddit/clips.sam 2> /dev/null".format(args.bwa,args.ref,prefix,prefix))
131+
os.system("{} mem -t {} -x intractg {} {}_tiddit/clips.fa.assembly.clean.mag 1> {}_tiddit/clips.sam 2> /dev/null".format(args.bwa,args.threads,args.ref,prefix,prefix))
120132

121133
read_contigs("{}_tiddit/clips.sam".format(prefix) , prefix, sample_id, args.z)
122134

tiddit/tiddit_coverage.pyx

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
import sys
2+
import time
23
cimport numpy
34
import numpy
45
import math
56
cimport cython
67
@cython.boundscheck(False)
78
@cython.wraparound(False)
89

9-
def create_coverage(bam_header,bin_size):
10+
def create_coverage(bam_header,bin_size,c="all"):
1011
coverage_data={}
1112
end_bin_size={}
1213

1314
for contig in bam_header["SQ"]:
14-
bins= int(math.ceil(contig["LN"]/float(bin_size)))
15-
coverage_data[ contig["SN"] ]=numpy.zeros(bins)
16-
end_bin_size[contig["SN"]]=contig["LN"]-(bins-1)*bin_size
17-
15+
if c == "all" or contig["SN"] == c:
16+
bins= int(math.ceil(contig["LN"]/float(bin_size)))
17+
coverage_data[ contig["SN"] ]=numpy.zeros(bins)
18+
end_bin_size[contig["SN"]]=contig["LN"]-(bins-1)*bin_size
19+
if c != "all":
20+
return(coverage_data[ contig["SN"] ],end_bin_size[contig["SN"]])
1821
return(coverage_data,end_bin_size)
19-
2022
def print_coverage(coverage_data,bam_header,bin_size,file_type,outfile):
2123
f=open(outfile,"w",buffering=819200)
2224

@@ -43,37 +45,34 @@ def print_coverage(coverage_data,bam_header,bin_size,file_type,outfile):
4345
f.close()
4446

4547
ctypedef numpy.double_t DTYPE_t
46-
def update_coverage(read,int bin_size,numpy.ndarray[DTYPE_t, ndim=1] coverage_data,int min_q,int end_bin_size):
47-
48-
cdef long ref_start=read.reference_start
49-
cdef long ref_end=read.reference_end
48+
def update_coverage(long ref_start,long ref_end,int bin_size,numpy.ndarray[DTYPE_t, ndim=1] coverage_data,int end_bin_size):
5049

5150
cdef int first_bin=ref_start//bin_size
52-
cdef int end_bin=int(ref_end-1)//bin_size
51+
cdef int end_bin=(ref_end-1)//bin_size
5352

54-
cdef int bases_first_bin
53+
cdef float bases_first_bin
5554

5655
if end_bin == first_bin:
5756
bases_first_bin=ref_end-ref_start
58-
coverage_data[first_bin]=float(bases_first_bin)/bin_size+coverage_data[first_bin]
57+
coverage_data[first_bin]=bases_first_bin/bin_size+coverage_data[first_bin]
5958

6059
return(coverage_data)
6160

6261
bases_first_bin=((first_bin+1)*bin_size)-ref_start
63-
coverage_data[first_bin]=float(bases_first_bin)/bin_size+coverage_data[first_bin]
64-
cdef int bases_last_bin=(ref_end-1)-end_bin*bin_size
62+
coverage_data[first_bin]=bases_first_bin/bin_size+coverage_data[first_bin]
63+
cdef float bases_last_bin=(ref_end-1)-end_bin*bin_size
64+
6565

6666
if end_bin < len(coverage_data)-1:
67-
coverage_data[end_bin]+=float(bases_last_bin)/bin_size
67+
coverage_data[end_bin]=bases_last_bin/bin_size+coverage_data[end_bin]
6868
else:
69-
coverage_data[end_bin]+=float(bases_last_bin)/end_bin_size
69+
coverage_data[end_bin]=bases_last_bin/end_bin_size+coverage_data[end_bin]
7070

7171
for i in range(first_bin+1,end_bin):
72-
coverage_data[i]+=1.0
72+
coverage_data[i]=1.0+coverage_data[i]
7373

7474
return(coverage_data)
7575

76-
7776
#bam_file_name=sys.argv[1]
7877

7978
#samfile = pysam.AlignmentFile(bam_file_name, "r")

tiddit/tiddit_coverage_analysis.pyx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ def determine_ploidy(dict coverage_data,contigs,dict library,int ploidy,str pref
7575
library["avg_coverage"]=c
7676

7777
for chromosome in contigs:
78+
if not chromosome in coverage_data:
79+
continue
80+
7881
avg_coverage_contig=library[ "avg_coverage_{}".format(chromosome) ]
7982
library["contig_ploidy_{}".format(chromosome)]=int(round(ploidy*avg_coverage_contig/library["avg_coverage"]))
8083
f.write("{}\t{}\t{}\t{}\n".format(chromosome,avg_coverage_contig/library["avg_coverage"]*ploidy,library["contig_ploidy_{}".format(chromosome)],avg_coverage_contig))

0 commit comments

Comments
 (0)