Skip to content

Commit 18b2ad0

Browse files
authored
Merge pull request #45 from J35P312/master
version 2.2.1
2 parents 9cf4d42 + 7746196 commit 18b2ad0

File tree

3 files changed

+44
-18
lines changed

3 files changed

+44
-18
lines changed

TIDDIT.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import os
44
import TIDDIT_clustering
55

6-
version = "2.1.1"
6+
version = "2.2.1"
77
parser = argparse.ArgumentParser("""TIDDIT-{}""".format(version),add_help=False)
88
parser.add_argument('--sv' , help="call structural variation", required=False, action="store_true")
99
parser.add_argument('--cov' , help="generate a coverage bed file", required=False, action="store_true")

TIDDIT_clustering.py

Lines changed: 42 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ def analyse_pos(candidate_signals,discordants,library_stats,args):
268268
def generate_clusters(chrA,chrB,coordinates,library_stats,args):
269269
candidates=[]
270270
coordinates=coordinates[numpy.lexsort((coordinates[:,1],coordinates[:,0]))]
271-
db=DBSCAN.main(coordinates[:,0:2],args.e,args.l)
271+
db=DBSCAN.main(coordinates[:,0:2],args.e,int(round(args.l+library_stats["ploidies"][chrA]/(args.n*10))))
272272
unique_labels = set(db)
273273

274274
for var in unique_labels:
@@ -338,40 +338,57 @@ def fetch_variant_type(chrA,chrB,candidate,args,library_stats):
338338
var="<DUP>"
339339

340340
if chrA == chrB and library_stats["ploidies"][chrA]:
341-
if candidate["discs"]:
341+
ploidy=library_stats["ploidies"][chrA]
342+
if ploidy > 10:
343+
if candidate["discs"] and abs(candidate["covM"]/library_stats["chr_cov"][chrA]-1) < 0.05:
344+
if candidate["FF"] + candidate["RR"] > candidate["RF"] + candidate["FR"]:
345+
variant_type="SVTYPE=INV"
346+
var="<INV>"
347+
elif not candidate["discs"] and abs(candidate["covM"]/library_stats["chr_cov"][chrA]-1) < 0.05:
348+
if candidate["splitsINV"] > candidate["splits"]-candidate["splitsINV"]:
349+
variant_type="SVTYPE=INV"
350+
var="<INV>"
351+
elif candidate["covM"]/library_stats["chr_cov"][chrA]-1 > 0.05:
352+
variant_type="SVTYPE=DUP"
353+
var="<DUP>"
354+
elif candidate["covM"]/library_stats["chr_cov"][chrA]-1 < -0.05:
355+
variant_type="SVTYPE=DEL"
356+
var="<DEL>"
357+
358+
elif candidate["discs"]:
342359
if candidate["FF"] + candidate["RR"] > candidate["RF"] + candidate["FR"]:
343360
variant_type="SVTYPE=INV"
344361
var="<INV>"
345362
elif library_stats["Orientation"] == "innie":
346-
if candidate["covM"]/library_stats["chr_cov"][chrA] > (args.n+0.5)/args.n:
363+
if candidate["covM"]/library_stats["chr_cov"][chrA] > (ploidy+0.5)/float(ploidy):
347364
variant_type="SVTYPE=DUP"
348365
var="<DUP>"
349366
if candidate["RF"] > candidate["FR"]:
350367
variant_type="SVTYPE=TDUP"
351368
var="<TDUP>"
352-
elif candidate["covM"]/library_stats["chr_cov"][chrA] < (args.n-0.5)/args.n:
369+
elif candidate["covM"]/library_stats["chr_cov"][chrA] < (ploidy-0.5)/float(ploidy):
353370
variant_type="SVTYPE=DEL"
354371
var="<DEL>"
355372

356373
else:
357-
if candidate["covM"]/library_stats["chr_cov"][chrA] > (args.n+0.5)/args.n:
374+
if candidate["covM"]/library_stats["chr_cov"][chrA] > (ploidy+0.5)/float(ploidy):
358375
variant_type="SVTYPE=DUP"
359376
var="<DUP>"
360377
if candidate["RF"] < candidate["FR"]:
361378
variant_type="SVTYPE=TDUP"
362379
var="<TDUP>"
363380

364-
elif candidate["covM"]/library_stats["chr_cov"][chrA] < (args.n-0.5)/args.n:
381+
elif candidate["covM"]/library_stats["chr_cov"][chrA] < (ploidy-0.5)/float(ploidy):
365382
variant_type="SVTYPE=DEL"
366383
var="<DEL>"
367384
else:
368385
if candidate["splitsINV"] > candidate["splits"]-candidate["splitsINV"]:
369386
variant_type="SVTYPE=INV"
370387
var="<INV>"
371-
elif candidate["covM"]/library_stats["chr_cov"][chrA] > (args.n+0.5)/args.n:
388+
elif candidate["covM"]/library_stats["chr_cov"][chrA] >(ploidy+0.5)/float(ploidy):
372389
variant_type="SVTYPE=DUP"
373390
var="<DUP>"
374-
elif candidate["covM"]/library_stats["chr_cov"][chrA] < (args.n-0.5)/args.n:
391+
elif candidate["covM"]/library_stats["chr_cov"][chrA] < (ploidy-0.5)/float(ploidy):
375392
variant_type="SVTYPE=DEL"
376393
var="<DEL>"
377394

@@ -406,9 +423,9 @@ def fetch_filter(chrA,chrB,candidate,args,library_stats):
406423
else:
407424
if candidate["e1"]*0.4 >= candidate["splits"]:
408425
filt = "BelowExpectedLinks"
409-
if library_stats["ploidies"][chrA] == 0:
426+
if library_stats["ploidies"][chrA] == 0 or library_stats["ploidies"][chrB] == 0:
410427
return("Ploidy")
411-
if candidate["MaxcovA"] >= library_stats["chr_cov"][chrA]*(library_stats["ploidies"][chrA]+2) or candidate["MaxcovB"] >= library_stats["chr_cov"][chrA]*(library_stats["ploidies"][chrA]+2):
428+
if candidate["MaxcovA"] >= library_stats["chr_cov"][chrA]*(library_stats["ploidies"][chrA]+2) or candidate["MaxcovB"] >= library_stats["chr_cov"][chrB]*(library_stats["ploidies"][chrB]+2):
412429
filt = "UnexpectedCoverage"
413430
elif candidate["discsA"] > (candidate["discs"]+candidate["splits"])*(1+library_stats["ploidies"][chrA]) or candidate["discsB"] > (candidate["discs"]+candidate["splits"])*(1+library_stats["ploidies"][chrA]):
414431
filt= "FewLinks"
@@ -556,35 +573,44 @@ def determine_ploidy(args,chromosomes,coverage_data,Ncontent,sequence_length,lib
556573
library_stats["chr_cov"]={}
557574
ploidies={}
558575
avg_coverage=[]
559-
for chromosome in chromosomes:
576+
cov=[]
577+
for chromosome in chromosomes:
560578
try:
561579
N_count=Ncontent[chromosome]
562-
chromosomal_average=numpy.median(coverage_data[chromosome][numpy.where(N_count > 0),0])
563-
avg_coverage.append( chromosomal_average )
580+
chr_cov=coverage_data[chromosome][numpy.where( (N_count > 0) & (coverage_data[chromosome][:,1] > args.q) ),0][0]
581+
if len(chr_cov):
582+
chromosomal_average=numpy.median(chr_cov)
583+
cov+= list(chr_cov)
584+
else:
585+
chromosomal_average=0
564586
library_stats["chr_cov"][chromosome]=chromosomal_average
565587

566588
except:
567589
print "error: reference mismatch!"
568590
print "make sure that the contigs of the bam file and the reference match"
569591
quit()
570592

571-
coverage_norm=numpy.median(avg_coverage)
593+
cov=numpy.array(cov)
594+
if len(cov):
595+
coverage_norm=numpy.median(cov)
596+
else:
597+
coverage_norm=1
572598
coverage_data=gc_norm(args,coverage_norm,chromosomes,coverage_data,Ncontent)
573599

574600
chromosomal_average=0
575601
outfile=open(args.o+".ploidy.tab", 'w')
576602
outfile.write("Contig\tploidy_rounded\tploidy_raw\tmedian_coverage\n")
577603
for chromosome in chromosomes:
578604
N_count=Ncontent[chromosome]
579-
chromosomal_average=numpy.median(coverage_data[chromosome][numpy.where( (N_count > -1) & ( (coverage_data[chromosome][:,1] > args.q) | (coverage_data[chromosome][:,1] == 0) ) ),0])
605+
cov=coverage_data[chromosome][numpy.where( (N_count > -1) & ( (coverage_data[chromosome][:,1] > args.q) | (coverage_data[chromosome][:,1] == 0) ) ),0]
606+
chromosomal_average=numpy.median(cov)
580607
if not args.force_ploidy:
581608
try:
582609
ploidies[chromosome]=int(round((chromosomal_average)/coverage_norm*args.n))
583610
except:
584611
ploidies[chromosome]=args.n
585612
else:
586613
ploidies[chromosome]=args.n
587-
588614
library_stats["chr_cov"][chromosome]=chromosomal_average
589615

590616
outfile.write("{}\t{}\t{}\t{}\n".format(chromosome,ploidies[chromosome],round( library_stats["chr_cov"][chromosome]/coverage_norm*args.n,2),library_stats["chr_cov"][chromosome]))

src/TIDDIT.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ int main(int argc, char **argv) {
4545
float insertStd;
4646
int min_variant_size= 100;
4747
string outputFileHeader ="output";
48-
string version = "2.1.1";
48+
string version = "2.2.1";
4949

5050
//collect all options as a vector
5151
vector<string> arguments(argv, argv + argc);

0 commit comments

Comments
 (0)