@@ -268,7 +268,7 @@ def analyse_pos(candidate_signals,discordants,library_stats,args):
268
268
def generate_clusters (chrA ,chrB ,coordinates ,library_stats ,args ):
269
269
candidates = []
270
270
coordinates = coordinates [numpy .lexsort ((coordinates [:,1 ],coordinates [:,0 ]))]
271
- db = DBSCAN .main (coordinates [:,0 :2 ],args .e ,args .l )
271
+ db = DBSCAN .main (coordinates [:,0 :2 ],args .e ,int ( round ( args .l + library_stats [ "ploidies" ][ chrA ] / ( args . n * 10 ))) )
272
272
unique_labels = set (db )
273
273
274
274
for var in unique_labels :
@@ -338,40 +338,57 @@ def fetch_variant_type(chrA,chrB,candidate,args,library_stats):
338
338
var = "<DUP>"
339
339
340
340
if chrA == chrB and library_stats ["ploidies" ][chrA ]:
341
- if candidate ["discs" ]:
341
+ ploidy = library_stats ["ploidies" ][chrA ]
342
+ if ploidy > 10 :
343
+ if candidate ["discs" ] and abs (candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ]- 1 ) < 0.05 :
344
+ if candidate ["FF" ] + candidate ["RR" ] > candidate ["RF" ] + candidate ["FR" ]:
345
+ variant_type = "SVTYPE=INV"
346
+ var = "<INV>"
347
+ elif not candidate ["discs" ] and abs (candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ]- 1 ) < 0.05 :
348
+ if candidate ["splitsINV" ] > candidate ["splits" ]- candidate ["splitsINV" ]:
349
+ variant_type = "SVTYPE=INV"
350
+ var = "<INV>"
351
+ elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ]- 1 > 0.05 :
352
+ variant_type = "SVTYPE=DUP"
353
+ var = "<DUP>"
354
+ elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ]- 1 < - 0.05 :
355
+ variant_type = "SVTYPE=DEL"
356
+ var = "<DEL>"
357
+
358
+ elif candidate ["discs" ]:
342
359
if candidate ["FF" ] + candidate ["RR" ] > candidate ["RF" ] + candidate ["FR" ]:
343
360
variant_type = "SVTYPE=INV"
344
361
var = "<INV>"
345
362
elif library_stats ["Orientation" ] == "innie" :
346
- if candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] > (args . n + 0.5 )/ args . n :
363
+ if candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] > (ploidy + 0.5 )/ float ( ploidy ) :
347
364
variant_type = "SVTYPE=DUP"
348
365
var = "<DUP>"
349
366
if candidate ["RF" ] > candidate ["FR" ]:
350
367
variant_type = "SVTYPE=TDUP"
351
368
var = "<TDUP>"
352
- elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] < (args . n - 0.5 )/ args . n :
369
+ elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] < (ploidy - 0.5 )/ float ( ploidy ) :
353
370
variant_type = "SVTYPE=DEL"
354
371
var = "<DEL>"
355
372
356
373
else :
357
- if candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] > (args . n + 0.5 )/ args . n :
374
+ if candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] > (ploidy + 0.5 )/ float ( ploidy ) :
358
375
variant_type = "SVTYPE=DUP"
359
376
var = "<DUP>"
360
377
if candidate ["RF" ] < candidate ["FR" ]:
361
378
variant_type = "SVTYPE=TDUP"
362
379
var = "<TDUP>"
363
380
364
- elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] < (args . n - 0.5 )/ args . n :
381
+ elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] < (ploidy - 0.5 )/ float ( ploidy ) :
365
382
variant_type = "SVTYPE=DEL"
366
383
var = "<DEL>"
367
384
else :
368
385
if candidate ["splitsINV" ] > candidate ["splits" ]- candidate ["splitsINV" ]:
369
386
variant_type = "SVTYPE=INV"
370
387
var = "<INV>"
371
- elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] > ( args . n + 0.5 )/ args . n :
388
+ elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] > ( ploidy + 0.5 )/ float ( ploidy ) :
372
389
variant_type = "SVTYPE=DUP"
373
390
var = "<DUP>"
374
- elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] < (args . n - 0.5 )/ args . n :
391
+ elif candidate ["covM" ]/ library_stats ["chr_cov" ][chrA ] < (ploidy - 0.5 )/ float ( ploidy ) :
375
392
variant_type = "SVTYPE=DEL"
376
393
var = "<DEL>"
377
394
@@ -406,9 +423,9 @@ def fetch_filter(chrA,chrB,candidate,args,library_stats):
406
423
else :
407
424
if candidate ["e1" ]* 0.4 >= candidate ["splits" ]:
408
425
filt = "BelowExpectedLinks"
409
- if library_stats ["ploidies" ][chrA ] == 0 :
426
+ if library_stats ["ploidies" ][chrA ] == 0 or library_stats [ "ploidies" ][ chrB ] == 0 :
410
427
return ("Ploidy" )
411
- if candidate ["MaxcovA" ] >= library_stats ["chr_cov" ][chrA ]* (library_stats ["ploidies" ][chrA ]+ 2 ) or candidate ["MaxcovB" ] >= library_stats ["chr_cov" ][chrA ]* (library_stats ["ploidies" ][chrA ]+ 2 ):
428
+ if candidate ["MaxcovA" ] >= library_stats ["chr_cov" ][chrA ]* (library_stats ["ploidies" ][chrA ]+ 2 ) or candidate ["MaxcovB" ] >= library_stats ["chr_cov" ][chrB ]* (library_stats ["ploidies" ][chrB ]+ 2 ):
412
429
filt = "UnexpectedCoverage"
413
430
elif candidate ["discsA" ] > (candidate ["discs" ]+ candidate ["splits" ])* (1 + library_stats ["ploidies" ][chrA ]) or candidate ["discsB" ] > (candidate ["discs" ]+ candidate ["splits" ])* (1 + library_stats ["ploidies" ][chrA ]):
414
431
filt = "FewLinks"
@@ -556,35 +573,44 @@ def determine_ploidy(args,chromosomes,coverage_data,Ncontent,sequence_length,lib
556
573
library_stats ["chr_cov" ]= {}
557
574
ploidies = {}
558
575
avg_coverage = []
559
- for chromosome in chromosomes :
576
+ cov = []
577
+ for chromosome in chromosomes :
560
578
try :
561
579
N_count = Ncontent [chromosome ]
562
- chromosomal_average = numpy .median (coverage_data [chromosome ][numpy .where (N_count > 0 ),0 ])
563
- avg_coverage .append ( chromosomal_average )
580
+ chr_cov = coverage_data [chromosome ][numpy .where ( (N_count > 0 ) & (coverage_data [chromosome ][:,1 ] > args .q ) ),0 ][0 ]
581
+ if len (chr_cov ):
582
+ chromosomal_average = numpy .median (chr_cov )
583
+ cov += list (chr_cov )
584
+ else :
585
+ chromosomal_average = 0
564
586
library_stats ["chr_cov" ][chromosome ]= chromosomal_average
565
587
566
588
except :
567
589
print "error: reference mismatch!"
568
590
print "make sure that the contigs of the bam file and the reference match"
569
591
quit ()
570
592
571
- coverage_norm = numpy .median (avg_coverage )
593
+ cov = numpy .array (cov )
594
+ if len (cov ):
595
+ coverage_norm = numpy .median (cov )
596
+ else :
597
+ coverage_norm = 1
572
598
coverage_data = gc_norm (args ,coverage_norm ,chromosomes ,coverage_data ,Ncontent )
573
599
574
600
chromosomal_average = 0
575
601
outfile = open (args .o + ".ploidy.tab" , 'w' )
576
602
outfile .write ("Contig\t ploidy_rounded\t ploidy_raw\t median_coverage\n " )
577
603
for chromosome in chromosomes :
578
604
N_count = Ncontent [chromosome ]
579
- chromosomal_average = numpy .median (coverage_data [chromosome ][numpy .where ( (N_count > - 1 ) & ( (coverage_data [chromosome ][:,1 ] > args .q ) | (coverage_data [chromosome ][:,1 ] == 0 ) ) ),0 ])
605
+ cov = coverage_data [chromosome ][numpy .where ( (N_count > - 1 ) & ( (coverage_data [chromosome ][:,1 ] > args .q ) | (coverage_data [chromosome ][:,1 ] == 0 ) ) ),0 ]
606
+ chromosomal_average = numpy .median (cov )
580
607
if not args .force_ploidy :
581
608
try :
582
609
ploidies [chromosome ]= int (round ((chromosomal_average )/ coverage_norm * args .n ))
583
610
except :
584
611
ploidies [chromosome ]= args .n
585
612
else :
586
613
ploidies [chromosome ]= args .n
587
-
588
614
library_stats ["chr_cov" ][chromosome ]= chromosomal_average
589
615
590
616
outfile .write ("{}\t {}\t {}\t {}\n " .format (chromosome ,ploidies [chromosome ],round ( library_stats ["chr_cov" ][chromosome ]/ coverage_norm * args .n ,2 ),library_stats ["chr_cov" ][chromosome ]))
0 commit comments