@@ -23,7 +23,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
23
23
24
24
q_start= start
25
25
q_end= end+ max_ins
26
-
26
+
27
27
if q_end > contig_length:
28
28
q_end= contig_length
29
29
@@ -44,14 +44,14 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
44
44
45
45
if not read.mate_is_unmapped:
46
46
if read.next_reference_start > end and read_reference_start > end:
47
- continue
47
+ continue
48
48
else :
49
49
if read_reference_start > end:
50
50
continue
51
51
52
52
if read.is_duplicate:
53
53
continue
54
-
54
+
55
55
if not (read_reference_start > end):
56
56
n_reads+= 1
57
57
if read.mapq < min_q:
@@ -83,7 +83,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
83
83
84
84
if read_reference_start < start:
85
85
r_start= start
86
-
86
+
87
87
if read_reference_end > end:
88
88
r_end= end
89
89
@@ -133,7 +133,7 @@ def find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,librar
133
133
return (" DUP:TANDEM" ,cn)
134
134
elif cn < p:
135
135
return (" DEL" ,cn)
136
-
136
+
137
137
elif inverted > non_inverted:
138
138
return (" INV" ,cn)
139
139
else :
@@ -253,7 +253,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
253
253
s= int (math.floor(posA/ 50.0 ))
254
254
e= int (math.floor(posB/ 50.0 ))+ 1
255
255
sample_data[sample][" covM" ]= numpy.average(coverage_data[chrA][s:e] )
256
-
256
+
257
257
inverted= 0
258
258
non_inverted= 0
259
259
for i in range (0 ,len (sv_clusters[chrA][chrB][cluster][" positions_A" ][" orientation_discordants" ]) ):
@@ -277,12 +277,12 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
277
277
svtype,cn= find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,library)
278
278
279
279
filt= sv_filter(sample_data,args,chrA,chrB,posA,posB,max_ins_len,n_discordants,n_splits,library,sample_data[sample][" discA" ],sample_data[sample][" discB" ],sample_data[sample][" splitA" ],sample_data[sample][" splitB" ],n_contigs)
280
- format_col= " GT:CN:COV:DV:RV:LQ:RR:RD "
280
+ format_col= " GT:CN:COV:DV:RV:LQ:RR:DR "
281
281
282
282
# configure filters for CNV based on Read depth
283
283
for sample in samples:
284
284
if " DEL" in svtype:
285
- # homozygout del based on coverage
285
+ # homozygous deletion based on coverage
286
286
if cn == 0 :
287
287
filt= " PASS"
288
288
@@ -294,7 +294,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
294
294
if covA > covM* (cn+ 0.9 ) and covB > covM* (cn+ 0.9 ):
295
295
filt= " PASS"
296
296
297
- # too few reads, but clear RD signal
297
+ # too few reads, but clear RD (read depth) signal
298
298
elif " DUP" in svtype and filt == " BelowExpectedLinks" :
299
299
filt= " PASS"
300
300
@@ -361,7 +361,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
361
361
GT= " 1/1"
362
362
else :
363
363
GT= " 0/1"
364
-
364
+
365
365
variant.append( " {}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}" .format(GT,cn,sample_data[sample][" covA" ],sample_data[sample][" covM" ],sample_data[sample][" covB" ],n_discordants,n_splits,sample_data[sample][" QA" ],sample_data[sample][" QB" ],sample_data[sample][" refRA" ],sample_data[sample][" refRB" ],sample_data[sample][" refFA" ],sample_data[sample][" refFB" ]) )
366
366
variants.append([chrA,posA,variant])
367
367
else :
@@ -472,7 +472,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
472
472
473
473
474
474
variant.append( " {}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}" .format(GT,cn,sample_data[sample][" covA" ],sample_data[sample][" covM" ],sample_data[sample][" covB" ],n_discordants,n_splits,sample_data[sample][" QA" ],sample_data[sample][" QB" ],sample_data[sample][" refRA" ],sample_data[sample][" refRB" ],sample_data[sample][" refFA" ],sample_data[sample][" refFB" ]) )
475
- variants.append([chrB,posB,variant])
475
+ variants.append([chrB,posB,variant])
476
476
477
477
samfile.close()
478
478
return (variants)
@@ -481,7 +481,7 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
481
481
contig_seqs= {}
482
482
new_seq= False
483
483
if not args.skip_assembly:
484
- for line in open (" {}_tiddit/clips.fa.assembly.clean.mag" .format(args.o)):
484
+ for line in open (" {}_tiddit/clips.fa.assembly.clean.mag" .format(args.o)):
485
485
486
486
if not new_seq and line[0 ] == " @" and " \t " in line:
487
487
name= line.split(" \t " )[0 ][1 :]
@@ -501,5 +501,5 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
501
501
for v in variants_list:
502
502
for variant in v:
503
503
variants[ variant[0 ] ].append( [ variant[1 ],variant[2 ] ] )
504
-
504
+
505
505
return (variants)
0 commit comments