|
8 | 8 | sys.path.insert(0, '{}/src/'.format(wd))
|
9 | 9 | import TIDDIT_calling
|
10 | 10 |
|
11 |
| -version = "2.10.0" |
| 11 | +version = "2.11.0" |
12 | 12 | parser = argparse.ArgumentParser("""TIDDIT-{}""".format(version),add_help=False)
|
13 | 13 | parser.add_argument('--sv' , help="call structural variation", required=False, action="store_true")
|
14 | 14 | parser.add_argument('--cov' , help="generate a coverage bed file", required=False, action="store_true")
|
|
27 | 27 | parser.add_argument('-Q', type=int,default=20, help="Minimum regional mapping quality (default 20)")
|
28 | 28 | parser.add_argument('-n', type=int,default=2, help="the ploidy of the organism,(default = 2)")
|
29 | 29 | parser.add_argument('-e', type=int, help="clustering distance parameter, discordant pairs closer than this distance are considered to belong to the same variant(default = sqrt(insert-size*2)*12)")
|
30 |
| - parser.add_argument('-l', type=int,default=3, help="min-pts parameter (default=3),must be set > 2") |
31 |
| - parser.add_argument('-s', type=int,default=20000000, help="Number of reads to sample when computing library statistics(default=50000000)") |
| 30 | + parser.add_argument('-l', type=int,default=3, help="min-pts parameter (default=3),must be set >= 2") |
| 31 | + parser.add_argument('-s', type=int,default=25000000, help="Number of reads to sample when computing library statistics(default=25000000)") |
32 | 32 | parser.add_argument('-z', type=int,default=100, help="minimum variant size (default=100), variants smaller than this will not be printed ( z < 10 is not recomended)")
|
33 | 33 | parser.add_argument('--force_ploidy',action="store_true", help="force the ploidy to be set to -n across the entire genome (i.e skip coverage normalisation of chromosomes)")
|
| 34 | + parser.add_argument('--no_cluster',action="store_true", help="Run only the TIDDIT signal extraction") |
34 | 35 | parser.add_argument('--debug',action="store_true", help="rerun the tiddit clustering procedure")
|
35 | 36 | parser.add_argument('--n_mask',type=float,default=0.5, help="exclude regions from coverage calculation if they contain more than this fraction of N (default = 0.5)")
|
36 |
| - parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction") |
| 37 | + parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction and for reading cram") |
| 38 | + parser.add_argument('--p_ratio', type=float,default=0.2, help="minimum discordant pair/normal pair ratio at the breakpoint junction(default=20%)") |
| 39 | + parser.add_argument('--r_ratio', type=float,default=0.1, help="minimum split read/coverage ratio at the breakpoint junction(default=10%)") |
37 | 40 |
|
38 | 41 | args= parser.parse_args()
|
39 | 42 | args.wd=os.path.dirname(os.path.realpath(__file__))
|
|
44 | 47 | if not os.path.isfile(args.ref):
|
45 | 48 | print ("error, could not find the reference file")
|
46 | 49 | quit()
|
47 |
| - if not args.bam.endswith(".bam"): |
48 |
| - print ("error, the input file is not a bam file, make sure that the file extension is .bam") |
| 50 | + |
| 51 | + if not args.ref and args.bam.endswith(".cram"): |
| 52 | + print("error, reference fasta is required when using cram input") |
49 | 53 | quit()
|
50 | 54 |
|
51 |
| - if not os.path.isfile(args.bam): |
| 55 | + if not (args.bam.endswith(".bam") or args.bam.endswith(".cram")) and not "/dev/" in args.bam: |
| 56 | + print ("error, the input file is not a bam file, make sure that the file extension is .bam or .cram") |
| 57 | + quit() |
| 58 | + |
| 59 | + if not os.path.isfile(args.bam) and not "/dev/" in args.bam: |
52 | 60 | print ("error, could not find the bam file")
|
53 | 61 | quit()
|
54 | 62 |
|
55 | 63 | if not os.path.isfile("{}/bin/TIDDIT".format(args.wd)):
|
56 | 64 | print ("error, could not find the TIDDIT executable file, try rerun the INSTALL.sh script")
|
57 | 65 | quit()
|
58 | 66 |
|
59 |
| - command_str="{}/bin/TIDDIT --sv -b {} -o {} -p {} -r {} -q {} -n {} -s {}".format(args.wd,args.bam,args.o,args.p,args.r,args.q,args.n,args.s) |
| 67 | + if not args.bam.endswith(".cram"): |
| 68 | + command_str="{}/bin/TIDDIT --sv -b {} -o {} -p {} -r {} -q {} -n {} -s {}".format(args.wd,args.bam,args.o,args.p,args.r,args.q,args.n,args.s) |
| 69 | + else: |
| 70 | + command_str="samtools view -hbu {} -T {} | {}/bin/TIDDIT --sv -b /dev/stdin -o {} -p {} -r {} -q {} -n {} -s {}".format(args.bam,args.ref,args.wd,args.o,args.p,args.r,args.q,args.n,args.s) |
| 71 | + |
| 72 | + |
60 | 73 | if args.i:
|
61 | 74 | command_str += " -i {}".format(args.i)
|
62 | 75 | if args.d:
|
|
74 | 87 | os.system("cat {} | {}/bin/TIDDIT --gc -z 50 -o {}".format(args.ref,args.wd,args.o))
|
75 | 88 | print ("Constructed GC wig in {} sec".format(time.time()-t))
|
76 | 89 |
|
77 |
| - TIDDIT_calling.cluster(args) |
| 90 | + if not args.no_cluster: |
| 91 | + TIDDIT_calling.cluster(args) |
78 | 92 |
|
79 | 93 | elif args.cov:
|
80 | 94 | parser = argparse.ArgumentParser("""TIDDIT --cov --bam inputfile [-o prefix]""")
|
|
84 | 98 | parser.add_argument('-z', type=int,default=500, help="use bins of specified size(default = 500bp) to measure the coverage of the entire bam file, set output to stdout to print to stdout")
|
85 | 99 | parser.add_argument('-w' , help="generate wig instead of bed", required=False, action="store_true")
|
86 | 100 | parser.add_argument('-u' , help="skip per bin mapping quality", required=False, action="store_true")
|
| 101 | + parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction and for reading cram") |
87 | 102 | args= parser.parse_args()
|
88 | 103 | args.wd=os.path.dirname(os.path.realpath(__file__))
|
89 |
| - command="{}/bin/TIDDIT --cov -b {} -o {} -z {}".format(args.wd,args.bam,args.o,args.z) |
| 104 | + |
| 105 | + if args.ref: |
| 106 | + if not os.path.isfile(args.ref): |
| 107 | + print ("error, could not find the reference file") |
| 108 | + quit() |
| 109 | + |
| 110 | + if not args.bam.endswith(".cram"): |
| 111 | + command="{}/bin/TIDDIT --cov -b {} -o {} -z {}".format(args.wd,args.bam,args.o,args.z) |
| 112 | + else: |
| 113 | + if not args.ref: |
| 114 | + print("error, missing reference sequence!") |
| 115 | + quit() |
| 116 | + command="samtools view -hbu {} -T {} | {}/bin/TIDDIT --cov -b /dev/stdin -o {} -z {}".format(args.bam,args.ref,args.wd,args.o,args.z) |
| 117 | + |
90 | 118 | if args.w:
|
91 | 119 | command += " -w"
|
92 | 120 |
|
|
0 commit comments