Skip to content

Commit 306e454

Browse files
committed
Released minimap2-2.3 (r531)
1 parent 1dd221a commit 306e454

File tree

7 files changed

+106
-28
lines changed

7 files changed

+106
-28
lines changed

NEWS.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,37 @@
1+
Release 2.3-r531 (22 October 2017)
2+
----------------------------------
3+
4+
This release comes with many improvements and bug fixes:
5+
6+
* The **sr** preset now supports paired-end short-read alignment. Minimap2 is
7+
3-4 times as fast as BWA-MEM, but is slightly less accurate on simulated
8+
reads.
9+
10+
* Meticulous improvements to assembly-to-assembly alignment (special thanks to
11+
Alexey Gurevich from the QUAST team): a) apply a small penalty to matches
12+
between ambiguous bases; b) reduce missing alignments due to spurious
13+
overlaps; c) introduce the short form of the `cs` tag, an improvement to the
14+
SAM MD tag.
15+
16+
* Make sure gaps are always left-aligned.
17+
18+
* Recognize `U` bases from Oxford Nanopore Direct RNA-seq (#33).
19+
20+
* Fixed slightly wrong chaining score. Fixed slightly inaccurate coordinates
21+
for split alignment.
22+
23+
* Fixed multiple reported bugs: 1) wrong reference name for inversion
24+
alignment (#30); 2) redundant SQ lines when multiple query files are
25+
specified (#39); 3) non-functioning option `-K` (#36).
26+
27+
This release has implemented all the major features I planned five months ago,
28+
with the addition of spliced long-read alignment. The next couple of releases
29+
will focus on fine tuning of base algorithms.
30+
31+
(2.3: 22 October 2017, r531)
32+
33+
34+
135
Release 2.2-r409 (17 September 2017)
236
------------------------------------
337

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
[![Release](https://img.shields.io/badge/Release-v2.2-blue.svg?style=flat)](https://github.com/lh3/minimap2/releases)
1+
[![Release](https://img.shields.io/badge/Release-v2.3-blue.svg?style=flat)](https://github.com/lh3/minimap2/releases)
22
[![BioConda](https://img.shields.io/conda/vn/bioconda/minimap2.svg?style=flat)](https://anaconda.org/bioconda/minimap2)
33
[![PyPI](https://img.shields.io/pypi/v/mappy.svg?style=flat)](https://pypi.python.org/pypi/mappy)
44
[![Python Version](https://img.shields.io/pypi/pyversions/mappy.svg?style=flat)](https://pypi.python.org/pypi/mappy)

main.c

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include "mmpriv.h"
77
#include "getopt.h"
88

9-
#define MM_VERSION "2.2-r526-dirty"
9+
#define MM_VERSION "2.3-r531"
1010

1111
#ifdef __linux__
1212
#include <sys/resource.h>
@@ -68,7 +68,7 @@ int main(int argc, char *argv[])
6868
const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:i:L";
6969
mm_mapopt_t opt;
7070
mm_idxopt_t ipt;
71-
int i, c, n_threads = 3, long_idx, max_gap_ref = 0;
71+
int i, c, n_threads = 3, long_idx;
7272
char *fnw = 0, *rg = 0, *s;
7373
FILE *fp_help = stderr;
7474
mm_idx_reader_t *idx_rdr;
@@ -98,7 +98,7 @@ int main(int argc, char *argv[])
9898
else if (c == 't') n_threads = atoi(optarg);
9999
else if (c == 'v') mm_verbose = atoi(optarg);
100100
else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg);
101-
else if (c == 'G') max_gap_ref = (int)mm_parse_num(optarg);
101+
else if (c == 'G') mm_mapopt_max_intron_len(&opt, (int)mm_parse_num(optarg));
102102
else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(optarg);
103103
else if (c == 'i') opt.min_iden = atof(optarg);
104104
else if (c == 'N') opt.best_n = atoi(optarg);
@@ -174,7 +174,7 @@ int main(int argc, char *argv[])
174174
else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; // match CT-AC (reverse complement of GT-AG)
175175
else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); // don't try to match the GT-AG signal
176176
else {
177-
fprintf(stderr, "[E::%s] unrecognized cDNA direction\n", __func__);
177+
fprintf(stderr, "[ERROR]\033[1;31m unrecognized cDNA direction\033[0m\n");
178178
return 1;
179179
}
180180
} else if (c == 'O') {
@@ -185,14 +185,9 @@ int main(int argc, char *argv[])
185185
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
186186
}
187187
}
188-
if (max_gap_ref > 0) {
189-
opt.max_gap_ref = max_gap_ref;
190-
if (opt.flag & MM_F_SPLICE) opt.bw = max_gap_ref; // in the splice mode, this also changes the bandwidth
191-
}
192-
if ((opt.flag & MM_F_OUT_SAM) && (opt.flag & MM_F_OUT_CS_LONG)) {
193-
opt.flag &= ~MM_F_OUT_CS_LONG;
194-
if (mm_verbose >= 2)
195-
fprintf(stderr, "[WARNING]\033[1;31m in SAM, only the short form of the cs tag is outputted.\033[0m\n");
188+
if ((opt.flag & MM_F_SPLICE) && (opt.flag & MM_F_FRAG_MODE)) {
189+
fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
190+
return 1;
196191
}
197192

198193
if (argc == optind || fp_help == stdout) {
@@ -207,8 +202,8 @@ int main(int argc, char *argv[])
207202
fprintf(fp_help, " Mapping:\n");
208203
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
209204
fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
210-
fprintf(fp_help, " -G NUM max reference skip/intron length [-xsplice:200k]\n");
211-
fprintf(fp_help, " -F NUM max fragment length in the fragment mode [-xsr:800]\n");
205+
fprintf(fp_help, " -G NUM max intron length (effective with -xsplice; changing -r) [200k]\n");
206+
fprintf(fp_help, " -F NUM max fragment length (effective with -xsr or in the fragment mode) [800]\n");
212207
fprintf(fp_help, " -r NUM bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
213208
fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
214209
fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
@@ -228,7 +223,7 @@ int main(int argc, char *argv[])
228223
fprintf(fp_help, " Input/Output:\n");
229224
fprintf(fp_help, " -a output in the SAM format (PAF by default)\n");
230225
fprintf(fp_help, " -Q don't output base quality in SAM\n");
231-
fprintf(fp_help, " -L write CIGAR with >65535 ops in the CG tag (compatible with future tools)\n");
226+
fprintf(fp_help, " -L write CIGAR with >65535 ops at the CG tag\n");
232227
fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n");
233228
fprintf(fp_help, " -c output CIGAR in PAF\n");
234229
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");

map.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
5252
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
5353
}
5454

55+
void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len)
56+
{
57+
if ((opt->flag & MM_F_SPLICE) && max_intron_len > 0)
58+
opt->max_gap_ref = opt->bw = max_intron_len;
59+
}
60+
5561
int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
5662
{
5763
if (preset == 0) {

minimap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
161161
*/
162162
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
163163

164+
void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len);
165+
164166
/**
165167
* Initialize an index reader
166168
*

minimap2.1

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.TH minimap2 1 "17 October 2017" "minimap2-2.2-dirty (r518)" "Bioinformatics tools"
1+
.TH minimap2 1 "22 October 2017" "minimap2-2.3 (r531)" "Bioinformatics tools"
22
.SH NAME
33
.PP
44
minimap2 - mapping and alignment between collections of DNA sequences
@@ -126,7 +126,7 @@ Stop chain enlongation if there are no minimizers in
126126
[10000].
127127
.TP
128128
.BI -r \ INT
129-
Bandwidth used in chaining and DP-based alignment [1000]. This option
129+
Bandwidth used in chaining and DP-based alignment [500]. This option
130130
approximately controls the maximum gap size.
131131
.TP
132132
.BI -n \ INT
@@ -148,7 +148,7 @@ diagonal minimizer hits will also be suppressed.
148148
.TP
149149
.BI -p \ FLOAT
150150
Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
151-
Between two chains overlaping over half of the shorter chain (controled by
151+
Between two chains overlaping over half of the shorter chain (controlled by
152152
.BR --mask-level ),
153153
the chain with a lower score is secondary to the chain with a higher score.
154154
If the ratio of the scores is below
@@ -163,10 +163,16 @@ secondary alignments [5]. This option has no effect when
163163
is applied.
164164
.TP
165165
.BI -G \ NUM
166-
Maximal intron length in the splice mode [200k]. This option also changes the
167-
bandwidth to
166+
Maximum gap on the reference (effective with
167+
.BR -xsplice / --splice ).
168+
This option also changes the chaining and alignment band width to
168169
.IR NUM .
169-
Increasing this option slows down spliced alignment.
170+
Increasing this option slows down spliced alignment. [200k]
171+
.TP
172+
.BI -F \ NUM
173+
Maximum fragment length (aka insert size; effective with
174+
.BR -xsr / --frag)
175+
[800]
170176
.TP
171177
.BI --max-chain-skip \ INT
172178
A heuristic that stops chaining early [50]. Minimap2 uses dynamic programming
@@ -175,6 +181,23 @@ option makes minimap2 exits the inner loop if it repeatedly sees seeds already
175181
on chains. Set
176182
.I INT
177183
to a large number to switch off this heuristic.
184+
.TP
185+
.B --no-long-join
186+
Disable the long gap patching heuristic. When this option is applied, the
187+
maximum alignment gap is mostly controlled by
188+
.BR -r .
189+
.TP
190+
.B --splice
191+
Enable the splice alignment mode.
192+
.TP
193+
.B --sr
194+
Enable short-read alignment heuristics. In the short-read mode, minimap2
195+
applies a second round of chaining with a higher minimizer occurrence threshold
196+
if no good chain is found. In addition, minimap2 attempts to patch gaps between
197+
seeds with ungapped alignment.
198+
.TP
199+
.BR --frag [= no | yes ]
200+
Whether to enable the fragment mode [no]
178201
.SS Alignment options
179202
.TP 10
180203
.BI -A \ INT
@@ -194,6 +217,7 @@ Gap extension penalty [2,1]. A gap of length
194217
.I k
195218
costs
196219
.RI min{ O1 + k * E1 , O2 + k * E2 }.
220+
In the splice mode, the second gap penalties are not used.
197221
.TP
198222
.BI -z \ INT
199223
Break an alignment if the running score drops too quickly along the diagonal of
@@ -217,6 +241,9 @@ no attempt to match GT-AG [n]
217241
.TP
218242
.BI --cost-non-gt-ag \ INT
219243
Cost of non-canonical splicing sites [0].
244+
.TP
245+
.BI --end-bonus \ INT
246+
Score bonus when alignment extends to the end of the query sequence [10].
220247
.SS Input/output options
221248
.TP 10
222249
.B -a
@@ -226,9 +253,15 @@ by default.
226253
.B -Q
227254
Ignore base quality in the input file.
228255
.TP
256+
.B -L
257+
Write CIGAR with >65535 operators at the CG tag. Older tools are unable to
258+
convert alignments with >65535 CIGAR ops to BAM. This option makes minimap2 SAM
259+
compatible with older tools. Newer tools recognize this tag and reconstruct
260+
the real CIGAR in memory.
261+
.TP
229262
.BI -R \ STR
230263
SAM read group line in a format like
231-
.B @RG\\\\tID:foo\\\\tSM:bar
264+
.RB @RG\\\\tID:foo\\\\tSM:bar
232265
[].
233266
.TP
234267
.B -c
@@ -249,6 +282,11 @@ is given,
249282
.I short
250283
is assumed. [none]
251284
.TP
285+
.BI --seed \ INT
286+
Integer seed for randomizing equally best hits. Minimap2 hashes
287+
.I INT
288+
and read name when choosing between equally best hits. [11]
289+
.TP
252290
.BI -t \ INT
253291
Number of threads [3]. Minimap2 uses at most three threads when indexing target
254292
sequences, and uses up to
@@ -271,6 +309,9 @@ K/M/G/k/m/g suffix is accepted. A large
271309
helps load balancing in the multi-threading mode, at the cost of increased
272310
memory.
273311
.TP
312+
.BR --secondary [= yes | no ]
313+
Whether to output secondary alignments [yes]
314+
.TP
274315
.B --version
275316
Print version number to stdout
276317
.SS Preset options
@@ -343,9 +384,9 @@ tag ignores introns to demote hits to pseudogenes.
343384
.B sr
344385
Short single-end reads without splicing
345386
.RB ( -k21
346-
.B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200
347-
.B -2K50m --frag
348-
.BR --sr ).
387+
.B -w11 --sr --frag -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
388+
.B -s40 -g200 -2K50m
389+
.BR --secondary=no ).
349390
.RE
350391
.SS Miscellaneous options
351392
.TP 10
@@ -358,7 +399,7 @@ multi-threading mode.
358399
.B --print-qname
359400
Print query names to stderr, mostly to see which query is crashing minimap2.
360401
.TP
361-
.B --print-seed
402+
.B --print-seeds
362403
Print seed positions to stderr, for debugging only.
363404
.SH OUTPUT FORMAT
364405
.PP

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def readme():
2323

2424
setup(
2525
name = 'mappy',
26-
version = '2.2',
26+
version = '2.3',
2727
url = 'https://github.com/lh3/minimap2',
2828
description = 'Minimap2 python binding',
2929
long_description = readme(),

0 commit comments

Comments
 (0)