Skip to content

Commit 260a68d

Browse files
jmarshall authored and lh3 committed
Use #defines for CIGAR operators in C code
Give the CIGAR constants names to clarify the code. So that ksw2.h remains self-contained, define KSW_* versions of the CIGAR operators it needs for use within ksw2.h. Other code should in general use the full set of MM_CIGAR_* constants in minimap.h.
1 parent 177eef2 commit 260a68d

File tree

4 files changed

+63
-46
lines changed

4 files changed

+63
-46
lines changed

align.c

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -53,16 +53,16 @@ static int mm_test_zdrop(void *km, const mm_mapopt_t *opt, const uint8_t *qseq,
5353
// find the score and the region where score drops most along diagonal
5454
for (k = 0, score = 0; k < n_cigar; ++k) {
5555
uint32_t l, op = cigar[k]&0xf, len = cigar[k]>>4;
56-
if (op == 0) {
56+
if (op == MM_CIGAR_MATCH) {
5757
for (l = 0; l < len; ++l) {
5858
score += mat[tseq[i + l] * 5 + qseq[j + l]];
5959
update_max_zdrop(score, i+l, j+l, &max, &max_i, &max_j, opt->e, &max_zdrop, pos);
6060
}
6161
i += len, j += len;
62-
} else if (op == 1 || op == 2 || op == 3) {
62+
} else if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL || op == MM_CIGAR_N_SKIP) {
6363
score -= opt->q + opt->e * len;
64-
if (op == 1) j += len; // insertion
65-
else i += len; // deletion
64+
if (op == MM_CIGAR_INS) j += len;
65+
else i += len;
6666
update_max_zdrop(score, i, j, &max, &max_i, &max_j, opt->e, &max_zdrop, pos);
6767
}
6868
}
@@ -98,12 +98,12 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
9898
for (k = 0; k < p->n_cigar; ++k) { // indel left alignment
9999
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
100100
if (len == 0) to_shrink = 1;
101-
if (op == 0) {
101+
if (op == MM_CIGAR_MATCH) {
102102
toff += len, qoff += len;
103-
} else if (op == 1 || op == 2) { // insertion or deletion
103+
} else if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL) {
104104
if (k > 0 && k < p->n_cigar - 1 && (p->cigar[k-1]&0xf) == 0 && (p->cigar[k+1]&0xf) == 0) {
105105
int l, prev_len = p->cigar[k-1] >> 4;
106-
if (op == 1) {
106+
if (op == MM_CIGAR_INS) {
107107
for (l = 0; l < prev_len; ++l)
108108
if (qseq[qoff - 1 - l] != qseq[qoff + len - 1 - l])
109109
break;
@@ -116,9 +116,9 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
116116
p->cigar[k-1] -= l<<4, p->cigar[k+1] += l<<4, qoff -= l, toff -= l;
117117
if (l == prev_len) to_shrink = 1;
118118
}
119-
if (op == 1) qoff += len;
119+
if (op == MM_CIGAR_INS) qoff += len;
120120
else toff += len;
121-
} else if (op == 3) {
121+
} else if (op == MM_CIGAR_N_SKIP) {
122122
toff += len;
123123
}
124124
}
@@ -128,13 +128,13 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
128128
uint32_t l, s[3] = {0,0,0};
129129
for (l = k; l < p->n_cigar; ++l) { // count number of adjacent I and D
130130
uint32_t op = p->cigar[l]&0xf;
131-
if (op == 1 || op == 2 || p->cigar[l]>>4 == 0)
131+
if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL || p->cigar[l]>>4 == 0)
132132
s[op] += p->cigar[l] >> 4;
133133
else break;
134134
}
135135
if (s[1] > 0 && s[2] > 0 && l - k > 2) { // turn to a single I and a single D
136-
p->cigar[k] = s[1]<<4|1;
137-
p->cigar[k+1] = s[2]<<4|2;
136+
p->cigar[k] = s[1]<<4|MM_CIGAR_INS;
137+
p->cigar[k+1] = s[2]<<4|MM_CIGAR_DEL;
138138
for (k += 2; k < l; ++k)
139139
p->cigar[k] &= 0xf;
140140
to_shrink = 1;
@@ -154,9 +154,9 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
154154
else p->cigar[k+1] += p->cigar[k]>>4<<4; // add length to the next CIGAR operator
155155
p->n_cigar = l;
156156
}
157-
if ((p->cigar[0]&0xf) == 1 || (p->cigar[0]&0xf) == 2) { // get rid of leading I or D
157+
if ((p->cigar[0]&0xf) == MM_CIGAR_INS || (p->cigar[0]&0xf) == MM_CIGAR_DEL) { // get rid of leading I or D
158158
int32_t l = p->cigar[0] >> 4;
159-
if ((p->cigar[0]&0xf) == 1) {
159+
if ((p->cigar[0]&0xf) == MM_CIGAR_INS) {
160160
if (r->rev) r->qe -= l;
161161
else r->qs += l;
162162
*qshift = l;
@@ -174,7 +174,7 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
174174
if (r->p == 0) return;
175175
for (k = 0; k < r->p->n_cigar; ++k) {
176176
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
177-
if (op == 0) {
177+
if (op == MM_CIGAR_MATCH) {
178178
while (len > 0) {
179179
for (l = 0; l < len && qseq[qoff + l] == tseq[toff + l]; ++l) {} // run of "="; TODO: N<=>N is converted to "="
180180
if (l > 0) { ++n_EQX; len -= l; toff += l; qoff += l; }
@@ -183,19 +183,19 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
183183
if (l > 0) { ++n_EQX; len -= l; toff += l; qoff += l; }
184184
}
185185
++n_M;
186-
} else if (op == 1) { // insertion
186+
} else if (op == MM_CIGAR_INS) {
187187
qoff += len;
188-
} else if (op == 2) { // deletion
188+
} else if (op == MM_CIGAR_DEL) {
189189
toff += len;
190-
} else if (op == 3) { // intron
190+
} else if (op == MM_CIGAR_N_SKIP) {
191191
toff += len;
192192
}
193193
}
194194
// update in-place if we can
195195
if (n_EQX == n_M) {
196196
for (k = 0; k < r->p->n_cigar; ++k) {
197197
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
198-
if (op == 0) r->p->cigar[k] = len << 4 | 7;
198+
if (op == MM_CIGAR_MATCH) r->p->cigar[k] = len << 4 | MM_CIGAR_EQ_MATCH;
199199
}
200200
return;
201201
}
@@ -209,25 +209,25 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
209209
toff = qoff = m = 0;
210210
for (k = 0; k < r->p->n_cigar; ++k) {
211211
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
212-
if (op == 0) { // match/mismatch
212+
if (op == MM_CIGAR_MATCH) {
213213
while (len > 0) {
214214
// match
215215
for (l = 0; l < len && qseq[qoff + l] == tseq[toff + l]; ++l) {}
216-
if (l > 0) p->cigar[m++] = l << 4 | 7;
216+
if (l > 0) p->cigar[m++] = l << 4 | MM_CIGAR_EQ_MATCH;
217217
len -= l;
218218
toff += l, qoff += l;
219219
// mismatch
220220
for (l = 0; l < len && qseq[qoff + l] != tseq[toff + l]; ++l) {}
221-
if (l > 0) p->cigar[m++] = l << 4 | 8;
221+
if (l > 0) p->cigar[m++] = l << 4 | MM_CIGAR_X_MISMATCH;
222222
len -= l;
223223
toff += l, qoff += l;
224224
}
225225
continue;
226-
} else if (op == 1) { // insertion
226+
} else if (op == MM_CIGAR_INS) {
227227
qoff += len;
228-
} else if (op == 2) { // deletion
228+
} else if (op == MM_CIGAR_DEL) {
229229
toff += len;
230-
} else if (op == 3) { // intron
230+
} else if (op == MM_CIGAR_N_SKIP) {
231231
toff += len;
232232
}
233233
p->cigar[m++] = r->p->cigar[k];
@@ -248,7 +248,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
248248
r->blen = r->mlen = 0;
249249
for (k = 0; k < p->n_cigar; ++k) {
250250
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
251-
if (op == 0) { // match/mismatch
251+
if (op == MM_CIGAR_MATCH) {
252252
int n_ambi = 0, n_diff = 0;
253253
for (l = 0; l < len; ++l) {
254254
int cq = qseq[qoff + l], ct = tseq[toff + l];
@@ -260,23 +260,23 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
260260
}
261261
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
262262
toff += len, qoff += len;
263-
} else if (op == 1) { // insertion
263+
} else if (op == MM_CIGAR_INS) {
264264
int n_ambi = 0;
265265
for (l = 0; l < len; ++l)
266266
if (qseq[qoff + l] > 3) ++n_ambi;
267267
r->blen += len - n_ambi, p->n_ambi += n_ambi;
268268
s -= q + e * len;
269269
if (s < 0) s = 0;
270270
qoff += len;
271-
} else if (op == 2) { // deletion
271+
} else if (op == MM_CIGAR_DEL) {
272272
int n_ambi = 0;
273273
for (l = 0; l < len; ++l)
274274
if (tseq[toff + l] > 3) ++n_ambi;
275275
r->blen += len - n_ambi, p->n_ambi += n_ambi;
276276
s -= q + e * len;
277277
if (s < 0) s = 0;
278278
toff += len;
279-
} else if (op == 3) { // intron
279+
} else if (op == MM_CIGAR_N_SKIP) {
280280
toff += len;
281281
}
282282
}
@@ -730,7 +730,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
730730
if (qseq[j] >= 4 || tseq[j] >= 4) ez->score += opt->e2;
731731
else ez->score += qseq[j] == tseq[j]? opt->a : -opt->b;
732732
}
733-
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, 0, qe - qs);
733+
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, MM_CIGAR_MATCH, qe - qs);
734734
} else { // perform normal gapped alignment
735735
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
736736
}

format.c

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,8 @@ static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
144144
if (write_tag) mm_sprintf_lite(s, "\tcs:Z:");
145145
for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) {
146146
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
147-
assert((op >= 0 && op <= 3) || op == 7 || op == 8);
148-
if (op == 0 || op == 7 || op == 8) { // match
147+
assert((op >= MM_CIGAR_MATCH && op <= MM_CIGAR_N_SKIP) || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH);
148+
if (op == MM_CIGAR_MATCH || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH) {
149149
int l_tmp = 0;
150150
for (j = 0; j < len; ++j) {
151151
if (qseq[q_off + j] != tseq[t_off + j]) {
@@ -166,12 +166,12 @@ static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
166166
} else mm_sprintf_lite(s, ":%d", l_tmp);
167167
}
168168
q_off += len, t_off += len;
169-
} else if (op == 1) { // insertion to ref
169+
} else if (op == MM_CIGAR_INS) {
170170
for (j = 0, tmp[len] = 0; j < len; ++j)
171171
tmp[j] = "acgtn"[qseq[q_off + j]];
172172
mm_sprintf_lite(s, "+%s", tmp);
173173
q_off += len;
174-
} else if (op == 2) { // deletion from ref
174+
} else if (op == MM_CIGAR_DEL) {
175175
for (j = 0, tmp[len] = 0; j < len; ++j)
176176
tmp[j] = "acgtn"[tseq[t_off + j]];
177177
mm_sprintf_lite(s, "-%s", tmp);
@@ -192,24 +192,24 @@ static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
192192
if (write_tag) mm_sprintf_lite(s, "\tMD:Z:");
193193
for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) {
194194
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
195-
assert((op >= 0 && op <= 3) || op == 7 || op == 8);
196-
if (op == 0 || op == 7 || op == 8) { // match
195+
assert((op >= MM_CIGAR_MATCH && op <= MM_CIGAR_N_SKIP) || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH);
196+
if (op == MM_CIGAR_MATCH || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH) {
197197
for (j = 0; j < len; ++j) {
198198
if (qseq[q_off + j] != tseq[t_off + j]) {
199199
mm_sprintf_lite(s, "%d%c", l_MD, "ACGTN"[tseq[t_off + j]]);
200200
l_MD = 0;
201201
} else ++l_MD;
202202
}
203203
q_off += len, t_off += len;
204-
} else if (op == 1) { // insertion to ref
204+
} else if (op == MM_CIGAR_INS) {
205205
q_off += len;
206-
} else if (op == 2) { // deletion from ref
206+
} else if (op == MM_CIGAR_DEL) {
207207
for (j = 0, tmp[len] = 0; j < len; ++j)
208208
tmp[j] = "ACGTN"[tseq[t_off + j]];
209209
mm_sprintf_lite(s, "%d^%s", l_MD, tmp);
210210
l_MD = 0;
211211
t_off += len;
212-
} else if (op == 3) { // reference skip
212+
} else if (op == MM_CIGAR_N_SKIP) {
213213
t_off += len;
214214
}
215215
}
@@ -271,7 +271,7 @@ double mm_event_identity(const mm_reg1_t *r)
271271
if (r->p == 0) return -1.0f;
272272
for (i = 0; i < r->p->n_cigar; ++i) {
273273
int32_t op = r->p->cigar[i] & 0xf, len = r->p->cigar[i] >> 4;
274-
if (op == 1 || op == 2)
274+
if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL)
275275
++n_gapo, n_gap += len;
276276
}
277277
return (double)r->mlen / (r->blen + r->p->n_ambi - n_gap + n_gapo);

ksw2.h

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,13 @@
1616
#define KSW_EZ_SPLICE_REV 0x200
1717
#define KSW_EZ_SPLICE_FLANK 0x400
1818

19+
// The subset of CIGAR operators used by ksw code.
20+
// Use MM_CIGAR_* from minimap.h if you need the full list.
21+
#define KSW_CIGAR_MATCH 0
22+
#define KSW_CIGAR_INS 1
23+
#define KSW_CIGAR_DEL 2
24+
#define KSW_CIGAR_N_SKIP 3
25+
1926
#ifdef __cplusplus
2027
extern "C" {
2128
#endif
@@ -137,13 +144,13 @@ static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int min_intro
137144
else if (!(tmp >> (state + 2) & 1)) state = 0; // if requesting other states, _state_ stays the same if it is a continuation; otherwise, set to H
138145
if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure
139146
if (force_state >= 0) state = force_state;
140-
if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 0, 1), --i, --j; // match
141-
else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion
142-
else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 3, 1), --i; // intron
143-
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, 1), --j; // insertion
147+
if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_MATCH, 1), --i, --j;
148+
else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_DEL, 1), --i;
149+
else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_N_SKIP, 1), --i;
150+
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, 1), --j;
144151
}
145-
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, min_intron_len > 0 && i >= min_intron_len? 3 : 2, i + 1); // first deletion
146-
if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, j + 1); // first insertion
152+
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, min_intron_len > 0 && i >= min_intron_len? KSW_CIGAR_N_SKIP : KSW_CIGAR_DEL, i + 1); // first deletion
153+
if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, j + 1); // first insertion
147154
if (!is_rev)
148155
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
149156
tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;

minimap.h

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,16 @@
4646

4747
#define MM_MAX_SEG 255
4848

49+
#define MM_CIGAR_MATCH 0
50+
#define MM_CIGAR_INS 1
51+
#define MM_CIGAR_DEL 2
52+
#define MM_CIGAR_N_SKIP 3
53+
#define MM_CIGAR_SOFTCLIP 4
54+
#define MM_CIGAR_HARDCLIP 5
55+
#define MM_CIGAR_PADDING 6
56+
#define MM_CIGAR_EQ_MATCH 7
57+
#define MM_CIGAR_X_MISMATCH 8
58+
4959
#define MM_CIGAR_STR "MIDNSHP=XB"
5060

5161
#ifdef __cplusplus

0 commit comments

Comments
 (0)