Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions hts.c
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,10 @@ int hts_opt_add(hts_opt **opts, const char *c_arg) {
strcmp(o->arg, "FASTQ_UMI_REGEX") == 0)
o->opt = FASTQ_OPT_UMI_REGEX, o->val.s = val;

else if (strcmp(o->arg, "seq_max_depth") == 0 ||
strcmp(o->arg, "SEQ_MAX_DEPTH") == 0)
o->opt = SEQ_OPT_MAXDEPTH, o->val.i = atoi(val);

else {
hts_log_error("Unknown option '%s'", o->arg);
free(o->arg);
Expand Down Expand Up @@ -1260,6 +1264,7 @@ int hts_opt_apply(htsFile *fp, hts_opt *opts) {
case FASTQ_OPT_BARCODE:
case FASTQ_OPT_UMI:
case FASTQ_OPT_UMI_REGEX:
case SEQ_OPT_MAXDEPTH:
if (hts_set_opt(fp, opts->opt, opts->val.s) != 0)
return -1;
break;
Expand Down Expand Up @@ -1704,6 +1709,7 @@ int hts_close(htsFile *fp)
free(fp->fn);
free(fp->fn_aux);
free(fp->line.s);
sam_cond_destroy((sam_cond_t*)fp->cond_data);
free(fp);
errno = save;
return ret;
Expand Down Expand Up @@ -1903,6 +1909,27 @@ int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) {
break;
}

case SEQ_OPT_MAXDEPTH: {
/* once max depth is reached for all pos covered by the read, it will
be discarded */
va_start(args, opt);
int depth = va_arg(args, int);
va_end(args);
sam_cond_t *c = NULL;
if (depth > 0) {
if (!fp->cond_data) {
fp->cond_data = sam_cond_init();
}
if (!fp->cond_data) {
hts_log_warning("Cannot set max depth option");
return -1;
}
c = (sam_cond_t*)fp->cond_data;
c->maxdepth = depth;
}
}
break;

default:
break;
}
Expand Down
4 changes: 4 additions & 0 deletions htslib/hts.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ typedef struct htsFile {
const char *fnidx;
struct sam_hdr_t *bam_header;
struct hts_filter_t *filter;
void *cond_data; //for extra checks and filters
} htsFile;

// A combined thread pool and queue allocation size.
Expand Down Expand Up @@ -332,6 +333,9 @@ enum hts_fmt_option {
HTS_OPT_FILTER,
HTS_OPT_PROFILE,

// Seq data
SEQ_OPT_MAXDEPTH = 500, // sets the max depth above which read is discarded

// Fastq

// Boolean.
Expand Down
101 changes: 98 additions & 3 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -4254,6 +4254,95 @@ static inline int sam_read1_sam(htsFile *fp, sam_hdr_t *h, bam1_t *b) {
return ret;
}

/// extra checks on reads and decide whether to pass it to user or not
/* fp - file pointer which holds the buffer
* h - header
* b - bam data
* returns 1 to send the read to user and 0 to discard and check next read
* returns -1 on errors
*/
int check_conditions(samFile *fp, sam_hdr_t *h, bam1_t *b)
{
/* for maxdepth, depth per position is kept on a buffer in sam_cond_t data
in the file. It is safe as long as same file is not shared across threads.
Expects sorted data and if not so, fails. The depth data is discarded as
new position is read, to hold only relevant data.
*/
int ret = 0, i, j, updlen = 0;
sam_cond_t *c = NULL;
uint32_t *depbuf = NULL, *cigar = NULL;
size_t seqoffset = 0, reqbuflen = 0, bkplen = 0;
kstring_t *ksbuf = NULL;

//assumes fp and condition data to be valid
c = (sam_cond_t*)fp->cond_data;
if (!(ksbuf = &c->ksbuf)) {
return -1; //invalid, earlier alloc failed
}
bkplen = c->ksbuf.m;
if (b->core.flag & BAM_FUNMAP || !b->core.n_cigar)
return 1; //unmapped or no cigar, pass

if (c->tid != b->core.tid) { //tid changed, reset and reuse
c->bufstart = c->bufend = seqoffset = 0;
c->tid = b->core.tid;
if (ksbuf->s)
memset(ksbuf->s, 0, ksbuf->m);
} else { //same tid, ensure sorted data
if (c->bufstart > b->core.pos) {
hts_log_warning("Unsorted data, max depth checks won't work");
sam_cond_destroy(c);
fp->cond_data = NULL;
return -1; //return failure
}
}

seqoffset = c->bufend > b->core.pos ? b->core.pos - c->bufstart : 0;
if (ksbuf->m) {
//sorted and got a new pos, we can discard all upto this point
if ( c->bufstart < b->core.pos) {
if (seqoffset) { //part discard
size_t sz = seqoffset * sizeof(uint32_t);
size_t len = ksbuf->m - sz;
memmove(ksbuf->s, ksbuf->s + sz, len);
memset(ksbuf->s + len, 0, sz);
} else //full discard
memset(ksbuf->s, 0, ksbuf->m);

c->bufstart = b->core.pos;
seqoffset = 0;
}
}
if (c->bufstart < b->core.pos)
c->bufstart = b->core.pos;
//depends on l_qseq and length given by query consuming cigars matching
reqbuflen = seqoffset + b->core.l_qseq;
cigar = bam_get_cigar(b);
if (reqbuflen * sizeof(uint32_t) > ksbuf->m) {
if (ks_resize(ksbuf, reqbuflen * sizeof(uint32_t))) //failed to realloc
return -1;
memset(ksbuf->s+bkplen, 0, ksbuf->m - bkplen); //reset
}

depbuf = (uint32_t*)ksbuf->s;

for (i = 0; i < b->core.n_cigar; ++i) {
if (!(bam_cigar_type(bam_cigar_op(cigar[i]))&1)) {
continue; //irrelevant cigar
}
//cigar consuming query, update depth for each pos
for(j = 0; j < bam_cigar_oplen(cigar[i]); ++j) {
ret |= ++depbuf[seqoffset + updlen + j] <= c->maxdepth;
}
updlen += bam_cigar_oplen(cigar[i]);
}
if (c->bufend < b->core.pos + b->core.l_qseq)
c->bufend = b->core.pos + b->core.l_qseq; //update bufend pos
ksbuf->l = c->bufend - c->bufstart;

return ret;
}

// Returns 0 on success,
// -1 on EOF,
// <-1 on error
Expand Down Expand Up @@ -4296,9 +4385,15 @@ int sam_read1(htsFile *fp, sam_hdr_t *h, bam1_t *b)
return -3;
}

pass_filter = (ret >= 0 && fp->filter)
? sam_passes_filter(h, b, fp->filter)
: 1;
if (ret >= 0) {
pass_filter = fp->filter
? sam_passes_filter(h, b, fp->filter)
: 1;

if (pass_filter && fp->cond_data) //extra checks if set so
pass_filter = check_conditions(fp, h, b);
} else
pass_filter = 1;
} while (pass_filter == 0);

return pass_filter < 0 ? -2 : ret;
Expand Down
29 changes: 29 additions & 0 deletions sam_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,35 @@ static inline void nibble2base(uint8_t *nib, char *seq, int len) {
#endif
}

/// holds data for extra condition checks
typedef struct sam_cond_t {
int tid, maxdepth;
//buffer to hold depth data
hts_pos_t bufstart, bufend;
kstring_t ksbuf;
} sam_cond_t;

/// allocates condition data
/// returns the allocated data on success and NULL on failure
static inline sam_cond_t* sam_cond_init(void)
{
sam_cond_t *c = calloc(1, sizeof(sam_cond_t));
if (!c) {
hts_log_warning("Couldn't create condition data");
return NULL;
}
return c;
}

/// deallocates condition data
static inline void sam_cond_destroy(sam_cond_t *c)
{
if (!c)
return;
ks_free(&c->ksbuf);
free(c);
}

#ifdef __cplusplus
}
#endif
Expand Down