Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tests/test_bcftools_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def run_vcztools(args: str, expect_error=False) -> tuple[str, str]:
"view --no-version -i 'FILTER~\"VQSRTrancheINDEL99.00to100.00\"'",
"1kg_2020_chrM.vcf.gz"
),
("view --no-version -i 'INFO/AC>2'", "chr22.vcf.gz")
("view --no-version -i 'INFO/AC>2'", "chr22.vcf.gz"),
("view --no-version -i 'INFO/AC[0]>2'", "chr22.vcf.gz")
],
# This is necessary when trying to run individual tests, as the arguments above
# make for unworkable command lines
Expand Down
26 changes: 19 additions & 7 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,13 @@ def test_invalid_expressions(self, parser, expression):
('DP="."', filter_mod.UnsupportedMissingDataError),
("ID!=@~/file", filter_mod.UnsupportedFileReferenceError),
("INFO/TAG=@file", filter_mod.UnsupportedFileReferenceError),
("INFO/X[0] == 1", filter_mod.UnsupportedArraySubscriptError),
("INFO/AF[0] > 0.3", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/AD[0:0] > 30", filter_mod.UnsupportedArraySubscriptError),
("DP4[*] == 0", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/DP[1-3] > 10", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/DP[1-] < 7", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/DP[0,2-4] > 20", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/AD[0:*]", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/AD[0:]", filter_mod.UnsupportedArraySubscriptError),
("FORMAT/AD[*:1]", filter_mod.UnsupportedArraySubscriptError),
(
"(DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3",
filter_mod.UnsupportedArraySubscriptError,
),
("binom(FMT/AD)", filter_mod.UnsupportedFunctionsError),
("fisher(INFO/DP4)", filter_mod.UnsupportedFunctionsError),
("fisher(FMT/ADF,FMT/ADR)", filter_mod.UnsupportedFunctionsError),
Expand Down Expand Up @@ -261,6 +254,25 @@ def test_evaluate_type_operation(self, expression, expected):
result = fee.evaluate(numpify_values(data))
nt.assert_array_equal(result, expected)

@pytest.mark.parametrize(
    ("expression", "expected"),
    [
        ("INFO/AC>=2", [[0, 0], [1, 1], [0, 1], [1, 0]]),
        ("INFO/AC[*]>=2", [[0, 0], [1, 1], [0, 1], [1, 0]]),
        ("INFO/AC[0]>=2", [0, 1, 0, 1]),
        ("INFO/AC[1]>=2", [0, 1, 1, 0]),
    ],
)
def test_evaluate_array_subscripts(self, expression, expected):
    """Evaluate INFO/AC filters with no subscript, ``[*]`` and integer subscripts."""
    # Four rows of two values each; the integer subscripts pick one column.
    ac_values = [[1, -1], [5, 4], [1, 4], [2, -1]]
    filter_expr = filter_mod.FilterExpression(
        field_names={"variant_AC"}, include=expression
    )
    actual = filter_expr.evaluate(numpify_values({"variant_AC": ac_values}))
    nt.assert_array_equal(actual, expected)

@pytest.mark.parametrize(
("expr", "expected"),
[
Expand Down
35 changes: 30 additions & 5 deletions vcztools/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ class Number(Constant):
pass


class Integer(Constant):
    """Constant node whose evaluated value is ``self.tokens`` passed to ``int``."""

    def eval(self, data):
        """Return ``int(self.tokens)``; ``data`` is not consulted."""
        value = int(self.tokens)
        return value


class String(Constant):
def __init__(self, tokens):
super().__init__(tokens)
Expand Down Expand Up @@ -146,11 +151,29 @@ def referenced_fields(self):
return frozenset([self.field_name])


class IndexAny(Constant):
    """Constant node for the ``*`` subscript; it evaluates to ``Ellipsis``."""

    def eval(self, data):
        """Return ``Ellipsis`` regardless of ``data``."""
        return ...


class IndexedIdentifier(EvaluationNode):
    """An identifier followed by a subscript, e.g. ``INFO/AC[0]``.

    Only a one-element subscript is supported: a single integer or ``*``.
    Any other subscript raises ``UnsupportedArraySubscriptError``.
    """

    def __init__(self, tokens):
        """Store the identifier node and the subscript node.

        ``tokens[0]`` holds the already resolved identifier followed by the
        parsed pieces of the subscript.

        Raises:
            UnsupportedArraySubscriptError: if the subscript is not exactly
                one evaluable piece.
        """
        token = tokens[0]
        # Exactly one subscript piece is accepted, and it must be a node with
        # an ``eval`` method (the integer and "*" pieces). Longer subscripts
        # (ranges, lists, two-part forms) yield more pieces; a lone ":", "-"
        # or "," presumably arrives as a plain string with no ``eval`` --
        # NOTE(review): confirm against the index grammar.
        if len(token) != 2 or not hasattr(token[1], "eval"):
            raise UnsupportedArraySubscriptError()
        self.identifier = token[0]
        self.index = token[1]

    def eval(self, data):
        """Evaluate the identifier and apply the subscript to its second axis."""
        val = self.identifier.eval(data)
        ind = self.index.eval(data)
        # The first axis is kept whole; the subscript selects along the second.
        return val[:, ind]

    def referenced_fields(self):
        """Return the fields referenced by the wrapped identifier."""
        return self.identifier.referenced_fields()


class RegexOperator(EvaluationNode):
Expand Down Expand Up @@ -523,9 +546,11 @@ def make_bcftools_filter_parser(all_fields=None, map_vcf_identifiers=True):
# TODO we need to define the indexing grammar more carefully, but
# this at least lets us match correct strings and raise an informative
# error
index_single_element_expr = pp.Word(pp.nums).set_parse_action(Integer)
index_any_element_expr = pp.Literal("*").set_parse_action(IndexAny)
index_expr = pp.OneOrMore(
pp.common.number
| pp.Literal("*")
index_single_element_expr
| index_any_element_expr
| pp.Literal(":")
| pp.Literal("-")
| pp.Literal(",")
Expand Down
Loading