2121once."""
2222import functools
2323import gzip
24+ import re
2425import threading
2526from pathlib import Path
2627from typing import Iterable , Optional , Set
@@ -55,14 +56,51 @@ def check_content(strings: Iterable[str],
5556 break
5657
5758 for string in strings_to_check :
58- if string not in found_strings and string in line :
59+ if string in line :
5960 found_strings .add (string )
6061 # Remove found strings for faster searching. This should be done
6162 # outside of the loop above.
6263 strings_to_check -= found_strings
6364 return found_strings
6465
6566
def check_regex_content(patterns: Iterable[str],
                        text_lines: Iterable[str]) -> Set[str]:
    """
    Checks which of the given regular expressions match the text lines.

    The lines are read only once and reading stops as soon as every pattern
    has matched. This makes searching for patterns in large bodies of text
    more efficient.

    :param patterns: Regular expressions, as strings. Each one is searched
                     for anywhere in a line (``re.search`` semantics).
    :param text_lines: The lines of text that need to be searched.
    :return: A set with the pattern strings that matched at least one line.
             Patterns that never matched are simply absent from the set.
    """
    # Compile every pattern once up front; the set also collapses duplicate
    # patterns so each regex is evaluated at most once per line.
    regex_to_match = {re.compile(pattern) for pattern in patterns}
    found_patterns: Set[str] = set()

    for line in text_lines:
        # Break the loop if all regexes have been matched.
        if not regex_to_match:
            break

        # Collect the matches first and shrink the set afterwards: a set
        # must not change size while it is being iterated.
        matched = {regex for regex in regex_to_match if regex.search(line)}
        found_patterns.update(regex.pattern for regex in matched)

        # Matched regexes do not have to be checked on later lines.
        regex_to_match -= matched

    return found_patterns
102+
103+
66104class ContentTestCollector (pytest .Collector ):
67105 def __init__ (self , name : str , parent : pytest .Collector ,
68106 filepath : Path ,
@@ -84,6 +122,7 @@ def __init__(self, name: str, parent: pytest.Collector,
84122 self .content_test = content_test
85123 self .workflow = workflow
86124 self .found_strings = None
125+ self .found_patterns = None
87126 self .thread = None
88127 # We check the contents of files. Sometimes files are not there. Then
89128 # content can not be checked. We save FileNotFoundErrors in this
@@ -99,6 +138,8 @@ def find_strings(self):
99138 self .workflow .wait ()
100139 strings_to_check = (self .content_test .contains +
101140 self .content_test .must_not_contain )
141+ patterns_to_check = (self .content_test .contains_regex +
142+ self .content_test .must_not_contain_regex )
102143 file_open = (functools .partial (gzip .open , str (self .filepath ))
103144 if self .filepath .suffix == ".gz" else
104145 self .filepath .open )
@@ -108,6 +149,11 @@ def find_strings(self):
108149 self .found_strings = check_content (
109150 strings = strings_to_check ,
110151 text_lines = file_handler )
152+ # Read the file again for the regex
153+ with file_open (mode = 'rt' ) as file_handler : # type: ignore # mypy goes crazy here otherwise # noqa: E501
154+ self .found_patterns = check_regex_content (
155+ patterns = patterns_to_check ,
156+ text_lines = file_handler )
111157 except FileNotFoundError :
112158 self .file_not_found = True
113159
@@ -124,6 +170,7 @@ def collect(self):
124170 parent = self ,
125171 string = string ,
126172 should_contain = True ,
173+ regex = False ,
127174 content_name = self .content_name
128175 )
129176 for string in self .content_test .contains ]
@@ -133,18 +180,39 @@ def collect(self):
133180 parent = self ,
134181 string = string ,
135182 should_contain = False ,
183+ regex = False ,
136184 content_name = self .content_name
137185 )
138186 for string in self .content_test .must_not_contain ]
139187
188+ test_items += [
189+ ContentTestItem .from_parent (
190+ parent = self ,
191+ string = pattern ,
192+ should_contain = True ,
193+ regex = True ,
194+ content_name = self .content_name
195+ )
196+ for pattern in self .content_test .contains_regex ]
197+
198+ test_items += [
199+ ContentTestItem .from_parent (
200+ parent = self ,
201+ string = pattern ,
202+ should_contain = False ,
203+ regex = True ,
204+ content_name = self .content_name
205+ )
206+ for pattern in self .content_test .must_not_contain_regex ]
207+
140208 return test_items
141209
142210
143211class ContentTestItem (pytest .Item ):
144212 """Item that reports if a string has been found in content."""
145213
146214 def __init__ (self , parent : ContentTestCollector , string : str ,
147- should_contain : bool , content_name : str ):
215+ should_contain : bool , regex : bool , content_name : str ):
148216 """
149217 Create a ContentTestItem
150218 :param parent: A ContentTestCollector. We use a ContentTestCollector
@@ -153,6 +221,7 @@ def __init__(self, parent: ContentTestCollector, string: str,
153221 finished.
154222 :param string: The string that was searched for.
155223 :param should_contain: Whether the string should have been there
224+ :param regex: Whether we are looking for a regex
156225 :param content_name: the name of the content which allows for easier
157226 debugging if the test fails
158227 """
@@ -163,6 +232,7 @@ def __init__(self, parent: ContentTestCollector, string: str,
163232 self .should_contain = should_contain
164233 self .string = string
165234 self .content_name = content_name
235+ self .regex = regex
166236
167237 def runtest (self ):
168238 """Only after a workflow is finished the contents of files and logs are
@@ -175,8 +245,12 @@ def runtest(self):
175245 # Wait for thread to complete.
176246 self .parent .thread .join ()
177247 assert not self .parent .file_not_found
178- assert ((self .string in self .parent .found_strings ) ==
179- self .should_contain )
248+ if self .regex :
249+ assert ((self .string in self .parent .found_patterns ) ==
250+ self .should_contain )
251+ else :
252+ assert ((self .string in self .parent .found_strings ) ==
253+ self .should_contain )
180254
181255 def repr_failure (self , excinfo , style = None ):
182256 if self .parent .file_not_found :
0 commit comments