Skip to content

Commit 73d90e3

Browse files
authored
Merge pull request #168 from LUMC/fixutf8crash
Fix crash when stderr bytes is not properly aligned with the encoding
2 parents b823378 + 8005c66 commit 73d90e3

File tree

5 files changed

+84
-17
lines changed

5 files changed

+84
-17
lines changed

HISTORY.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ Changelog
77
.. This document is user facing. Please word the changes in such a way
88
.. that users understand how the changes affect the new version.
99
10+
version 2.1.0-dev
11+
---------------------------
12+
+ Fixed a bug where pytest-workflow would crash on logs that used non-ASCII
13+
characters where the chunk of size ``--stderr-bytes`` did not properly align
14+
with the used encoding.
15+
1016
version 2.0.0
1117
---------------------------
1218
This major release greatly cleans up the output of pytest-workflow in case of

src/pytest_workflow/plugin.py

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
from .content_tests import ContentTestCollector
3131
from .file_tests import FileTestCollector
3232
from .schema import WorkflowTest, workflow_tests_from_schema
33-
from .util import duplicate_tree, is_in_dir, replace_whitespace
33+
from .util import (decode_unaligned, duplicate_tree, is_in_dir,
34+
replace_whitespace)
3435
from .workflow import Workflow, WorkflowQueue
3536

3637

@@ -450,7 +451,10 @@ def collect(self):
450451
tests += [ExitCodeTest.from_parent(
451452
parent=self,
452453
workflow=workflow,
453-
stderr_bytes=self.config.getoption("stderr_bytes"))]
454+
stderr_bytes=self.config.getoption("stderr_bytes"),
455+
stdout_encoding=self.workflow_test.stdout.encoding,
456+
stderr_encoding=self.workflow_test.stderr.encoding,
457+
)]
454458

455459
tests += [
456460
FileTestCollector.from_parent(
@@ -476,11 +480,16 @@ def collect(self):
476480

477481
class ExitCodeTest(pytest.Item):
478482
def __init__(self, parent: pytest.Collector,
479-
workflow: Workflow, stderr_bytes: int):
483+
workflow: Workflow,
484+
stderr_bytes: int,
485+
stdout_encoding: Optional[str] = None,
486+
stderr_encoding: Optional[str] = None):
480487
name = f"exit code should be {workflow.desired_exit_code}"
481488
super().__init__(name, parent=parent)
482489
self.stderr_bytes = stderr_bytes
483490
self.workflow = workflow
491+
self.stdout_encoding = stdout_encoding
492+
self.stderr_encoding = stderr_encoding
484493

485494
def runtest(self):
486495
# workflow.exit_code waits for workflow to finish.
@@ -489,16 +498,21 @@ def runtest(self):
489498
def repr_failure(self, excinfo, style=None):
490499
standerr = self.workflow.stderr_file
491500
standout = self.workflow.stdout_file
492-
with open(standout, "rb") as standout_file, \
493-
open(standerr, "rb") as standerr_file:
494-
if os.path.getsize(standerr) >= self.stderr_bytes:
495-
standerr_file.seek(-self.stderr_bytes, os.SEEK_END)
501+
502+
with open(standout, "rb") as standout_file:
496503
if os.path.getsize(standout) >= self.stderr_bytes:
497504
standout_file.seek(-self.stderr_bytes, os.SEEK_END)
498-
message = (f"'{self.workflow.name}' exited with exit code " +
499-
f"'{self.workflow.exit_code}' instead of "
500-
f"'{self.workflow.desired_exit_code}'.\nstderr: "
501-
f"{standerr_file.read().strip().decode('utf-8')}"
502-
f"\nstdout: "
503-
f"{standout_file.read().strip().decode('utf-8')}")
504-
return message
505+
stdout_text = decode_unaligned(standout_file.read().strip(),
506+
encoding=self.stdout_encoding)
507+
with open(standerr, "rb") as standerr_file:
508+
if os.path.getsize(standerr) >= self.stderr_bytes:
509+
standerr_file.seek(-self.stderr_bytes, os.SEEK_END)
510+
stderr_text = decode_unaligned(standerr_file.read().strip(),
511+
encoding=self.stderr_encoding)
512+
513+
return (
514+
f"'{self.workflow.name}' exited with exit code " +
515+
f"'{self.workflow.exit_code}' instead of "
516+
f"'{self.workflow.desired_exit_code}'.\n"
517+
f"stderr: {stderr_text}\n"
518+
f"stdout: {stdout_text}")

src/pytest_workflow/util.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import sys
88
import warnings
99
from pathlib import Path
10-
from typing import Callable, Iterator, List, Set, Tuple, Union
10+
from typing import Callable, Iterator, List, Optional, Set, Tuple, Union
1111

1212
Filepath = Union[str, os.PathLike]
1313

@@ -209,3 +209,17 @@ def file_md5sum(filepath: Path, block_size=64 * 1024) -> str:
209209
for block in iter(lambda: file_handler.read(block_size), b''):
210210
hasher.update(block)
211211
return hasher.hexdigest()
212+
213+
214+
def decode_unaligned(data: bytes, encoding: Optional[str] = None):
215+
if encoding is None:
216+
encoding = sys.getdefaultencoding()
217+
for offset in range(4):
218+
try:
219+
decoded = data[offset:].decode(encoding=encoding, errors="strict")
220+
return decoded
221+
except UnicodeDecodeError:
222+
continue
223+
# When no return happens in the loop, decode again. This will throw an
224+
# error that is left uncaught, so it is shown to the user.
225+
return data.decode(encoding=encoding)

tests/test_miscellaneous_crashes.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
# You should have received a copy of the GNU Affero General Public License
1515
# along with pytest-workflow. If not, see <https://www.gnu.org/licenses/
1616

17+
import textwrap
18+
19+
from pytest import ExitCode
20+
1721
from .test_success_messages import SIMPLE_ECHO
1822

1923

@@ -27,3 +31,13 @@ def test_same_name_different_files(pytester):
2731
conflicting_message = (
2832
"Conflicting tests: test_b.yml::simple echo, test_a.yml::simple echo.")
2933
assert conflicting_message in result.stdout.str()
34+
35+
36+
def test_non_ascii_logs_stderr_bytes(pytester):
37+
test = textwrap.dedent("""
38+
- name: print non-ascii
39+
command: bash -c 'printf èèèèèèèèè && exit 1'
40+
""")
41+
pytester.makefile(".yml", test_non_ascii=test)
42+
result = pytester.runpytest("--stderr-bytes", "7")
43+
assert result.ret == ExitCode.TESTS_FAILED

tests/test_utils.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,18 @@
1414
# You should have received a copy of the GNU Affero General Public License
1515
# along with pytest-workflow. If not, see <https://www.gnu.org/licenses/
1616
import hashlib
17+
import itertools
1718
import os
1819
import shutil
1920
import subprocess
21+
import sys
2022
import tempfile
2123
from pathlib import Path
2224

2325
import pytest
2426

25-
from pytest_workflow.util import duplicate_tree, file_md5sum, \
26-
git_check_submodules_cloned, git_root, \
27+
from pytest_workflow.util import decode_unaligned, duplicate_tree, \
28+
file_md5sum, git_check_submodules_cloned, git_root, \
2729
is_in_dir, link_tree, replace_whitespace
2830

2931
WHITESPACE_TESTS = [
@@ -227,3 +229,20 @@ def test_duplicate_git_tree_submodule_symlinks(git_repo_with_submodules):
227229
assert link.exists()
228230
assert link.is_symlink()
229231
assert link.resolve() == dest / "bird" / "sub"
232+
233+
234+
@pytest.mark.parametrize(["offset", "encoding"],
235+
list(itertools.product(
236+
range(4), (None, "utf-8", "utf-16", "utf-32"))
237+
))
238+
def test_decode_unaligned(offset, encoding):
239+
string = "èèèèèèèèèèè"
240+
data = string.encode(encoding or sys.getdefaultencoding())
241+
decoded = decode_unaligned(data[offset:], encoding)
242+
assert string.endswith(decoded)
243+
244+
245+
def test_decode_unaligned_wrong_encoding_throws_error():
246+
data = "hello".encode("utf-8")
247+
with pytest.raises(UnicodeDecodeError):
248+
decode_unaligned(data, "utf-32-le")

0 commit comments

Comments
 (0)