Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/minisweagent/config/extra/swebench.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ environment:
PIP_PROGRESS_BAR: 'off'
TQDM_DISABLE: '1'
environment_class: docker
use_login_shell: false

model:
model_name: "anthropic/claude-sonnet-4-5-20250929"
Expand Down
12 changes: 11 additions & 1 deletion src/minisweagent/environments/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class DockerEnvironmentConfig:
"""Max duration to keep container running. Uses the same format as the sleep command."""
pull_timeout: int = 120
"""Timeout in seconds for pulling images."""
use_login_shell: bool = True
"""Whether to use a login shell when executing commands. Current SWE-bench testset should disable this option."""


class DockerEnvironment:
Expand Down Expand Up @@ -84,7 +86,15 @@ def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) ->
cmd.extend(["-e", f"{key}={value}"])
for key, value in self.config.env.items():
cmd.extend(["-e", f"{key}={value}"])
cmd.extend([self.container_id, "bash", "-lc", command])
cmd.extend(
[
self.container_id,
"bash",
]
)
if self.config.use_login_shell:
cmd.append("-l")
cmd.extend(["-c", command])

result = subprocess.run(
cmd,
Expand Down
3 changes: 3 additions & 0 deletions src/minisweagent/run/extra/swebench.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ def main(
config.setdefault("model", {})["model_name"] = model
if model_class is not None:
config.setdefault("model", {})["model_class"] = model_class
# SWE-bench images are built for non-login shells (checked on 2025-10-16),
# so default to a non-login shell unless the config sets use_login_shell explicitly.
if "use_login_shell" not in config.get("environment", {}):
config.setdefault("environment", {})["use_login_shell"] = False

progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")

Expand Down