From 058002661aa13010e0bdc1e7445b1ce79794c9a9 Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Mon, 15 Sep 2025 22:13:08 -0400 Subject: [PATCH] [WIP] Add cost column and docent link --- data/leaderboards.json | 1156 +++++++++++++++++++++++++++++++++++++++- js/mainResults.js | 18 +- 2 files changed, 1145 insertions(+), 29 deletions(-) diff --git a/data/leaderboards.json b/data/leaderboards.json index fed2792..3b2409c 100644 --- a/data/leaderboards.json +++ b/data/leaderboards.json @@ -14,6 +14,10 @@ "date": "2025-08-02", "logs": "s3://swe-bench-experiments/bash-only/20250802_mini-v1.0.0_claude-4-opus-20250514/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250802_mini-v1.0.0_claude-4-opus-20250514/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/c93d3381-a539-4cd6-89be-97066d36d906", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -31,11 +35,15 @@ "https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg" ], "site": "https://platform.openai.com/docs/models/gpt-5", - "folder": "20250807_mini-v1.7.0_gpt-5-2025-08-07", + "folder": "20250807_mini-v1.7.0_gpt-5", "resolved": 65.0, "date": "2025-08-07", - "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-2025-08-07/logs", - "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-2025-08-07/trajs", + "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5/logs", + "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/4844cb3c-d5cd-4c75-b64b-973029f2d37c", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -58,6 +66,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_claude-sonnet-4-20250514/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_claude-sonnet-4-20250514/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/0cb59666-bca8-476b-bf8e-3b924fafcae7", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -75,11 +87,15 @@ "https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg" ], "site": "https://platform.openai.com/docs/models/gpt-5-mini", - "folder": "20250807_mini-v1.7.0_gpt-5-mini-2025-08-07", + "folder": "20250807_mini-v1.7.0_gpt-5-mini", "resolved": 59.8, "date": "2025-08-07", - "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/logs", - "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/trajs", + "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini/logs", + "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-mini/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/7f5e3cfc-8e38-4c73-812c-aca21298e7bc", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -102,6 +118,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_o3-2025-04-16/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_o3-2025-04-16/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/565e5680-b913-4031-b537-00721a7a619a", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -119,11 +139,15 @@ "https://avatars.githubusercontent.com/u/141221163?s=200&v=4" ], "site": "https://qwenlm.github.io/blog/qwen3-coder/", - "folder": "20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct", + "folder": "20250802_mini-v1.0.0_qwen3-coder-480b-a35b-instruct", "resolved": 55.4, "date": "2025-08-02", "logs": "s3://swe-bench-experiments/bash-only/20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/f39d3041-d9d7-4f1b-b75e-8a13addb9e6e", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -146,6 +170,10 @@ "date": "2508-22-", "logs": "s3://swe-bench-experiments/bash-only/250822_mini-v1.9.1_glm-4.5/logs/", "trajs": "s3://swe-bench-experiments/bash-only/250822_mini-v1.9.1_glm-4.5/trajs/", + "trajs_docent": "https://docent.transluce.org/dashboard/9ae7f9a3-c9e3-48af-833c-52adcc7f5921", + "cost": 20.840391999999994, + "instance_cost": 0.26718451282051275, + "instance_calls": 37.6025641025641, "os_model": true, "os_system": true, "checked": true, @@ -168,6 +196,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gemini-2.5-pro/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gemini-2.5-pro/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/8b22ae97-2cfd-4b05-8438-38550cfaa163", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -190,6 +222,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-claude-3-7-sonnet-20250219/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-claude-3-7-sonnet-20250219/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -212,6 +248,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250521_o4-mini-20250416/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_o4-mini-2025-04-16/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/f46eb5b4-b4b2-478e-bc11-f1eb47eca637", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -229,11 +269,15 @@ "https://avatars.githubusercontent.com/u/129152888?s=200&v=4" ], "site": "https://moonshotai.github.io/Kimi-K2/", - "folder": "20250807_mini-v1.7.0_Kimi-K2-Instruct", + "folder": "20250807_mini-v1.7.0_kimi-k2-instruct", "resolved": 43.8, "date": "2025-08-07", "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_Kimi-K2-Instruct/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_Kimi-K2-Instruct/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/b6a6d69b-e69e-4011-aa94-46f44b84202e", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -256,6 +300,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gpt-4.1-2025-04-14/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gpt-4.1-2025-04-14/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/cd7a23c5-a2b1-4cab-b851-6e2c42aaf0f3", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -273,11 +321,15 @@ "https://upload.wikimedia.org/wikipedia/commons/6/66/OpenAI_logo_2025_%28symbol%29.svg" ], "site": "https://platform.openai.com/docs/models/gpt-5-nano", - "folder": "20250807_mini-v1.7.0_gpt-5-nano-2025-08-07", + "folder": "20250807_mini-v1.7.0_gpt-5-nano", "resolved": 34.8, "date": "2025-08-07", - "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/logs", - "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/trajs", + "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano/logs", + "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-5-nano/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/3dc9b424-547c-4bdc-9b72-3b58d316cb0a", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -300,6 +352,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gemini-2.5-flash/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gemini-2.5-flash/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/37e5933e-71a1-4011-8340-fbca563124b3", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -322,6 +378,10 @@ "date": "2025-08-07", "logs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-oss-120b/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250807_mini-v1.7.0_gpt-oss-120b/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/4a0a2b2a-a65b-452a-88c7-bb5d99745c75", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -344,6 +404,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -366,6 +430,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-gpt-4o-2024-11-20/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-gpt-4o-2024-11-20/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -388,6 +456,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -410,6 +482,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gemini-2.0-flash/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250726_mini-v1.0.0_gemini-2.0-flash/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -432,6 +508,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -449,11 +529,15 @@ "https://avatars.githubusercontent.com/u/141221163?s=200&v=4" ], "site": "https://qwenlm.github.io/blog/qwen2.5-coder/", - "folder": "20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct", + "folder": "20250803_mini-v1.0.0_qwen2-5-coder-32b-instruct", "resolved": 9.0, "date": "2025-08-03", "logs": "s3://swe-bench-experiments/bash-only/20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct/logs", "trajs": "s3://swe-bench-experiments/bash-only/20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/f5288cc2-48b7-4df9-8e30-072d96b58606", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -481,6 +565,10 @@ "date": "2025-06-05", "logs": "s3://swe-bench-experiments/test/20250605_atlassian-rovo-dev/logs", "trajs": "s3://swe-bench-experiments/test/20250605_atlassian-rovo-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -501,6 +589,10 @@ "date": "2025-05-22", "logs": "s3://swe-bench-experiments/test/20250522_amazon-q-developer-agent-20250405-dev/logs", "trajs": "s3://swe-bench-experiments/test/20250522_amazon-q-developer-agent-20250405-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -521,6 +613,10 @@ "date": "2025-02-27", "logs": "s3://swe-bench-experiments/test/20250227_sweagent-claude-3-7-20250219/logs/", "trajs": "s3://swe-bench-experiments/test/20250227_sweagent-claude-3-7-20250219/trajs/", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -542,6 +638,10 @@ "date": "2025-01-31", "logs": "s3://swe-bench-experiments/test/20250131_amazon-q-developer-agent-20241202-dev/logs/", "trajs": "s3://swe-bench-experiments/test/20250131_amazon-q-developer-agent-20241202-dev/trajs/", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -562,6 +662,10 @@ "date": "2024-11-03", "logs": "s3://swe-bench-experiments/test/20241103_OpenHands-CodeAct-2.1-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/test/20241103_OpenHands-CodeAct-2.1-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -582,6 +686,10 @@ "date": "2024-11-21", "logs": "s3://swe-bench-experiments/test/20241121_autocoderover-v2.0-claude-3-5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/test/20241121_autocoderover-v2.0-claude-3-5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -602,6 +710,10 @@ "date": "2024-08-20", "logs": "s3://swe-bench-experiments/test/20240820_honeycomb/logs", "trajs": "s3://swe-bench-experiments/test/20240820_honeycomb/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -621,6 +733,10 @@ "date": "2024-07-21", "logs": "s3://swe-bench-experiments/test/20240721_amazon-q-developer-agent-20240719-dev/logs", "trajs": "s3://swe-bench-experiments/test/20240721_amazon-q-developer-agent-20240719-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -641,6 +757,10 @@ "date": "2024-06-17", "logs": "s3://swe-bench-experiments/test/20240617_factory_code_droid/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -660,6 +780,10 @@ "date": "2024-06-28", "logs": "s3://swe-bench-experiments/test/20240628_autocoderover-v20240620/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -680,6 +804,10 @@ "date": "2024-06-20", "logs": "s3://swe-bench-experiments/test/20240620_sweagent_claude3.5sonnet/logs", "trajs": "s3://swe-bench-experiments/test/20240620_sweagent_claude3.5sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -701,6 +829,10 @@ "date": "2024-06-15", "logs": "s3://swe-bench-experiments/test/20240615_appmap-navie_gpt4o/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": false, "checked": true, @@ -721,6 +853,10 @@ "date": "2024-05-09", "logs": "s3://swe-bench-experiments/test/20240509_amazon-q-developer-agent-20240430-dev/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -741,6 +877,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/test/20240402_sweagent_gpt4/logs", "trajs": "s3://swe-bench-experiments/test/20240402_sweagent_gpt4/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -762,6 +902,10 @@ "date": "2024-07-28", "logs": "s3://swe-bench-experiments/test/20240728_sweagent_gpt4o/logs", "trajs": "s3://swe-bench-experiments/test/20240728_sweagent_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -783,6 +927,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/test/20240402_sweagent_claude3opus/logs", "trajs": "s3://swe-bench-experiments/test/20240402_sweagent_claude3opus/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -804,6 +952,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/test/20240402_rag_claude3opus/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -824,6 +976,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/test/20231010_rag_claude2/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -844,6 +1000,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/test/20240402_rag_gpt4/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -864,6 +1024,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/test/20231010_rag_swellama13b/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -884,6 +1048,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/test/20231010_rag_swellama7b/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -905,6 +1073,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/test/20231010_rag_gpt35/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -930,6 +1102,10 @@ "date": "2025-08-04", "logs": "s3://swe-bench-experiments/verified/20250804_epam-ai-run-claude-4-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20250804_epam-ai-run-claude-4-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -951,6 +1127,10 @@ "date": "2025-08-19", "logs": "s3://swe-bench-experiments/verified/20250819_ACoder/logs", "trajs": "s3://swe-bench-experiments/verified/20250819_ACoder/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -975,6 +1155,10 @@ "date": "2025-06-12", "logs": "s3://swe-bench-experiments/verified/20250612_trae/logs", "trajs": "s3://swe-bench-experiments/verified/20250612_trae/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -999,6 +1183,10 @@ "date": "2025-07-31", "logs": "s3://swe-bench-experiments/verified/20250731_harness_ai/logs", "trajs": "s3://swe-bench-experiments/verified/20250731_harness_ai/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1020,6 +1208,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/verified/20250720_Lingxi-v1.5_claude-4-sonnet-20250514/logs", "trajs": "s3://swe-bench-experiments/verified/20250720_Lingxi-v1.5_claude-4-sonnet-20250514/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1041,6 +1233,10 @@ "date": "2025-06-03", "logs": "s3://swe-bench-experiments/verified/20250603_Refact_Agent_claude-4-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20250603_Refact_Agent_claude-4-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1063,6 +1259,10 @@ "date": "2025-05-22", "logs": "s3://swe-bench-experiments/verified/20250522_tools_claude-4-opus/logs", "trajs": "s3://swe-bench-experiments/verified/20250522_tools_claude-4-opus/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1083,6 +1283,10 @@ "date": "2025-05-22", "logs": "s3://swe-bench-experiments/verified/20250522_tools_claude-4-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20250522_tools_claude-4-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1103,6 +1307,10 @@ "date": "2025-07-15", "logs": "s3://swe-bench-experiments/verified/20250715_qodo_command/logs", "trajs": "s3://swe-bench-experiments/verified/20250715_qodo_command/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1125,6 +1333,10 @@ "date": "2025-07-10", "logs": "s3://swe-bench-experiments/verified/20250710_bloop/logs", "trajs": "s3://swe-bench-experiments/verified/20250710_bloop/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1146,6 +1358,10 @@ "date": "2025-06-23", "logs": "s3://swe-bench-experiments/verified/20250623_warp/logs", "trajs": "s3://swe-bench-experiments/verified/20250623_warp/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1168,6 +1384,10 @@ "date": "2025-06-11", "logs": "s3://swe-bench-experiments/verified/20250611_moatless_claude-4-sonnet-20250514/logs", "trajs": "s3://swe-bench-experiments/verified/20250611_moatless_claude-4-sonnet-20250514/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1189,6 +1409,10 @@ "date": "2025-05-19", "logs": "s3://swe-bench-experiments/verified/20250519_trae/logs", "trajs": "s3://swe-bench-experiments/verified/20250519_trae/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1208,6 +1432,10 @@ "date": "2025-05-15", "logs": "s3://swe-bench-experiments/verified/20250515_Refact_Agent/logs", "trajs": "s3://swe-bench-experiments/verified/20250515_Refact_Agent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1231,6 +1459,10 @@ "date": "2025-05-24", "logs": "s3://swe-bench-experiments/verified/20250524_openhands_claude_4_sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20250524_openhands_claude_4_sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1252,6 +1484,10 @@ "date": "2025-06-10", "logs": "s3://swe-bench-experiments/verified/20250610_augment_agent_v1/logs", "trajs": "s3://swe-bench-experiments/verified/20250610_augment_agent_v1/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1271,6 +1507,10 @@ "date": "2025-05-19", "logs": "s3://swe-bench-experiments/verified/20250519_devlo/logs", "trajs": "s3://swe-bench-experiments/verified/20250519_devlo/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1294,6 +1534,10 @@ "date": "2025-04-30", "logs": "s3://swe-bench-experiments/verified/20250430_zencoder_ai/logs", "trajs": "s3://swe-bench-experiments/verified/20250430_zencoder_ai/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1316,6 +1560,10 @@ "date": "2025-08-05", "logs": "s3://swe-bench-experiments/verified/20250805-openhands-Qwen3-Coder-480B-A35B-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250805-openhands-Qwen3-Coder-480B-A35B-Instruct/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -1338,6 +1586,10 @@ "date": "2025-05-16", "logs": "s3://swe-bench-experiments/verified/20250516_cortexa_o3/logs", "trajs": "s3://swe-bench-experiments/verified/20250516_cortexa_o3/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1370,6 +1622,10 @@ "date": "2025-08-02", "logs": "s3://swe-bench-experiments/verified/20250802_mini-v1.0.0_claude-4-opus-20250514/logs", "trajs": "s3://swe-bench-experiments/verified/20250802_mini-v1.0.0_claude-4-opus-20250514/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/c93d3381-a539-4cd6-89be-97066d36d906", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1393,6 +1649,10 @@ "date": "2025-05-22", "logs": "s3://swe-bench-experiments/verified/20250522_sweagent_claude-4-sonnet-20250514/logs", "trajs": "s3://swe-bench-experiments/verified/20250522_sweagent_claude-4-sonnet-20250514/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1414,6 +1674,10 @@ "date": "2025-05-14", "logs": "s3://swe-bench-experiments/verified/20250514_aime_coder/logs", "trajs": "s3://swe-bench-experiments/verified/20250514_aime_coder/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1435,6 +1699,10 @@ "date": "2025-04-15", "logs": "s3://swe-bench-experiments/verified/20250415_openhands/logs", "trajs": "s3://swe-bench-experiments/verified/20250415_openhands/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1455,6 +1723,10 @@ "date": "2025-03-16", "logs": "s3://swe-bench-experiments/verified/20250316_augment_agent_v0/logs", "trajs": "s3://swe-bench-experiments/verified/20250316_augment_agent_v0/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1474,6 +1746,10 @@ "date": "2025-04-05", "logs": "s3://swe-bench-experiments/verified/20250405_amazon-q-developer-agent-20250405-dev/logs", "trajs": "s3://swe-bench-experiments/verified/20250405_amazon-q-developer-agent-20250405-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1494,6 +1770,10 @@ "date": "2025-07-16", "logs": "s3://swe-bench-experiments/verified/20250716_openhands_kimi_k2/logs", "trajs": "s3://swe-bench-experiments/verified/20250716_openhands_kimi_k2/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -1510,11 +1790,15 @@ "https://mini-swe-agent.com/latest/assets/mini_square.svg" ], "site": "https://mini-swe-agent.com/latest/", - "folder": "20250807_mini-v1.7.0_gpt-5-2025-08-07", + "folder": "20250807_mini-v1.7.0_gpt-5", "resolved": 65.0, "date": "2025-08-07", - "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-2025-08-07/logs", - "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-2025-08-07/trajs", + "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5/logs", + "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/4844cb3c-d5cd-4c75-b64b-973029f2d37c", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1538,6 +1822,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_claude-sonnet-4-20250514/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_claude-sonnet-4-20250514/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/0cb59666-bca8-476b-bf8e-3b924fafcae7", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1561,6 +1849,10 @@ "date": "2025-01-17", "logs": "s3://swe-bench-experiments/verified/20250117_wandb_programmer_o1_crosscheck5/logs", "trajs": "s3://swe-bench-experiments/verified/20250117_wandb_programmer_o1_crosscheck5/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1581,6 +1873,10 @@ "date": "2025-05-03", "logs": "s3://swe-bench-experiments/verified/20250503_patchpilot-v1.1-o4-mini/logs", "trajs": "s3://swe-bench-experiments/verified/20250503_patchpilot-v1.1-o4-mini/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1601,6 +1897,10 @@ "date": "2025-07-28", "logs": "s3://swe-bench-experiments/verified/20250728_zai_glm4-5/logs", "trajs": "s3://swe-bench-experiments/verified/20250728_zai_glm4-5/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -1622,6 +1922,10 @@ "date": "2025-02-06", "logs": "s3://swe-bench-experiments/verified/20250206_agentscope/logs", "trajs": "s3://swe-bench-experiments/verified/20250206_agentscope/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1643,6 +1947,10 @@ "date": "2025-02-24", "logs": "s3://swe-bench-experiments/verified/20250224_tools_claude-3-7-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20250224_tools_claude-3-7-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1661,6 +1969,10 @@ "date": "2025-01-10", "logs": "s3://swe-bench-experiments/verified/20250110_blackboxai_agent_v1.1/logs", "trajs": "s3://swe-bench-experiments/verified/20250110_blackboxai_agent_v1.1/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1680,6 +1992,10 @@ "date": "2025-02-28", "logs": "s3://swe-bench-experiments/verified/20250228_epam-ai-run-claude-3-5-sonnet/logs/", "trajs": "s3://swe-bench-experiments/verified/20250228_epam-ai-run-claude-3-5-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1700,6 +2016,10 @@ "date": "2025-02-25", "logs": "s3://swe-bench-experiments/verified/20250225_sweagent_claude-3-7-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20250225_sweagent_claude-3-7-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1719,6 +2039,10 @@ "date": "2024-12-21", "logs": "s3://swe-bench-experiments/verified/20241221_codestory_midwit_claude-3-5-sonnet_swe-search/logs", "trajs": "s3://swe-bench-experiments/verified/20241221_codestory_midwit_claude-3-5-sonnet_swe-search/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1739,6 +2063,10 @@ "date": "2025-02-03", "logs": "s3://swe-bench-experiments/verified/20250203_openhands_4x_scaled/logs", "trajs": "s3://swe-bench-experiments/verified/20250203_openhands_4x_scaled/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1759,6 +2087,10 @@ "date": "2025-01-10", "logs": "s3://swe-bench-experiments/verified/20250110_learn_by_interact_claude3.5/logs", "trajs": "s3://swe-bench-experiments/verified/20250110_learn_by_interact_claude3.5/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1775,11 +2107,15 @@ "https://mini-swe-agent.com/latest/assets/mini_square.svg" ], "site": "https://mini-swe-agent.com/latest/", - "folder": "20250807_mini-v1.7.0_gpt-5-mini-2025-08-07", + "folder": "20250807_mini-v1.7.0_gpt-5-mini", "resolved": 59.8, "date": "2025-08-07", - "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/logs", - "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini-2025-08-07/trajs", + "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini/logs", + "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-mini/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/7f5e3cfc-8e38-4c73-812c-aca21298e7bc", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1803,6 +2139,10 @@ "date": "2025-06-29", "logs": "s3://swe-bench-experiments/verified/20250629_deepswerl_r2eagent_tts/logs", "trajs": "s3://swe-bench-experiments/verified/20250629_deepswerl_r2eagent_tts/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -1824,6 +2164,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_o3-2025-04-16/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_o3-2025-04-16/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/565e5680-b913-4031-b537-00721a7a619a", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -1847,6 +2191,10 @@ "date": "2025-04-10", "logs": "s3://swe-bench-experiments/verified/20250410_cortexa/logs", "trajs": "s3://swe-bench-experiments/verified/20250410_cortexa/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": true, @@ -1876,6 +2224,10 @@ "date": "2024-12-13", "logs": "s3://swe-bench-experiments/verified/20241213_devlo/logs", "trajs": "s3://swe-bench-experiments/verified/20241213_devlo/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1895,6 +2247,10 @@ "date": "2024-12-23", "logs": "s3://swe-bench-experiments/verified/20241223_emergent/logs", "trajs": "s3://swe-bench-experiments/verified/20241223_emergent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1916,6 +2272,10 @@ "date": "2024-12-08", "logs": "s3://swe-bench-experiments/verified/20241208_gru/logs", "trajs": "s3://swe-bench-experiments/verified/20241208_gru/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1933,6 +2293,10 @@ "date": "2025-04-05", "logs": "s3://swe-bench-experiments/verified/20250405_swe-rizzo_claude37/logs", "trajs": "s3://swe-bench-experiments/verified/20250405_swe-rizzo_claude37/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -1952,6 +2316,10 @@ "date": "2024-12-12", "logs": "s3://swe-bench-experiments/verified/20241212_epam-ai-run-claude-3-5-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20241212_epam-ai-run-claude-3-5-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -1967,11 +2335,15 @@ "https://mini-swe-agent.com/latest/assets/mini_square.svg" ], "site": "https://mini-swe-agent.com/latest/", - "folder": "20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct", + "folder": "20250802_mini-v1.0.0_qwen3-coder-480b-a35b-instruct", "resolved": 55.4, "date": "2025-08-02", "logs": "s3://swe-bench-experiments/verified/20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250802_mini-v1.0.0_Qwen3-Coder-480B-A35B-Instruct/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/f39d3041-d9d7-4f1b-b75e-8a13addb9e6e", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -1995,6 +2367,10 @@ "date": "2024-12-02", "logs": "s3://swe-bench-experiments/verified/20241202_amazon-q-developer-agent-20241202-dev/logs", "trajs": "s3://swe-bench-experiments/verified/20241202_amazon-q-developer-agent-20241202-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2015,6 +2391,10 @@ "date": "2024-11-08", "logs": "s3://swe-bench-experiments/verified/20241108_devlo/logs", "trajs": "s3://swe-bench-experiments/verified/20241108_devlo/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2035,6 +2415,10 @@ "date": "2508-22-", "logs": "s3://swe-bench-experiments/verified/250822_mini-v1.9.1_glm-4.5/logs/", "trajs": "s3://swe-bench-experiments/verified/250822_mini-v1.9.1_glm-4.5/trajs/", + "trajs_docent": "https://docent.transluce.org/dashboard/9ae7f9a3-c9e3-48af-833c-52adcc7f5921", + "cost": 20.840391999999994, + "instance_cost": 0.26718451282051275, + "instance_calls": 37.6025641025641, "os_model": true, "os_system": true, "checked": true, @@ -2058,6 +2442,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gemini-2.5-pro/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gemini-2.5-pro/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/8b22ae97-2cfd-4b05-8438-38550cfaa163", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2081,6 +2469,10 @@ "date": "2025-01-20", "logs": "s3://swe-bench-experiments/verified/20250120_Bracket/logs", "trajs": "s3://swe-bench-experiments/verified/20250120_Bracket/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2100,6 +2492,10 @@ "date": "2024-10-29", "logs": "s3://swe-bench-experiments/verified/20241029_OpenHands-CodeAct-2.1-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/verified/20241029_OpenHands-CodeAct-2.1-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2120,6 +2516,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-claude-3-7-sonnet-20250219/logs", "trajs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-claude-3-7-sonnet-20250219/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2143,6 +2543,10 @@ "date": "2024-12-12", "logs": "s3://swe-bench-experiments/verified/20241212_google_jules_gemini_2.0_flash_experimental/logs", "trajs": "s3://swe-bench-experiments/verified/20241212_google_jules_gemini_2.0_flash_experimental/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2163,6 +2567,10 @@ "date": "2024-11-25", "logs": "s3://swe-bench-experiments/verified/20241125_enginelabs/logs", "trajs": "s3://swe-bench-experiments/verified/20241125_enginelabs/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2184,6 +2592,10 @@ "date": "2025-01-22", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2205,6 +2617,10 @@ "date": "2025-08-05", "logs": "s3://swe-bench-experiments/verified/20250805-openhands-Qwen3-Coder-30B-A3B-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250805-openhands-Qwen3-Coder-30B-A3B-Instruct/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -2227,6 +2643,10 @@ "date": "2024-12-02", "logs": "s3://swe-bench-experiments/verified/20241202_agentless-1.5_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/verified/20241202_agentless-1.5_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -2248,6 +2668,10 @@ "date": "2024-10-28", "logs": "s3://swe-bench-experiments/verified/20241028_solver/logs", "trajs": "s3://swe-bench-experiments/verified/20241028_solver/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2267,6 +2691,10 @@ "date": "2024-11-25", "logs": "s3://swe-bench-experiments/verified/20241125_marscode-agent-dev/logs", "trajs": "s3://swe-bench-experiments/verified/20241125_marscode-agent-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2287,6 +2715,10 @@ "date": "2024-11-05", "logs": "s3://swe-bench-experiments/verified/20241105_nfactorial/logs", "trajs": "s3://swe-bench-experiments/verified/20241105_nfactorial/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2308,6 +2740,10 @@ "date": "2024-10-22", "logs": "s3://swe-bench-experiments/verified/20241022_tools_claude-3-5-sonnet-updated/logs", "trajs": "s3://swe-bench-experiments/verified/20241022_tools_claude-3-5-sonnet-updated/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2330,6 +2766,10 @@ "date": "2024-10-25", "logs": "s3://swe-bench-experiments/verified/20241025_composio_swekit/logs", "trajs": "s3://swe-bench-experiments/verified/20241025_composio_swekit/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2351,6 +2791,10 @@ "date": "2024-11-06", "logs": "s3://swe-bench-experiments/verified/20241106_navie-2-gpt4o-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20241106_navie-2-gpt4o-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2372,6 +2816,10 @@ "date": "2025-06-16", "logs": "s3://swe-bench-experiments/verified/20250616_Skywork-SWE-32B+TTS_Bo8/logs", "trajs": "s3://swe-bench-experiments/verified/20250616_Skywork-SWE-32B+TTS_Bo8/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -2394,6 +2842,10 @@ "date": "2025-05-20", "logs": "s3://swe-bench-experiments/verified/20250520_openhands_devstral_small/logs", "trajs": "s3://swe-bench-experiments/verified/20250520_openhands_devstral_small/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -2416,6 +2868,10 @@ "date": "2024-10-23", "logs": "s3://swe-bench-experiments/verified/20241023_emergent/logs", "trajs": "s3://swe-bench-experiments/verified/20241023_emergent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2437,6 +2893,10 @@ "date": "2024-11-08", "logs": "s3://swe-bench-experiments/verified/20241108_autocoderover-v2.0-claude-3-5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/verified/20241108_autocoderover-v2.0-claude-3-5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -2457,6 +2917,10 @@ "date": "2025-05-28", "logs": "s3://swe-bench-experiments/verified/20250528_patchpilot_Co-PatcheR/logs", "trajs": "s3://swe-bench-experiments/verified/20250528_patchpilot_Co-PatcheR/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -2476,6 +2940,10 @@ "date": "2024-09-24", "logs": "s3://swe-bench-experiments/verified/20240924_solver/logs", "trajs": "s3://swe-bench-experiments/verified/20240924_solver/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2495,6 +2963,10 @@ "date": "2024-08-24", "logs": "s3://swe-bench-experiments/verified/20240824_gru/logs", "trajs": "s3://swe-bench-experiments/verified/20240824_gru/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2514,6 +2986,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250521_o4-mini-20250416/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_o4-mini-2025-04-16/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/f46eb5b4-b4b2-478e-bc11-f1eb47eca637", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2537,6 +3013,10 @@ "date": "2025-01-18", "logs": "evaluation/verified/20250118_codeshellagent_gemini_2.0_flash_experimental/logs", "trajs": "evaluation/verified/20250118_codeshellagent_gemini_2.0_flash_experimental/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2552,11 +3032,15 @@ "https://mini-swe-agent.com/latest/assets/mini_square.svg" ], "site": "https://mini-swe-agent.com/latest/", - "folder": "20250807_mini-v1.7.0_Kimi-K2-Instruct", + "folder": "20250807_mini-v1.7.0_kimi-k2-instruct", "resolved": 43.8, "date": "2025-08-07", "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_Kimi-K2-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_Kimi-K2-Instruct/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/b6a6d69b-e69e-4011-aa94-46f44b84202e", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -2580,6 +3064,10 @@ "date": "2024-09-20", "logs": "s3://swe-bench-experiments/verified/20240920_solver/logs", "trajs": "s3://swe-bench-experiments/verified/20240920_solver/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2599,6 +3087,10 @@ "date": "2025-05-27", "logs": "s3://swe-bench-experiments/verified/20250527_amazon.nova-premier-v1.0/logs", "trajs": "s3://swe-bench-experiments/verified/20250527_amazon.nova-premier-v1.0/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2620,6 +3112,10 @@ "date": "2025-02-14", "logs": "s3://swe-bench-experiments/verified/20250214_agentless_lite_o3_mini/logs", "trajs": "s3://swe-bench-experiments/verified/20250214_agentless_lite_o3_mini/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2641,6 +3137,10 @@ "date": "2025-06-29", "logs": "s3://swe-bench-experiments/verified/20250629_deepswerl_r2eagent/logs", "trajs": "s3://swe-bench-experiments/verified/20250629_deepswerl_r2eagent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -2662,6 +3162,10 @@ "date": "2025-08-06", "logs": "s3://swe-bench-experiments/verified/20250806_SWE-Exp_DeepSeek-V3/logs", "trajs": "s3://swe-bench-experiments/verified/20250806_SWE-Exp_DeepSeek-V3/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": "false (See README.md for info on how to get your results verified)", @@ -2681,6 +3185,10 @@ "date": "2025-01-12", "logs": "s3://swe-bench-experiments/verified/20250112_ugaiforge/logs", "trajs": "s3://swe-bench-experiments/verified/20250112_ugaiforge/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2700,6 +3208,10 @@ "date": "2024-10-30", "logs": "s3://swe-bench-experiments/verified/20241030_nfactorial/logs", "trajs": "s3://swe-bench-experiments/verified/20241030_nfactorial/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2722,6 +3234,10 @@ "date": "2025-02-26", "logs": "s3://swe-bench-experiments/verified/20250226_swerl_llama3_70b/logs", "trajs": "s3://swe-bench-experiments/verified/20250226_swerl_llama3_70b/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -2744,6 +3260,10 @@ "date": "2024-11-13", "logs": "s3://swe-bench-experiments/verified/20241113_nebius-search-open-weight-models-11-24/logs", "trajs": "s3://swe-bench-experiments/verified/20241113_nebius-search-open-weight-models-11-24/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2765,6 +3285,10 @@ "date": "2024-10-22", "logs": "s3://swe-bench-experiments/verified/20241022_tools_claude-3-5-haiku/logs", "trajs": "s3://swe-bench-experiments/verified/20241022_tools_claude-3-5-haiku/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2786,6 +3310,10 @@ "date": "2024-08-20", "logs": "s3://swe-bench-experiments/verified/20240820_honeycomb/logs", "trajs": "s3://swe-bench-experiments/verified/20240820_honeycomb/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2805,6 +3333,10 @@ "date": "2024-10-16", "logs": "s3://swe-bench-experiments/verified/20241016_composio_swekit/logs", "trajs": "s3://swe-bench-experiments/verified/20241016_composio_swekit/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -2826,6 +3358,10 @@ "date": "2025-05-11", "logs": "s3://swe-bench-experiments/verified/20250511_sweagent_lm_32b/logs", "trajs": "s3://swe-bench-experiments/verified/20250511_sweagent_lm_32b/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -2847,6 +3383,10 @@ "date": "2024-10-29", "logs": "s3://swe-bench-experiments/verified/20241029_epam-ai-run-claude-3-5-sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20241029_epam-ai-run-claude-3-5-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2867,6 +3407,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gpt-4.1-2025-04-14/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gpt-4.1-2025-04-14/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/cd7a23c5-a2b1-4cab-b851-6e2c42aaf0f3", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -2890,6 +3434,10 @@ "date": "2024-07-21", "logs": "s3://swe-bench-experiments/verified/20240721_amazon-q-developer-agent-20240719-dev/logs", "trajs": "s3://swe-bench-experiments/verified/20240721_amazon-q-developer-agent-20240719-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2910,6 +3458,10 @@ "date": "2024-10-28", "logs": "s3://swe-bench-experiments/verified/20241028_agentless-1.5_gpt4o/logs", "trajs": "s3://swe-bench-experiments/verified/20241028_agentless-1.5_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -2931,6 +3483,10 @@ "date": "2024-06-28", "logs": "s3://swe-bench-experiments/verified/20240628_autocoderover-v20240620/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -2951,6 +3507,10 @@ "date": "2025-06-16", "logs": "s3://swe-bench-experiments/verified/20250616_Skywork-SWE-32B/logs", "trajs": "s3://swe-bench-experiments/verified/20250616_Skywork-SWE-32B/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -2972,6 +3532,10 @@ "date": "2025-07-25", "logs": "s3://swe-bench-experiments/verified/20250725_sweagent_devstral_small_2507/logs", "trajs": "s3://swe-bench-experiments/verified/20250725_sweagent_devstral_small_2507/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -2993,6 +3557,10 @@ "date": "2024-06-17", "logs": "s3://swe-bench-experiments/verified/20240617_factory_code_droid/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3007,11 +3575,15 @@ "https://mini-swe-agent.com/latest/assets/mini_square.svg" ], "site": "https://mini-swe-agent.com/latest/", - "folder": "20250807_mini-v1.7.0_gpt-5-nano-2025-08-07", + "folder": "20250807_mini-v1.7.0_gpt-5-nano", "resolved": 34.8, "date": "2025-08-07", - "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/logs", - "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano-2025-08-07/trajs", + "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano/logs", + "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-5-nano/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/3dc9b424-547c-4bdc-9b72-3b58d316cb0a", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3035,6 +3607,10 @@ "date": "2024-06-20", "logs": "s3://swe-bench-experiments/verified/20240620_sweagent_claude3.5sonnet/logs", "trajs": "s3://swe-bench-experiments/verified/20240620_sweagent_claude3.5sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3056,6 +3632,10 @@ "date": "2025-03-06", "logs": "s3://swe-bench-experiments/verified/20250306_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor/logs", "trajs": "s3://swe-bench-experiments/verified/20250306_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -3074,6 +3654,10 @@ "date": "2024-06-12", "logs": "s3://swe-bench-experiments/verified/20240612_MASAI_gpt4o/logs", "trajs": "s3://swe-bench-experiments/verified/20240612_MASAI_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3094,6 +3678,10 @@ "date": "2024-11-20", "logs": "s3://swe-bench-experiments/verified/20241120_artemis_agent/logs", "trajs": "s3://swe-bench-experiments/verified/20241120_artemis_agent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3113,6 +3701,10 @@ "date": "2024-10-07", "logs": "s3://swe-bench-experiments/verified/20241007_nfactorial/logs", "trajs": "s3://swe-bench-experiments/verified/20241007_nfactorial/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3132,6 +3724,10 @@ "date": "2024-11-28", "logs": "s3://swe-bench-experiments/verified/20241128_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor_20241128/logs", "trajs": "s3://swe-bench-experiments/verified/20241128_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor_20241128/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3152,6 +3748,10 @@ "date": "2024-10-02", "logs": "s3://swe-bench-experiments/verified/20241002_lingma-agent_lingma-swe-gpt-72b/logs", "trajs": "s3://swe-bench-experiments/verified/20241002_lingma-agent_lingma-swe-gpt-72b/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3172,6 +3772,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gemini-2.5-flash/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gemini-2.5-flash/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/37e5933e-71a1-4011-8340-fbca563124b3", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3195,6 +3799,10 @@ "date": "2024-10-16", "logs": "s3://swe-bench-experiments/verified/20241016_epam-ai-run-gpt-4o/logs", "trajs": "s3://swe-bench-experiments/verified/20241016_epam-ai-run-gpt-4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3215,6 +3823,10 @@ "date": "2024-06-15", "logs": "s3://swe-bench-experiments/verified/20240615_appmap-navie_gpt4o/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3235,6 +3847,10 @@ "date": "2025-08-07", "logs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-oss-120b/logs", "trajs": "s3://swe-bench-experiments/verified/20250807_mini-v1.7.0_gpt-oss-120b/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/4a0a2b2a-a65b-452a-88c7-bb5d99745c75", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -3258,6 +3874,10 @@ "date": "2024-10-01", "logs": "s3://swe-bench-experiments/verified/20241001_nfactorial/logs", "trajs": "s3://swe-bench-experiments/verified/20241001_nfactorial/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3277,6 +3897,10 @@ "date": "2024-05-09", "logs": "s3://swe-bench-experiments/verified/20240509_amazon-q-developer-agent-20240430-dev/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3297,6 +3921,10 @@ "date": "2024-09-18", "logs": "s3://swe-bench-experiments/verified/20240918_lingma-agent_lingma-swe-gpt-72b/logs", "trajs": "s3://swe-bench-experiments/verified/20240918_lingma-agent_lingma-swe-gpt-72b/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3317,6 +3945,10 @@ "date": "2024-08-20", "logs": "s3://swe-bench-experiments/verified/20240820_epam-ai-run-gpt-4o/logs", "trajs": "s3://swe-bench-experiments/verified/20240820_epam-ai-run-gpt-4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3337,6 +3969,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14/logs", "trajs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0_gpt-4.1-mini-2025-04-14/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3358,6 +3994,10 @@ "date": "2025-06-27", "logs": "s3://swe-bench-experiments/verified/20250627_agentless_MCTS-Refine-7B/logs", "trajs": "s3://swe-bench-experiments/verified/20250627_agentless_MCTS-Refine-7B/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -3378,6 +4018,10 @@ "date": "2024-07-28", "logs": "s3://swe-bench-experiments/verified/20240728_sweagent_gpt4o/logs", "trajs": "s3://swe-bench-experiments/verified/20240728_sweagent_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3399,6 +4043,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/verified/20240402_sweagent_gpt4/logs", "trajs": "s3://swe-bench-experiments/verified/20240402_sweagent_gpt4/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3420,6 +4068,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-gpt-4o-2024-11-20/logs", "trajs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-gpt-4o-2024-11-20/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3443,6 +4095,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-Llama-4-Maverick-17B-Instruct/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3466,6 +4122,10 @@ "date": "2024-10-02", "logs": "s3://swe-bench-experiments/verified/20241002_lingma-agent_lingma-swe-gpt-7b/logs", "trajs": "s3://swe-bench-experiments/verified/20241002_lingma-agent_lingma-swe-gpt-7b/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3486,6 +4146,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/verified/20240402_sweagent_claude3opus/logs", "trajs": "s3://swe-bench-experiments/verified/20240402_sweagent_claude3opus/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3507,6 +4171,10 @@ "date": "2025-07-26", "logs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gemini-2.0-flash/logs", "trajs": "s3://swe-bench-experiments/verified/20250726_mini-v1.0.0_gemini-2.0-flash/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3530,6 +4198,10 @@ "date": "2024-09-18", "logs": "s3://swe-bench-experiments/verified/20240918_lingma-agent_lingma-swe-gpt-7b/logs", "trajs": "s3://swe-bench-experiments/verified/20240918_lingma-agent_lingma-swe-gpt-7b/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3550,6 +4222,10 @@ "date": "2025-07-20", "logs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250720_mini-v0.0.0-Llama-4-Scout-17B-Instruct/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3568,11 +4244,15 @@ "https://mini-swe-agent.com/latest/assets/mini_square.svg" ], "site": "https://mini-swe-agent.com/latest/", - "folder": "20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct", + "folder": "20250803_mini-v1.0.0_qwen2-5-coder-32b-instruct", "resolved": 9.0, "date": "2025-08-03", "logs": "s3://swe-bench-experiments/verified/20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct/logs", "trajs": "s3://swe-bench-experiments/verified/20250803_mini-v1.0.0_Qwen2.5-Coder-32B-Instruct/trajs", + "trajs_docent": "https://docent.transluce.org/dashboard/f5288cc2-48b7-4df9-8e30-072d96b58606", + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -3596,6 +4276,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/verified/20240402_rag_claude3opus/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3616,6 +4300,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/verified/20231010_rag_claude2/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3636,6 +4324,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/verified/20240402_rag_gpt4/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3656,6 +4348,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/verified/20231010_rag_swellama7b/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3677,6 +4373,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/verified/20231010_rag_swellama13b/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3697,6 +4397,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/verified/20231010_rag_gpt35/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3722,6 +4426,10 @@ "date": "2025-06-25", "logs": "s3://swe-bench-experiments/lite/20250625_ExpeRepair-v1_claude-4-sonnet-20250514/logs", "trajs": "s3://swe-bench-experiments/lite/20250625_ExpeRepair-v1_claude-4-sonnet-20250514/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3744,6 +4452,10 @@ "date": "2025-04-25", "logs": "s3://swe-bench-experiments/lite/20250425_Refact_Agent/logs", "trajs": "s3://swe-bench-experiments/lite/20250425_Refact_Agent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3766,6 +4478,10 @@ "date": "2025-05-26", "logs": "s3://swe-bench-experiments/lite/20250522_sweagent_claude-4-sonnet-20250514/trajs", "trajs": "s3://swe-bench-experiments/lite/20250522_sweagent_claude-4-sonnet-20250514/logs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3787,6 +4503,10 @@ "date": "2025-01-14", "logs": "s3://swe-bench-experiments/lite/20250114_Isoform/logs", "trajs": "s3://swe-bench-experiments/lite/20250114_Isoform/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3806,6 +4526,10 @@ "date": "2025-06-25", "logs": "s3://swe-bench-experiments/lite/20250625_SemAgent_Multi-v1_Claude3.7Sonnet_Gemini2.5Pro/logs", "trajs": "https://github.com/AnvithPabba/SemAgent_Multi/tree/main/trajs/SWEBench_Lite/20250625_SemAgent_Multi-v1.0_Claude3.7Sonnet_Gemini2.5Pro", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3827,6 +4551,10 @@ "date": "2024-12-20", "logs": "s3://swe-bench-experiments/lite/20241220_blackboxai_agent_v1/logs", "trajs": "s3://swe-bench-experiments/lite/20241220_blackboxai_agent_v1/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3842,6 +4570,10 @@ "date": "2025-05-28", "logs": "s3://swe-bench-experiments/lite/20250528_Codev/logs", "trajs": "s3://swe-bench-experiments/lite/20250528_Codev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3869,6 +4601,10 @@ "date": "2024-12-08", "logs": "s3://swe-bench-experiments/lite/20241208_gru/logs", "trajs": "s3://swe-bench-experiments/lite/20241208_gru/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3886,6 +4622,10 @@ "date": "2025-06-13", "logs": "s3://swe-bench-experiments/lite/20250613_ExpeRepair-v1.0/logs", "trajs": "s3://swe-bench-experiments/lite/20250613_ExpeRepair-v1.0/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3908,6 +4648,10 @@ "date": "2024-11-27", "logs": "s3://swe-bench-experiments/lite/20241127_globant_codefixer_agent/logs", "trajs": "s3://swe-bench-experiments/lite/20241127_globant_codefixer_agent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3925,6 +4669,10 @@ "date": "2025-02-26", "logs": "s3://swe-bench-experiments/lite/20250226_sweagent_claude-3-7-sonnet-20250219/trajs", "trajs": "s3://swe-bench-experiments/lite/20250226_sweagent_claude-3-7-sonnet-20250219/logs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -3946,6 +4694,10 @@ "date": "2024-11-22", "logs": "s3://swe-bench-experiments/lite/20241122_devlo/logs", "trajs": "s3://swe-bench-experiments/lite/20241122_devlo/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -3965,6 +4717,10 @@ "date": "2025-02-05", "logs": "s3://swe-bench-experiments/lite/20250205_dars_agent_claude_3.5_sonnet_deepseek_r1/logs", "trajs": "s3://swe-bench-experiments/lite/20250205_dars_agent_claude_3.5_sonnet_deepseek_r1/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -3985,6 +4741,10 @@ "date": "2025-06-19", "logs": "s3://swe-bench-experiments/lite/20250619_KGCompass_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20250619_KGCompass_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4006,6 +4766,10 @@ "date": "2024-12-07", "logs": "s3://swe-bench-experiments/lite/20241207_kodu_sonnet_v1/logs", "trajs": "s3://swe-bench-experiments/lite/20241207_kodu_sonnet_v1/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4025,6 +4789,10 @@ "date": "2025-03-10", "logs": "s3://swe-bench-experiments/lite/20250310_codefuse-cgm/logs", "trajs": "s3://swe-bench-experiments/lite/20250310_codefuse-cgm/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -4043,6 +4811,10 @@ "date": "2024-07-02", "logs": "s3://swe-bench-experiments/lite/20240702_codestory_aide_mixed/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4060,6 +4832,10 @@ "date": "2025-05-09", "logs": "s3://swe-bench-experiments/lite/20250509_Lingxi_claude-3-5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20250509_Lingxi_claude-3-5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4079,6 +4855,10 @@ "date": "2024-10-25", "logs": "s3://swe-bench-experiments/lite/20241025_OpenHands-CodeAct-2.1-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20241025_OpenHands-CodeAct-2.1-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4097,6 +4877,10 @@ "date": "2025-05-15", "logs": "s3://swe-bench-experiments/lite/20250515_codartai/logs", "trajs": "s3://swe-bench-experiments/lite/20250515_codartai/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4119,6 +4903,10 @@ "date": "2024-12-20", "logs": "s3://swe-bench-experiments/lite/20241220_PatchKitty-0.9_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20241220_PatchKitty-0.9_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4138,6 +4926,10 @@ "date": "2025-01-13", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4158,6 +4950,10 @@ "date": "2024-10-30", "logs": "s3://swe-bench-experiments/lite/20241030_composio_swekit/logs", "trajs": "s3://swe-bench-experiments/lite/20241030_composio_swekit/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4178,6 +4974,10 @@ "date": "2024-12-02", "logs": "s3://swe-bench-experiments/lite/20241202_agentless-1.5_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20241202_agentless-1.5_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4198,6 +4998,10 @@ "date": "2025-01-13", "logs": "s3://swe-bench-experiments/verified/20250113_OpenCSG-Starship-Agentic-Coder_gpt4o/logs", "trajs": "s3://swe-bench-experiments/verified/20250113_OpenCSG-Starship-Agentic-Coder_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4217,6 +5021,10 @@ "date": "2024-09-12", "logs": "s3://swe-bench-experiments/lite/20240912_marscode-agent-dev/logs", "trajs": "s3://swe-bench-experiments/lite/20240912_marscode-agent-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4234,6 +5042,10 @@ "date": "2025-01-14", "logs": "s3://swe-bench-experiments/lite/20250114_moatless_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20250114_moatless_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4252,6 +5064,10 @@ "date": "2024-11-17", "logs": "s3://swe-bench-experiments/lite/20241117_moatless_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20241117_moatless_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4272,6 +5088,10 @@ "date": "2024-08-20", "logs": "s3://swe-bench-experiments/lite/20240820_honeycomb/logs", "trajs": "s3://swe-bench-experiments/lite/20240820_honeycomb/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4287,6 +5107,10 @@ "date": "2024-06-27", "logs": "s3://swe-bench-experiments/lite/20240627_abanteai_mentatbot_gpt4o/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4306,6 +5130,10 @@ "date": "2025-01-04", "logs": "s3://swe-bench-experiments/lite/20250104_patched_codes_claude-3.5-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20250104_patched_codes_claude-3.5-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4325,6 +5153,10 @@ "date": "2025-06-09", "logs": "s3://swe-bench-experiments/lite/20250609_KGCompass_deepseek-v3/logs", "trajs": "s3://swe-bench-experiments/lite/20250609_KGCompass_deepseek-v3/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -4346,6 +5178,10 @@ "date": "2024-11-13", "logs": "s3://swe-bench-experiments/lite/20241113_navie-2-gpt4o-sonnet/logs", "trajs": "s3://swe-bench-experiments/lite/20241113_navie-2-gpt4o-sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4366,6 +5202,10 @@ "date": "2025-01-04", "logs": "s3://swe-bench-experiments/lite/20250104_codefuse-aais/logs", "trajs": "s3://swe-bench-experiments/lite/20250104_codefuse-aais/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4383,6 +5223,10 @@ "date": "2024-08-11", "logs": "s3://swe-bench-experiments/lite/20240811_gru/logs", "trajs": "s3://swe-bench-experiments/lite/20240811_gru/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4398,6 +5242,10 @@ "date": "2024-08-29", "logs": "s3://swe-bench-experiments/lite/20240829_Isoform/logs", "trajs": "s3://swe-bench-experiments/lite/20240829_Isoform/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4415,6 +5263,10 @@ "date": "2024-08-06", "logs": "s3://swe-bench-experiments/lite/20240806_SuperCoder2.0/logs", "trajs": "s3://swe-bench-experiments/lite/20240806_SuperCoder2.0/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4432,6 +5284,10 @@ "date": "2024-07-23", "logs": "s3://swe-bench-experiments/lite/20240723_marscode-agent-dev/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4452,6 +5308,10 @@ "date": "2024-06-22", "logs": "s3://swe-bench-experiments/lite/20240622_Lingma_Agent/logs", "trajs": "s3://swe-bench-experiments/lite/20240622_Lingma_Agent/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4471,6 +5331,10 @@ "date": "2025-02-14", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4491,6 +5355,10 @@ "date": "2024-10-28", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4511,6 +5379,10 @@ "date": "2024-06-17", "logs": "s3://swe-bench-experiments/lite/20240617_factory_code_droid/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4528,6 +5400,10 @@ "date": "2024-11-11", "logs": "s3://swe-bench-experiments/lite/20241111_codeshelltester_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20241111_codeshelltester_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4545,6 +5421,10 @@ "date": "2025-01-11", "logs": "s3://swe-bench-experiments/lite/20250111_moatless_deepseek_v3/logs", "trajs": "s3://swe-bench-experiments/lite/20250111_moatless_deepseek_v3/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -4565,6 +5445,10 @@ "date": "2024-06-21", "logs": "s3://swe-bench-experiments/lite/20240621_autocoderover-v20240620/logs", "trajs": "s3://swe-bench-experiments/lite/20240621_autocoderover-v20240620/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4582,6 +5466,10 @@ "date": "2025-02-07", "logs": "s3://swe-bench-experiments/lite/20250207_aegis_o3mini/logs", "trajs": "s3://swe-bench-experiments/lite/20250207_aegis_o3mini/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4599,6 +5487,10 @@ "date": "2024-09-08", "logs": "s3://swe-bench-experiments/lite/20240908_infant_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20240908_infant_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4616,6 +5508,10 @@ "date": "2024-12-03", "logs": "s3://swe-bench-experiments/lite/20241203_KortixAI-AgentPress-sonnet-20241022/logs", "trajs": "s3://swe-bench-experiments/lite/20241203_KortixAI-AgentPress-sonnet-20241022/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": true, @@ -4635,6 +5531,10 @@ "date": "2024-07-21", "logs": "s3://swe-bench-experiments/lite/20240721_amazon-q-developer-agent-20240719-dev/logs", "trajs": "s3://swe-bench-experiments/lite/20240721_amazon-q-developer-agent-20240719-dev/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4654,6 +5554,10 @@ "date": "2024-08-08", "logs": "s3://swe-bench-experiments/lite/20240808_RepoGraph_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20240808_RepoGraph_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4674,6 +5578,10 @@ "date": "2024-06-04", "logs": "s3://swe-bench-experiments/lite/20240604_CodeR/logs", "trajs": "s3://swe-bench-experiments/lite/20240604_CodeR/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4691,6 +5599,10 @@ "date": "2024-11-17", "logs": "s3://swe-bench-experiments/lite/20241117_reproducedRG_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20241117_reproducedRG_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4708,6 +5620,10 @@ "date": "2024-07-06", "logs": "s3://swe-bench-experiments/lite/20240706_sima_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20240706_sima_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4725,6 +5641,10 @@ "date": "2024-06-12", "logs": "s3://swe-bench-experiments/lite/20240612_MASAI_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20240612_MASAI_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4744,6 +5664,10 @@ "date": "2024-06-30", "logs": "s3://swe-bench-experiments/lite/20240630_agentless_gpt4o/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4762,6 +5686,10 @@ "date": "2024-06-23", "logs": "s3://swe-bench-experiments/lite/20240623_moatless_claude35sonnet/logs", "trajs": "s3://swe-bench-experiments/lite/20240623_moatless_claude35sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4782,6 +5710,10 @@ "date": "2024-07-25", "logs": "s3://swe-bench-experiments/lite/20240725_opendevin_codeact_v1.8_claude35sonnet/logs", "trajs": "s3://swe-bench-experiments/lite/20240725_opendevin_codeact_v1.8_claude35sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4802,6 +5734,10 @@ "date": "2024-06-12", "logs": "s3://swe-bench-experiments/lite/20240612_IBM_Research_Agent101/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4821,6 +5757,10 @@ "date": "2024-05-23", "logs": "s3://swe-bench-experiments/lite/20240523_aider/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -4839,6 +5779,10 @@ "date": "2024-09-25", "logs": "s3://swe-bench-experiments/lite/20240925_hyperagent_lite1/logs", "trajs": "s3://swe-bench-experiments/lite/20240925_hyperagent_lite1/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4858,6 +5802,10 @@ "date": "2025-03-06", "logs": "s3://swe-bench-experiments/lite/20250306_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor/logs", "trajs": "s3://swe-bench-experiments/lite/20250306_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": true, @@ -4875,6 +5823,10 @@ "date": "2024-06-17", "logs": "s3://swe-bench-experiments/lite/20240617_moatless_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20240617_moatless_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": true, @@ -4895,6 +5847,10 @@ "date": "2024-10-16", "logs": "s3://swe-bench-experiments/lite/20241016_IBM-SWE-1.0/logs", "trajs": "s3://swe-bench-experiments/lite/20241016_IBM-SWE-1.0/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4912,6 +5868,10 @@ "date": "2024-05-24", "logs": "s3://swe-bench-experiments/lite/20240524_opencsg_starship_gpt4/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -4931,6 +5891,10 @@ "date": "2024-11-28", "logs": "s3://swe-bench-experiments/lite/20241128_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor_20241128/logs", "trajs": "s3://swe-bench-experiments/lite/20241128_SWE-Fixer_Qwen2.5-7b-retriever_Qwen2.5-72b-editor_20241128/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -4950,6 +5914,10 @@ "date": "2024-06-20", "logs": "s3://swe-bench-experiments/lite/20240620_sweagent_claude3.5sonnet/logs", "trajs": "s3://swe-bench-experiments/lite/20240620_sweagent_claude3.5sonnet/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4970,6 +5938,10 @@ "date": "2024-06-15", "logs": "s3://swe-bench-experiments/lite/20240615_appmap-navie_gpt4o/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -4989,6 +5961,10 @@ "date": "2024-08-28", "logs": "s3://swe-bench-experiments/lite/20240828_autose_mixed/logs", "trajs": "s3://swe-bench-experiments/lite/20240828_autose_mixed/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -5009,6 +5985,10 @@ "date": "2024-05-09", "logs": "s3://swe-bench-experiments/lite/20240509_amazon-q-developer-agent-20240430-dev/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": false, @@ -5028,6 +6008,10 @@ "date": "2024-05-30", "logs": "s3://swe-bench-experiments/lite/20240530_autocoderover-v20240408/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -5047,6 +6031,10 @@ "date": "2024-07-28", "logs": "s3://swe-bench-experiments/lite/20240728_sweagent_gpt4o/logs", "trajs": "s3://swe-bench-experiments/lite/20240728_sweagent_gpt4o/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5067,6 +6055,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/lite/20240402_sweagent_gpt4/logs", "trajs": "s3://swe-bench-experiments/lite/20240402_sweagent_gpt4/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5085,6 +6077,10 @@ "date": "2025-06-27", "logs": "s3://swe-bench-experiments/lite/20250627_agentless_MCTS-Refine-7B/logs", "trajs": "s3://swe-bench-experiments/lite/20250627_agentless_MCTS-Refine-7B/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": true, "os_system": true, "checked": false, @@ -5105,6 +6101,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/lite/20240402_sweagent_claude3opus/logs", "trajs": "s3://swe-bench-experiments/lite/20240402_sweagent_claude3opus/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5124,6 +6124,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/lite/20240402_rag_claude3opus/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5143,6 +6147,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/lite/20231010_rag_claude2/logs", "trajs": "s3://swe-bench-experiments/lite/20231010_rag_claude2/trajs", + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5162,6 +6170,10 @@ "date": "2024-04-02", "logs": "s3://swe-bench-experiments/lite/20240402_rag_gpt4/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5181,6 +6193,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/lite/20231010_rag_swellama7b/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5201,6 +6217,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/lite/20231010_rag_swellama13b/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5220,6 +6240,10 @@ "date": "2023-10-10", "logs": "s3://swe-bench-experiments/lite/20231010_rag_gpt35/logs", "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5244,6 +6268,10 @@ "date": "2025-07-01", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5265,6 +6293,10 @@ "date": "2025-06-11", "logs": false, "trajs": false, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": false, @@ -5286,6 +6318,10 @@ "date": "2025-05-28", "logs": false, "trajs": false, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5307,6 +6343,10 @@ "date": "2025-05-31", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5328,6 +6368,10 @@ "date": "2025-05-09", "logs": false, "trajs": false, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5349,6 +6393,10 @@ "date": "2025-05-31", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5370,6 +6418,10 @@ "date": "2025-04-01", "logs": false, "trajs": false, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": true, @@ -5390,6 +6442,10 @@ "date": "2025-05-31", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5411,6 +6467,10 @@ "date": "2025-03-25", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": true, @@ -5430,6 +6490,10 @@ "date": "2025-03-11", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": false, "checked": true, @@ -5449,6 +6513,10 @@ "date": "2025-02-26", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5470,6 +6538,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5490,6 +6562,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5510,6 +6586,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5530,6 +6610,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5550,6 +6634,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5571,6 +6659,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5591,6 +6683,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5612,6 +6708,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5632,6 +6732,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, @@ -5652,6 +6756,10 @@ "date": "2024-10-06", "logs": null, "trajs": null, + "trajs_docent": false, + "cost": null, + "instance_cost": null, + "instance_calls": null, "os_model": false, "os_system": true, "checked": true, diff --git a/js/mainResults.js b/js/mainResults.js index 9dd8cab..ea1d831 100644 --- a/js/mainResults.js +++ b/js/mainResults.js @@ -43,6 +43,10 @@ function sortItems(a, b, field, direction) { case 'trajs': case 'site': return item[field] ? 1 : 0; + case 'instance_cost': + return parseFloat(item.instance_cost) || 0; + case 'trajs_docent': + return item.trajs_docent && item.trajs_docent !== false ? 1 : 0; case 'release': return (item['mini-swe-agent_version'] || '').toLowerCase(); default: @@ -96,9 +100,11 @@ function renderLeaderboardTable(leaderboard) { Model % Resolved + ${isBashOnly ? 'Avg. $' : ''} + ${isBashOnly ? 'Traj.' : ''} Org Date - Site + ${!isBashOnly ? 'Site' : ''} ${isBashOnly ? 'Release' : ''} @@ -121,6 +127,8 @@ function renderLeaderboardTable(leaderboard) { ${parseFloat(item.resolved).toFixed(2)} + ${isBashOnly ? `${item.instance_cost !== null && item.instance_cost !== undefined ? parseFloat(item.instance_cost).toFixed(2) : ''}` : ''} + ${isBashOnly ? `${item.trajs_docent && item.trajs_docent !== false ? `` : ''}` : ''} ${item.logo && item.logo.length > 0 ? `
@@ -129,14 +137,14 @@ function renderLeaderboardTable(leaderboard) { ` : '-'} ${item.date} - + ${!isBashOnly ? ` ${item.site ? `` : '-'} - - ${isBashOnly ? `${item['mini-swe-agent_version'] || '-'}` : ''} + ` : ''} + ${isBashOnly ? `${item['mini-swe-agent_version'] && item['mini-swe-agent_version'] !== '0.0.0' ? `${item['mini-swe-agent_version']}` : (item['mini-swe-agent_version'] || '-')}` : ''} `).join('')} - + No entries match the selected filters. Try adjusting your filters.