From cc9132d41d6f10aaa4ae317a20754a69ffdfd215 Mon Sep 17 00:00:00 2001 From: John Yang Date: Tue, 22 Oct 2024 19:10:23 -0700 Subject: [PATCH] Add org logos --- index.html | 581 +++++++++++++++++++++++++++++++++++ template/data.json | 258 ++++++++++------ template/template_index.html | 8 + 3 files changed, 761 insertions(+), 86 deletions(-) diff --git a/index.html b/index.html index 0ad364f..8e961ee 100644 --- a/index.html +++ b/index.html @@ -145,6 +145,7 @@

Leaderboard

Model
% Resolved
+
Org
Date
Logs
Trajs
@@ -164,6 +165,13 @@

Leaderboard

22.06

+ +

+ + + +

+

2024-08-20

@@ -195,6 +203,13 @@

Leaderboard

19.75

+ +

+ + + +

+

2024-07-21

@@ -226,6 +241,13 @@

Leaderboard

19.27

+ +

+ + + +

+

2024-06-17

@@ -256,6 +278,13 @@

Leaderboard

18.83

+ +

+ + + +

+

2024-06-28

@@ -286,6 +315,13 @@

Leaderboard

18.13

+ +

+ + + +

+

2024-06-20

@@ -314,6 +350,13 @@

Leaderboard

14.60

+ +

+ + + +

+

2024-06-15

@@ -344,6 +387,13 @@

Leaderboard

13.82

+ +

+ + + +

+

2024-05-09

@@ -374,6 +424,13 @@

Leaderboard

12.47

+ +

+ + + +

+

2024-04-02

@@ -404,6 +461,13 @@

Leaderboard

11.99

+ +

+ + + +

+

2024-07-28

@@ -434,6 +498,13 @@

Leaderboard

10.51

+ +

+ + + +

+

2024-04-02

@@ -462,6 +533,13 @@

Leaderboard

3.79

+ +

+ + + +

+

2024-04-02

@@ -492,6 +570,13 @@

Leaderboard

1.96

+ +

+ + + +

+

2023-10-10

@@ -520,6 +605,13 @@

Leaderboard

1.31

+ +

+ + + +

+

2024-04-02

@@ -548,6 +640,13 @@

Leaderboard

0.70

+ +

+ + + +

+

2023-10-10

@@ -576,6 +675,13 @@

Leaderboard

0.70

+ +

+ + + +

+

2023-10-10

@@ -604,6 +710,13 @@

Leaderboard

0.17

+ +

+ + + +

+

2023-10-10

@@ -632,6 +745,7 @@

Leaderboard

Model
% Resolved
+
Org
Date
Logs
Trajs
@@ -651,6 +765,13 @@

Leaderboard

49.00

+ +

+ + + +

+

2024-10-22

@@ -682,6 +803,13 @@

Leaderboard

45.40

+ +

+ + + +

+

2024-09-24

@@ -713,6 +841,13 @@

Leaderboard

45.20

+ +

+ + + +

+

2024-08-24

@@ -743,6 +878,13 @@

Leaderboard

43.60

+ +

+ + + +

+

2024-09-20

@@ -773,6 +915,13 @@

Leaderboard

40.60

+ +

+ + + +

+

2024-10-22

@@ -803,6 +952,13 @@

Leaderboard

40.60

+ +

+ + + +

+

2024-08-20

@@ -833,6 +989,13 @@

Leaderboard

40.60

+ +

+ + + +

+

2024-10-16

@@ -863,6 +1026,13 @@

Leaderboard

38.80

+ +

+ + + +

+

2024-07-21

@@ -893,6 +1063,13 @@

Leaderboard

38.40

+ +

+ + + +

+

2024-06-28

@@ -923,6 +1100,13 @@

Leaderboard

37.00

+ +

+ + + +

+

2024-06-17

@@ -953,6 +1137,13 @@

Leaderboard

33.60

+ +

+ + + +

+

2024-06-20

@@ -981,6 +1172,11 @@

Leaderboard

31.60

+ +

+ - +

+

2024-10-07

@@ -1011,6 +1207,13 @@

Leaderboard

28.80

+ +

+ + + +

+

2024-10-02

@@ -1041,6 +1244,13 @@

Leaderboard

27.00

+ +

+ + + +

+

2024-10-16

@@ -1071,6 +1281,13 @@

Leaderboard

26.20

+ +

+ + + +

+

2024-06-15

@@ -1101,6 +1318,11 @@

Leaderboard

25.80

+ +

+ - +

+

2024-10-01

@@ -1131,6 +1353,13 @@

Leaderboard

25.60

+ +

+ + + +

+

2024-05-09

@@ -1161,6 +1390,13 @@

Leaderboard

25.00

+ +

+ + + +

+

2024-09-18

@@ -1191,6 +1427,13 @@

Leaderboard

24.00

+ +

+ + + +

+

2024-08-20

@@ -1221,6 +1464,13 @@

Leaderboard

23.20

+ +

+ + + +

+

2024-07-28

@@ -1251,6 +1501,13 @@

Leaderboard

22.40

+ +

+ + + +

+

2024-04-02

@@ -1281,6 +1538,13 @@

Leaderboard

18.20

+ +

+ + + +

+

2024-04-02

@@ -1309,6 +1573,13 @@

Leaderboard

18.20

+ +

+ + + +

+

2024-10-02

@@ -1339,6 +1610,13 @@

Leaderboard

10.20

+ +

+ + + +

+

2024-09-18

@@ -1369,6 +1647,13 @@

Leaderboard

7.00

+ +

+ + + +

+

2024-04-02

@@ -1399,6 +1684,13 @@

Leaderboard

4.40

+ +

+ + + +

+

2023-10-10

@@ -1427,6 +1719,13 @@

Leaderboard

2.80

+ +

+ + + +

+

2024-04-02

@@ -1455,6 +1754,13 @@

Leaderboard

1.40

+ +

+ + + +

+

2023-10-10

@@ -1483,6 +1789,13 @@

Leaderboard

1.20

+ +

+ + + +

+

2023-10-10

@@ -1511,6 +1824,13 @@

Leaderboard

0.40

+ +

+ + + +

+

2023-10-10

@@ -1539,6 +1859,7 @@

Leaderboard

Model
% Resolved
+
Org
Date
Logs
Trajs
@@ -1558,6 +1879,11 @@

Leaderboard

43.00

+ +

+ - +

+

2024-07-02

@@ -1589,6 +1915,13 @@

Leaderboard

39.33

+ +

+ + + +

+

2024-09-12

@@ -1620,6 +1953,13 @@

Leaderboard

38.33

+ +

+ + + +

+

2024-08-20

@@ -1650,6 +1990,11 @@

Leaderboard

38.00

+ +

+ - +

+

2024-06-27

@@ -1680,6 +2025,13 @@

Leaderboard

35.67

+ +

+ + + +

+

2024-08-11

@@ -1710,6 +2062,11 @@

Leaderboard

35.00

+ +

+ - +

+

2024-08-29

@@ -1740,6 +2097,13 @@

Leaderboard

34.00

+ +

+ + + +

+

2024-08-06

@@ -1770,6 +2134,13 @@

Leaderboard

34.00

+ +

+ + + +

+

2024-07-23

@@ -1800,6 +2171,13 @@

Leaderboard

33.00

+ +

+ + + +

+

2024-06-22

@@ -1830,6 +2208,13 @@

Leaderboard

31.33

+ +

+ + + +

+

2024-06-17

@@ -1860,6 +2245,13 @@

Leaderboard

30.67

+ +

+ + + +

+

2024-06-21

@@ -1890,6 +2282,11 @@

Leaderboard

30.00

+ +

+ - +

+

2024-09-08

@@ -1920,6 +2317,13 @@

Leaderboard

29.67

+ +

+ + + +

+

2024-07-21

@@ -1950,6 +2354,13 @@

Leaderboard

29.67

+ +

+ + + +

+

2024-08-08

@@ -1980,6 +2391,13 @@

Leaderboard

28.33

+ +

+ + + +

+

2024-06-04

@@ -2010,6 +2428,11 @@

Leaderboard

28.00

+ +

+ - +

+

2024-06-12

@@ -2040,6 +2463,11 @@

Leaderboard

27.67

+ +

+ - +

+

2024-07-06

@@ -2070,6 +2498,13 @@

Leaderboard

27.33

+ +

+ + + +

+

2024-06-30

@@ -2100,6 +2535,11 @@

Leaderboard

26.67

+ +

+ - +

+

2024-06-23

@@ -2130,6 +2570,13 @@

Leaderboard

26.67

+ +

+ + + +

+

2024-07-25

@@ -2160,6 +2607,13 @@

Leaderboard

26.67

+ +

+ + + +

+

2024-06-12

@@ -2190,6 +2644,13 @@

Leaderboard

26.33

+ +

+ + + +

+

2024-05-23

@@ -2220,6 +2681,11 @@

Leaderboard

25.33

+ +

+ - +

+

2024-09-25

@@ -2250,6 +2716,11 @@

Leaderboard

24.67

+ +

+ - +

+

2024-06-17

@@ -2280,6 +2751,13 @@

Leaderboard

23.67

+ +

+ + + +

+

2024-10-16

@@ -2310,6 +2788,11 @@

Leaderboard

23.67

+ +

+ - +

+

2024-05-24

@@ -2340,6 +2823,13 @@

Leaderboard

23.00

+ +

+ + + +

+

2024-06-20

@@ -2368,6 +2858,13 @@

Leaderboard

21.67

+ +

+ + + +

+

2024-06-15

@@ -2398,6 +2895,13 @@

Leaderboard

21.67

+ +

+ + + +

+

2024-08-28

@@ -2426,6 +2930,13 @@

Leaderboard

20.33

+ +

+ + + +

+

2024-05-09

@@ -2456,6 +2967,13 @@

Leaderboard

19.00

+ +

+ + + +

+

2024-05-30

@@ -2486,6 +3004,13 @@

Leaderboard

18.33

+ +

+ + + +

+

2024-07-28

@@ -2516,6 +3041,13 @@

Leaderboard

18.00

+ +

+ + + +

+

2024-04-02

@@ -2546,6 +3078,13 @@

Leaderboard

11.67

+ +

+ + + +

+

2024-04-02

@@ -2574,6 +3113,13 @@

Leaderboard

4.33

+ +

+ + + +

+

2024-04-02

@@ -2604,6 +3150,13 @@

Leaderboard

3.00

+ +

+ + + +

+

2023-10-10

@@ -2632,6 +3185,13 @@

Leaderboard

2.67

+ +

+ + + +

+

2024-04-02

@@ -2660,6 +3220,13 @@

Leaderboard

1.33

+ +

+ + + +

+

2023-10-10

@@ -2688,6 +3255,13 @@

Leaderboard

1.00

+ +

+ + + +

+

2023-10-10

@@ -2716,6 +3290,13 @@

Leaderboard

0.33

+ +

+ + + +

+

2023-10-10

diff --git a/template/data.json b/template/data.json index 2c811a0..f071a69 100644 --- a/template/data.json +++ b/template/data.json @@ -12,7 +12,8 @@ "trajs": true, "site": "https://honeycomb.sh/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1826351945021636608/eQgGBRKw_400x400.jpg" }, { "name": "Amazon Q Developer Agent (v20240719-dev)", @@ -23,7 +24,8 @@ "trajs": true, "site": "https://aws.amazon.com/q/developer/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Amazon_Web_Services_Logo.svg/2560px-Amazon_Web_Services_Logo.svg.png" }, { "name": "Factory Code Droid", @@ -34,7 +36,8 @@ "trajs": false, "site": "https://www.factory.ai/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1768168785733980160/a4hWTSfN_400x400.jpg" }, { "name": "AutoCodeRover (v20240620) + GPT 4o (2024-05-13)", @@ -45,7 +48,8 @@ "trajs": false, "site": "https://autocoderover.dev/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://avatars.githubusercontent.com/u/100131783?s=200&v=4" }, { "name": "SWE-agent + Claude 3.5 Sonnet", @@ -56,7 +60,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "AppMap Navie + GPT 4o (2024-05-13)", @@ -67,7 +72,8 @@ "trajs": false, "site": "https://appmap.io/navie", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1393240555191160833/q5OUpB3Q_400x400.png" }, { "name": "Amazon Q Developer Agent (v20240430-dev)", @@ -78,7 +84,8 @@ "trajs": false, "site": "https://aws.amazon.com/q/developer/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Amazon_Web_Services_Logo.svg/2560px-Amazon_Web_Services_Logo.svg.png" }, { "name": "SWE-agent + GPT 4 (1106)", @@ -89,7 +96,8 @@ "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "SWE-agent + GPT 4o (2024-05-13)", @@ -100,7 +108,8 @@ "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "SWE-agent + Claude 3 Opus", @@ -111,7 +120,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + Claude 3 Opus", @@ -122,7 +132,8 @@ "trajs": false, "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + Claude 2", @@ -133,7 +144,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + GPT 4 (1106)", @@ -144,7 +156,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + SWE-Llama 13B", @@ -155,7 +168,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + SWE-Llama 7B", @@ -166,7 +180,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + ChatGPT 3.5", @@ -177,7 +192,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" } ] }, @@ -193,7 +209,8 @@ "trajs": true, "site": "https://www.anthropic.com/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1798110641414443008/XP8gyBaY_400x400.jpg" }, { "name": "Solver (2024-09-12)", @@ -204,7 +221,8 @@ "trajs": true, "site": "https://laredolabs.com/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1847427476601053184/L3RCKwMu_400x400.png" }, { "name": "Gru(2024-08-24)", @@ -215,7 +233,8 @@ "trajs": true, "site": "https://gru.ai", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1832709320896917504/FB5KojGT_400x400.png" }, { "name": "Solver (2024-09-12)", @@ -226,7 +245,8 @@ "trajs": true, "site": "https://laredolabs.com/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1847427476601053184/L3RCKwMu_400x400.png" }, { "name": "Tools + Claude 3.5 Haiku", @@ -237,7 +257,8 @@ "trajs": true, "site": "https://www.anthropic.com/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1798110641414443008/XP8gyBaY_400x400.jpg" }, { "name": "Honeycomb", @@ -248,7 +269,8 @@ "trajs": true, "site": "https://honeycomb.sh/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1826351945021636608/eQgGBRKw_400x400.jpg" }, { "name": "Composio SWEkit + Claude 3.5 Sonnet (2024-10-16)", @@ -259,7 +281,8 @@ "trajs": true, "site": "https://github.com/ComposioHQ/composio/tree/master/python/swe/agent", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/128464815?s=48&v=4" }, { "name": "Amazon Q Developer Agent (v20240719-dev)", @@ -270,7 +293,8 @@ "trajs": true, "site": "https://aws.amazon.com/q/developer/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Amazon_Web_Services_Logo.svg/2560px-Amazon_Web_Services_Logo.svg.png" }, { "name": "AutoCodeRover (v20240620) + GPT 4o (2024-05-13)", @@ -281,7 +305,8 @@ "trajs": false, "site": "https://autocoderover.dev/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://avatars.githubusercontent.com/u/100131783?s=200&v=4" }, { "name": "Factory Code Droid", @@ -292,7 +317,8 @@ "trajs": false, "site": "https://www.factory.ai/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1768168785733980160/a4hWTSfN_400x400.jpg" }, { "name": "SWE-agent + Claude 3.5 Sonnet", @@ -303,7 +329,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "nFactorial (2024-10-07)", @@ -314,7 +341,8 @@ "trajs": true, "site": "https://nfactorial.dev/", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Lingma Agent + Lingma SWE-GPT 72b (v0925)", @@ -325,7 +353,8 @@ "trajs": true, "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT (https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT-v20240925)", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1567807256091308032/4AXfLwia_400x400.jpg" }, { "name": "EPAM AI/Run Developer Agent + GPT4o", @@ -336,7 +365,8 @@ "trajs": true, "site": "https://www.epam.com/services/artificial-intelligence", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1753265803191795712/qg3YENmV_400x400.png" }, { "name": "AppMap Navie + GPT 4o (2024-05-13)", @@ -347,7 +377,8 @@ "trajs": false, "site": "https://appmap.io/navie", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1393240555191160833/q5OUpB3Q_400x400.png" }, { "name": "nFactorial (2024-10-01)", @@ -358,7 +389,8 @@ "trajs": true, "site": "https://nfactorial.dev/", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Amazon Q Developer Agent (v20240430-dev)", @@ -369,7 +401,8 @@ "trajs": false, "site": "https://aws.amazon.com/q/developer/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Amazon_Web_Services_Logo.svg/2560px-Amazon_Web_Services_Logo.svg.png" }, { "name": "Lingma Agent + Lingma SWE-GPT 72b (v0918)", @@ -380,7 +413,8 @@ "trajs": true, "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1567807256091308032/4AXfLwia_400x400.jpg" }, { "name": "EPAM AI/Run Developer Agent + GPT4o", @@ -391,7 +425,8 @@ "trajs": true, "site": "https://www.epam.com/services/artificial-intelligence", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1753265803191795712/qg3YENmV_400x400.png" }, { "name": "SWE-agent + GPT 4o (2024-05-13)", @@ -402,7 +437,8 @@ "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "SWE-agent + GPT 4 (1106)", @@ -413,7 +449,8 @@ "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "SWE-agent + Claude 3 Opus", @@ -424,7 +461,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "Lingma Agent + Lingma SWE-GPT 7b (v0925)", @@ -435,7 +473,8 @@ "trajs": true, "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT (https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT-v20240925)", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1567807256091308032/4AXfLwia_400x400.jpg" }, { "name": "Lingma Agent + Lingma SWE-GPT 7b (v0918)", @@ -446,7 +485,8 @@ "trajs": true, "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1567807256091308032/4AXfLwia_400x400.jpg" }, { "name": "RAG + Claude 3 Opus", @@ -457,7 +497,8 @@ "trajs": false, "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + Claude 2", @@ -468,7 +509,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + GPT 4 (1106)", @@ -479,7 +521,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + SWE-Llama 7B", @@ -490,7 +533,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + SWE-Llama 13B", @@ -501,7 +545,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + ChatGPT 3.5", @@ -512,7 +557,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" } ] }, @@ -528,7 +574,8 @@ "trajs": false, "site": "https://aide.dev/", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Bytedance MarsCode Agent", @@ -539,7 +586,8 @@ "trajs": true, "site": "https://www.marscode.com/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1768232487271153664/etPzHPm1_400x400.jpg" }, { "name": "Honeycomb", @@ -550,7 +598,8 @@ "trajs": true, "site": "https://honeycomb.sh/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1826351945021636608/eQgGBRKw_400x400.jpg" }, { "name": "AbanteAI MentatBot + GPT 4o (2024-05-13)", @@ -561,7 +610,8 @@ "trajs": false, "site": "https://mentat.ai/blog/mentatbot-sota-coding-agent", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Gru(2024-08-11)", @@ -572,7 +622,8 @@ "trajs": true, "site": "https://gru.ai", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1832709320896917504/FB5KojGT_400x400.png" }, { "name": "Isoform", @@ -583,7 +634,8 @@ "trajs": true, "site": "https://isoform.ai", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "SuperCoder2.0", @@ -594,7 +646,8 @@ "trajs": true, "site": "https://superagi.com/supercoder/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1678659510041456640/rxUIfulT_400x400.jpg" }, { "name": "Bytedance MarsCode Agent + GPT 4o (2024-05-13)", @@ -605,7 +658,8 @@ "trajs": false, "site": "https://www.marscode.com/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1768232487271153664/etPzHPm1_400x400.jpg" }, { "name": "Alibaba Lingma Agent", @@ -616,7 +670,8 @@ "trajs": true, "site": "https://arxiv.org/abs/2406.01422", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1567807256091308032/4AXfLwia_400x400.jpg" }, { "name": "Factory Code Droid", @@ -627,7 +682,8 @@ "trajs": false, "site": "https://www.factory.ai/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1768168785733980160/a4hWTSfN_400x400.jpg" }, { "name": "AutoCodeRover (v20240620) + GPT 4o (2024-05-13)", @@ -638,7 +694,8 @@ "trajs": true, "site": "https://autocoderover.dev/", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/100131783?s=200&v=4" }, { "name": "AIGCode Infant-Coder(2024-08-30)", @@ -649,7 +706,8 @@ "trajs": true, "site": "https://aigcode.net/", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Amazon Q Developer Agent (v20240719-dev)", @@ -660,7 +718,8 @@ "trajs": true, "site": "https://aws.amazon.com/q/developer/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Amazon_Web_Services_Logo.svg/2560px-Amazon_Web_Services_Logo.svg.png" }, { "name": "Agentless + RepoGraph + GPT-4o", @@ -671,7 +730,8 @@ "trajs": true, "site": "https://github.com/ozyyshr/RepoGraph", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://brand.illinois.edu/wp-content/uploads/2024/02/Color-Variation-Orange-Block-I-White-Background.png" }, { "name": "CodeR + GPT 4 (1106)", @@ -682,7 +742,8 @@ "trajs": false, "site": "https://github.com/NL2Code/CodeR", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/en/thumb/0/04/Huawei_Standard_logo.svg/1200px-Huawei_Standard_logo.svg.png" }, { "name": "MASAI + GPT 4o (2024-05-13)", @@ -693,7 +754,8 @@ "trajs": false, "site": "https://github.com/masai-dev-agent/masai", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "SIMA + GPT 4o (2024-05-13)", @@ -704,7 +766,8 @@ "trajs": true, "site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240706_sima_gpt4o", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Agentless + GPT 4o (2024-05-13)", @@ -715,7 +778,8 @@ "trajs": false, "site": "https://github.com/OpenAutoCoder/Agentless", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://brand.illinois.edu/wp-content/uploads/2024/02/Color-Variation-Orange-Block-I-White-Background.png" }, { "name": "Moatless Tools + Claude 3.5 Sonnet", @@ -726,7 +790,8 @@ "trajs": true, "site": "https://github.com/aorwall/moatless-tools", "verified": true, - "oss": true + "oss": true, + "org_logo": null }, { "name": "OpenDevin + CodeAct v1.8", @@ -737,7 +802,8 @@ "trajs": true, "site": "https://docs.all-hands.dev/", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1825651863292882944/KQwkuu7P_400x400.jpg" }, { "name": "IBM Research Agent-101", @@ -748,7 +814,8 @@ "trajs": false, "site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240612_IBM_Research_Agent101", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQPBss61ZSzmBM75JqdYCVqsfqJ4JijX0mt9g&s" }, { "name": "Aider + GPT 4o & Claude 3 Opus", @@ -759,7 +826,8 @@ "trajs": false, "site": "https://github.com/paul-gauthier/aider", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/172139148?s=48&v=4" }, { "name": "HyperAgent", @@ -770,7 +838,8 @@ "trajs": true, "site": "https://arxiv.org/abs/2409.16299", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "Moatless Tools + GPT 4o (2024-05-13)", @@ -781,7 +850,8 @@ "trajs": true, "site": "https://github.com/aorwall/moatless-tools", "verified": true, - "oss": true + "oss": true, + "org_logo": null }, { "name": "IBM SWE-1.0 (with open LLMs)", @@ -792,7 +862,8 @@ "trajs": true, "site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20241016_IBM-SWE-1.0", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQPBss61ZSzmBM75JqdYCVqsfqJ4JijX0mt9g&s" }, { "name": "OpenCSG StarShip CodeGenAgent + GPT 4 (0613)", @@ -803,7 +874,8 @@ "trajs": false, "site": "https://opencsg.com/product?class=StarShip", "verified": false, - "oss": false + "oss": false, + "org_logo": null }, { "name": "SWE-agent + Claude 3.5 Sonnet", @@ -814,7 +886,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "AppMap Navie + GPT 4o (2024-05-13)", @@ -825,7 +898,8 @@ "trajs": false, "site": "https://appmap.io/navie", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://pbs.twimg.com/profile_images/1393240555191160833/q5OUpB3Q_400x400.png" }, { "name": "Bytedance AutoSE (based on SWE-Agent) + GPT4/GPT4o Mixed (20240828)", @@ -836,7 +910,8 @@ "trajs": true, "site": null, "verified": false, - "oss": false + "oss": false, + "org_logo": "https://pbs.twimg.com/profile_images/1768232487271153664/etPzHPm1_400x400.jpg" }, { "name": "Amazon Q Developer Agent (v20240430-dev)", @@ -847,7 +922,8 @@ "trajs": false, "site": "https://aws.amazon.com/q/developer/", "verified": false, - "oss": false + "oss": false, + "org_logo": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Amazon_Web_Services_Logo.svg/2560px-Amazon_Web_Services_Logo.svg.png" }, { "name": "AutoCodeRover (v20240408) + GPT 4 (0125)", @@ -858,7 +934,8 @@ "trajs": false, "site": "https://github.com/nus-apr/auto-code-rover", "verified": false, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/100131783?s=200&v=4" }, { "name": "SWE-agent + GPT 4o (2024-05-13)", @@ -869,7 +946,8 @@ "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "SWE-agent + GPT 4 (1106)", @@ -880,7 +958,8 @@ "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "SWE-agent + Claude 3 Opus", @@ -891,7 +970,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + Claude 3 Opus", @@ -902,7 +982,8 @@ "trajs": false, "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference", "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + Claude 2", @@ -913,7 +994,8 @@ "trajs": true, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + GPT 4 (1106)", @@ -924,7 +1006,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + SWE-Llama 7B", @@ -935,7 +1018,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + SWE-Llama 13B", @@ -946,7 +1030,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" }, { "name": "RAG + ChatGPT 3.5", @@ -957,7 +1042,8 @@ "trajs": false, "site": null, "verified": true, - "oss": true + "oss": true, + "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4" } ] } diff --git a/template/template_index.html b/template/template_index.html index a440605..9dc16b2 100644 --- a/template/template_index.html +++ b/template/template_index.html @@ -145,6 +145,7 @@

Leaderboard

Model
% Resolved
+
Org
Date
Logs
Trajs
@@ -166,6 +167,13 @@

Leaderboard

{{ "%.2f"|format(item.resolved|float) }}

+ +

+ {% if item.org_logo %} + + {% else %} - {% endif %} +

+

{{item.date}}