Spaces:
Running
Running
update claude 4.5 flores results
Browse files
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -21,8 +21,8 @@
|
|
| 21 |
"Gemini-2.5 Pro": 72.7,
|
| 22 |
"Gemini-2.5 Flash": 69.9,
|
| 23 |
"Claude 4.5 Sonnet": 71.7,
|
| 24 |
-
"Claude
|
| 25 |
-
"Claude
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
@@ -47,8 +47,8 @@
|
|
| 47 |
"Gemini-2.5 Pro": 88.3,
|
| 48 |
"Gemini-2.5 Flash": 87.9,
|
| 49 |
"Claude 4.5 Sonnet": 81.6,
|
| 50 |
-
"Claude
|
| 51 |
-
"Claude
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
@@ -73,8 +73,8 @@
|
|
| 73 |
"Gemini-2.5 Pro": 47.4,
|
| 74 |
"Gemini-2.5 Flash": 46.5,
|
| 75 |
"Claude 4.5 Sonnet": 46.6,
|
| 76 |
-
"Claude
|
| 77 |
-
"Claude
|
| 78 |
}
|
| 79 |
},
|
| 80 |
"MMLU": {
|
|
@@ -99,8 +99,8 @@
|
|
| 99 |
"Gemini-2.5 Pro": 78.2,
|
| 100 |
"Gemini-2.5 Flash": 67.7,
|
| 101 |
"Claude 4.5 Sonnet": 58.6,
|
| 102 |
-
"Claude
|
| 103 |
-
"Claude
|
| 104 |
}
|
| 105 |
},
|
| 106 |
"Math": {
|
|
@@ -125,8 +125,8 @@
|
|
| 125 |
"Gemini-2.5 Pro": 74.4,
|
| 126 |
"Gemini-2.5 Flash": 70.6,
|
| 127 |
"Claude 4.5 Sonnet": 73.1,
|
| 128 |
-
"Claude
|
| 129 |
-
"Claude
|
| 130 |
}
|
| 131 |
},
|
| 132 |
"Topic": {
|
|
@@ -151,8 +151,8 @@
|
|
| 151 |
"Gemini-2.5 Pro": 88.1,
|
| 152 |
"Gemini-2.5 Flash": 87.2,
|
| 153 |
"Claude 4.5 Sonnet": 84.2,
|
| 154 |
-
"Claude
|
| 155 |
-
"Claude
|
| 156 |
}
|
| 157 |
},
|
| 158 |
"RC": {
|
|
@@ -177,8 +177,8 @@
|
|
| 177 |
"Gemini-2.5 Pro": 76.7,
|
| 178 |
"Gemini-2.5 Flash": 42.2,
|
| 179 |
"Claude 4.5 Sonnet": 74.8,
|
| 180 |
-
"Claude
|
| 181 |
-
"Claude
|
| 182 |
}
|
| 183 |
}
|
| 184 |
}
|
|
|
|
| 21 |
"Gemini-2.5 Pro": 72.7,
|
| 22 |
"Gemini-2.5 Flash": 69.9,
|
| 23 |
"Claude 4.5 Sonnet": 71.7,
|
| 24 |
+
"Claude 3.7 Sonnet": 59.8,
|
| 25 |
+
"Claude 4.0 Sonnet": 68.1
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
|
|
| 47 |
"Gemini-2.5 Pro": 88.3,
|
| 48 |
"Gemini-2.5 Flash": 87.9,
|
| 49 |
"Claude 4.5 Sonnet": 81.6,
|
| 50 |
+
"Claude 3.7 Sonnet": 73.4,
|
| 51 |
+
"Claude 4.0 Sonnet": 80.4
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
|
|
| 73 |
"Gemini-2.5 Pro": 47.4,
|
| 74 |
"Gemini-2.5 Flash": 46.5,
|
| 75 |
"Claude 4.5 Sonnet": 46.6,
|
| 76 |
+
"Claude 3.7 Sonnet": 44.0,
|
| 77 |
+
"Claude 4.0 Sonnet": 46.0
|
| 78 |
}
|
| 79 |
},
|
| 80 |
"MMLU": {
|
|
|
|
| 99 |
"Gemini-2.5 Pro": 78.2,
|
| 100 |
"Gemini-2.5 Flash": 67.7,
|
| 101 |
"Claude 4.5 Sonnet": 58.6,
|
| 102 |
+
"Claude 3.7 Sonnet": 66.7,
|
| 103 |
+
"Claude 4.0 Sonnet": 75.5
|
| 104 |
}
|
| 105 |
},
|
| 106 |
"Math": {
|
|
|
|
| 125 |
"Gemini-2.5 Pro": 74.4,
|
| 126 |
"Gemini-2.5 Flash": 70.6,
|
| 127 |
"Claude 4.5 Sonnet": 73.1,
|
| 128 |
+
"Claude 3.7 Sonnet": 35.2,
|
| 129 |
+
"Claude 4.0 Sonnet": 66.9
|
| 130 |
}
|
| 131 |
},
|
| 132 |
"Topic": {
|
|
|
|
| 151 |
"Gemini-2.5 Pro": 88.1,
|
| 152 |
"Gemini-2.5 Flash": 87.2,
|
| 153 |
"Claude 4.5 Sonnet": 84.2,
|
| 154 |
+
"Claude 3.7 Sonnet": 84.9,
|
| 155 |
+
"Claude 4.0 Sonnet": 83.2
|
| 156 |
}
|
| 157 |
},
|
| 158 |
"RC": {
|
|
|
|
| 177 |
"Gemini-2.5 Pro": 76.7,
|
| 178 |
"Gemini-2.5 Flash": 42.2,
|
| 179 |
"Claude 4.5 Sonnet": 74.8,
|
| 180 |
+
"Claude 3.7 Sonnet": 65.1,
|
| 181 |
+
"Claude 4.0 Sonnet": 76.2
|
| 182 |
}
|
| 183 |
}
|
| 184 |
}
|