diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -3,30 +3,30 @@ { "task": "classification", "metric": "accuracy", - "score": 0.5671296296296297, - "bcp_47": 42, - "model": 4 + "score": 0.5722222222222223, + "bcp_47": 10, + "model": 5 }, { "task": "language_modeling", "metric": "chrf", - "score": 0.9370067182020132, - "bcp_47": 42, - "model": 4 + "score": 0.9360730990265229, + "bcp_47": 10, + "model": 5 }, { "task": "translation", "metric": "bleu", - "score": 0.27639322299355507, - "bcp_47": 42, - "model": 4 + "score": 0.3508793079301233, + "bcp_47": 10, + "model": 5 }, { "task": "translation", "metric": "chrf", - "score": 0.44833214505324764, - "bcp_47": 42, - "model": 4 + "score": 0.5117214627054559, + "bcp_47": 10, + "model": 5 } ], "models": [ @@ -34,113 +34,141 @@ "model": "google/gemini-2.0-flash-001", "task": "classification", "metric": "accuracy", - "score": 0.8700000000000001, - "bcp_47": 10 + "score": 0.8666666666666667, + "bcp_47": 2 }, { "model": "google/gemini-2.0-flash-001", "task": "language_modeling", "metric": "chrf", - "score": 0.9555263008516499, - "bcp_47": 10 + "score": 0.9625160007216136, + "bcp_47": 2 }, { "model": "google/gemini-2.0-flash-001", "task": "translation", "metric": "bleu", - "score": 0.36418986768393896, - "bcp_47": 10 + "score": 0.4523562354788243, + "bcp_47": 2 }, { "model": "google/gemini-2.0-flash-001", "task": "translation", "metric": "chrf", - "score": 0.5257896349203012, - "bcp_47": 10 + "score": 0.5828490054615683, + "bcp_47": 2 + }, + { + "model": "google/gemma-3-27b-it", + "task": "classification", + "metric": "accuracy", + "score": 0.7166666666666666, + "bcp_47": 2 + }, + { + "model": "google/gemma-3-27b-it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9567943176484227, + "bcp_47": 2 + }, + { + "model": "google/gemma-3-27b-it", + "task": "translation", + "metric": "bleu", + "score": 0.3748623797480871, + "bcp_47": 2 + }, + { + "model": "google/gemma-3-27b-it", + "task": "translation", + "metric": "chrf", + "score": 0.5376336154503363, + "bcp_47": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "task": "classification", "metric": "accuracy", - "score": 0.5087301587301587, - "bcp_47": 42 + "score": 0.5, + "bcp_47": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "task": "language_modeling", "metric": "chrf", - "score": 0.9382523459554147, - "bcp_47": 42 + "score": 0.9402702238563417, + "bcp_47": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "task": "translation", "metric": "bleu", - "score": 0.2511829155327205, - "bcp_47": 42 + "score": 0.3116623954547766, + "bcp_47": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "task": "translation", "metric": "chrf", - "score": 0.4313353245284424, - "bcp_47": 42 + "score": 0.4836914110309717, + "bcp_47": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "task": "classification", "metric": "accuracy", - "score": 0.5666666666666667, - "bcp_47": 10 + "score": 0.55, + "bcp_47": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "task": "language_modeling", "metric": "chrf", - "score": 0.9046559534191477, - "bcp_47": 10 + "score": 0.8557257213273853, + "bcp_47": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "task": "translation", "metric": "bleu", - "score": 0.2612972853503233, - "bcp_47": 10 + "score": 0.37837115628691054, + "bcp_47": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "task": "translation", "metric": "chrf", - "score": 0.41659663566434596, - "bcp_47": 10 + "score": 0.5213024503486642, + "bcp_47": 2 }, { "model": "openai/gpt-4o-mini", "task": "classification", "metric": "accuracy", - "score": 0.51, - "bcp_47": 10 + "score": 0.5166666666666666, + "bcp_47": 2 }, { "model": "openai/gpt-4o-mini", "task": "language_modeling", "metric": "chrf", - "score": 0.9456062637709552, - "bcp_47": 10 + "score": 0.9482707322595748, + "bcp_47": 2 }, { "model": "openai/gpt-4o-mini", "task": "translation", "metric": "bleu", - "score": 0.30957580728190814, - "bcp_47": 10 + "score": 0.3940120225834043, + "bcp_47": 2 }, { "model": "openai/gpt-4o-mini", "task": "translation", "metric": "chrf", - "score": 0.4739968107792783, - "bcp_47": 10 + "score": 0.5452510379336759, + "bcp_47": 2 } ], "languages": [ @@ -335,52 +363,10 @@ "commonvoice_hours": 1.8, "commonvoice_locale": "am", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.36666666666666664, - "model": 1.0 - }, - { - "bcp_47": "am", - "speakers": 35728475, - "language_name": "Amharic", - "flores_path": "amh_Ethi", - "fleurs_tag": "am_et", - "commonvoice_hours": 1.8, - "commonvoice_locale": "am", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9031129205404201, - "model": 1.0 - }, - { - "bcp_47": "am", - "speakers": 35728475, - "language_name": "Amharic", - "flores_path": "amh_Ethi", - "fleurs_tag": "am_et", - "commonvoice_hours": 1.8, - "commonvoice_locale": "am", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.15002522598066087, - "model": 1.0 - }, - { - "bcp_47": "am", - "speakers": 35728475, - "language_name": "Amharic", - "flores_path": "amh_Ethi", - "fleurs_tag": "am_et", - "commonvoice_hours": 1.8, - "commonvoice_locale": "am", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.3344555209113584, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "amo", @@ -463,8 +449,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.6666666666666666, - "model": 4.0 + "score": 0.43333333333333335, + "model": 1.0 }, { "bcp_47": "ar", @@ -477,8 +463,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9505580315120445, - "model": 4.0 + "score": 0.9307208363594199, + "model": 1.0 }, { "bcp_47": "ar", @@ -491,8 +477,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.272695032954275, - "model": 4.0 + "score": 0.2837250166554738, + "model": 1.0 }, { "bcp_47": "ar", @@ -505,8 +491,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.45513349114063173, - "model": 4.0 + "score": 0.4684314458952127, + "model": 1.0 }, { "bcp_47": "arn", @@ -587,52 +573,10 @@ "commonvoice_hours": null, "commonvoice_locale": null, "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "model": 1.0 - }, - { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9319821809429131, - "model": 1.0 - }, - { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.20127691717583826, - "model": 1.0 - }, - { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.38780198426317497, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "as", @@ -741,52 +685,10 @@ "commonvoice_hours": 0.5, "commonvoice_locale": "az", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 - }, - { - "bcp_47": "az", - "speakers": 32446682, - "language_name": "Azerbaijani", - "flores_path": "azj_Latn", - "fleurs_tag": "az_az", - "commonvoice_hours": 0.5, - "commonvoice_locale": "az", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9439975775415789, - "model": 1.0 - }, - { - "bcp_47": "az", - "speakers": 32446682, - "language_name": "Azerbaijani", - "flores_path": "azj_Latn", - "fleurs_tag": "az_az", - "commonvoice_hours": 0.5, - "commonvoice_locale": "az", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.18364881639775618, - "model": 1.0 - }, - { - "bcp_47": "az", - "speakers": 32446682, - "language_name": "Azerbaijani", - "flores_path": "azj_Latn", - "fleurs_tag": "az_az", - "commonvoice_hours": 0.5, - "commonvoice_locale": "az", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.3791669348856303, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "ba", @@ -1147,52 +1049,10 @@ "commonvoice_hours": null, "commonvoice_locale": null, "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.3333333333333333, - "model": 1.0 - }, - { - "bcp_47": "bho", - "speakers": 32934797, - "language_name": "Bhojpuri", - "flores_path": "bho_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9334228689163548, - "model": 1.0 - }, - { - "bcp_47": "bho", - "speakers": 32934797, - "language_name": "Bhojpuri", - "flores_path": "bho_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.24092898437545654, - "model": 1.0 - }, - { - "bcp_47": "bho", - "speakers": 32934797, - "language_name": "Bhojpuri", - "flores_path": "bho_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.41894143077328727, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "bi", @@ -1387,8 +1247,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.5, - "model": 4.0 + "score": 0.4, + "model": 1.0 }, { "bcp_47": "bn", @@ -1401,8 +1261,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.8930948406963347, - "model": 4.0 + "score": 0.8990036074617607, + "model": 1.0 }, { "bcp_47": "bn", @@ -1415,8 +1275,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.22867663633876684, - "model": 4.0 + "score": 0.20360260890869705, + "model": 1.0 }, { "bcp_47": "bn", @@ -1429,8 +1289,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.40541764389945456, - "model": 4.0 + "score": 0.4076175886917154, + "model": 1.0 }, { "bcp_47": "bo", @@ -2253,52 +2113,10 @@ "commonvoice_hours": 1359.0, "commonvoice_locale": "de", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "model": 1.0 - }, - { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1359.0, - "commonvoice_locale": "de", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9788487181545229, - "model": 1.0 - }, - { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1359.0, - "commonvoice_locale": "de", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.31823711841849855, - "model": 1.0 - }, - { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1359.0, - "commonvoice_locale": "de", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.49174647549269207, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "den", @@ -2577,8 +2395,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.5916666666666667, - "model": 4.0 + "score": 0.6466666666666667, + "model": 5.0 }, { "bcp_47": "en", @@ -2591,8 +2409,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9225354470067261, - "model": 4.0 + "score": 0.9391757502550891, + "model": 5.0 }, { "bcp_47": "en", @@ -2605,8 +2423,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.4289895608958245, - "model": 4.0 + "score": 0.4274883186793429, + "model": 5.0 }, { "bcp_47": "en", @@ -2619,8 +2437,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.5437014634491315, - "model": 4.0 + "score": 0.5456627250056182, + "model": 5.0 }, { "bcp_47": "eo", @@ -2647,8 +2465,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.6583333333333333, - "model": 4.0 + "score": 0.5333333333333333, + "model": 1.0 }, { "bcp_47": "es", @@ -2661,8 +2479,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9561525839092082, - "model": 4.0 + "score": 0.9602736376353148, + "model": 1.0 }, { "bcp_47": "es", @@ -2675,8 +2493,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.2984596316936459, - "model": 4.0 + "score": 0.283646935447629, + "model": 1.0 }, { "bcp_47": "es", @@ -2689,8 +2507,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.46243613105820586, - "model": 4.0 + "score": 0.46056393670415496, + "model": 1.0 }, { "bcp_47": "esu", @@ -2771,52 +2589,10 @@ "commonvoice_hours": 370.0, "commonvoice_locale": "fa", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 - }, - { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9396649061437635, - "model": 1.0 - }, - { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.2516753344674677, - "model": 1.0 - }, - { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4448545956789697, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "fan", @@ -2911,52 +2687,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "tl", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 - }, - { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9553160038255677, - "model": 1.0 - }, - { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.3248927726984041, - "model": 1.0 - }, - { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4689020729383555, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "fit", @@ -3025,8 +2759,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.7166666666666667, - "model": 4.0 + "score": 0.5333333333333333, + "model": 1.0 }, { "bcp_47": "fr", @@ -3039,8 +2773,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9718447009224642, - "model": 4.0 + "score": 0.9756346055321089, + "model": 1.0 }, { "bcp_47": "fr", @@ -3053,8 +2787,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.3187714613657571, - "model": 4.0 + "score": 0.3193235920661593, + "model": 1.0 }, { "bcp_47": "fr", @@ -3067,8 +2801,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.4845347359298269, - "model": 4.0 + "score": 0.4875691290722964, + "model": 1.0 }, { "bcp_47": "frc", @@ -3527,52 +3261,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "gu-IN", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 - }, - { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421488818900206, - "model": 1.0 - }, - { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.24812610549809738, - "model": 1.0 - }, - { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4318359636701651, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "gub", @@ -3636,7 +3328,7 @@ "language_name": "Manx", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 8.6, + "commonvoice_hours": 11.0, "commonvoice_locale": "gv", "in_benchmark": false, "task": null, @@ -3681,52 +3373,10 @@ "commonvoice_hours": 4.1, "commonvoice_locale": "ha", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 - }, - { - "bcp_47": "ha", - "speakers": 40411882, - "language_name": "Hausa", - "flores_path": "hau_Latn", - "fleurs_tag": "ha_ng", - "commonvoice_hours": 4.1, - "commonvoice_locale": "ha", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.945704338611724, - "model": 1.0 - }, - { - "bcp_47": "ha", - "speakers": 40411882, - "language_name": "Hausa", - "flores_path": "hau_Latn", - "fleurs_tag": "ha_ng", - "commonvoice_hours": 4.1, - "commonvoice_locale": "ha", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.14767816277169443, - "model": 1.0 - }, - { - "bcp_47": "ha", - "speakers": 40411882, - "language_name": "Hausa", - "flores_path": "hau_Latn", - "fleurs_tag": "ha_ng", - "commonvoice_hours": 4.1, - "commonvoice_locale": "ha", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.34353973347368816, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "hak", @@ -3795,8 +3445,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.6083333333333334, - "model": 4.0 + "score": 0.5333333333333333, + "model": 1.0 }, { "bcp_47": "hi", @@ -3809,8 +3459,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.935107298962707, - "model": 4.0 + "score": 0.9489245079595486, + "model": 1.0 }, { "bcp_47": "hi", @@ -3823,8 +3473,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.29263068330216335, - "model": 4.0 + "score": 0.31956422674397006, + "model": 1.0 }, { "bcp_47": "hi", @@ -3837,8 +3487,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.45579369454961405, - "model": 4.0 + "score": 0.4896277852320754, + "model": 1.0 }, { "bcp_47": "hif", @@ -4143,66 +3793,24 @@ "commonvoice_hours": 33.0, "commonvoice_locale": "id", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9441891794331471, - "model": 1.0 - }, - { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.28485630651027877, - "model": 1.0 - }, - { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4561447909933971, - "model": 1.0 - }, - { - "bcp_47": "ie", - "speakers": 1, - "language_name": "Interlingue", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ie", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null + "bcp_47": "ie", + "speakers": 1, + "language_name": "Interlingue", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ie", + "in_benchmark": false, + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "ife", @@ -4325,52 +3933,10 @@ "commonvoice_hours": 362.0, "commonvoice_locale": "it", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "model": 1.0 - }, - { - "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.96965822717479, - "model": 1.0 - }, - { - "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.27748101044407486, - "model": 1.0 - }, - { - "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.46396483435604213, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "iu", @@ -4409,52 +3975,10 @@ "commonvoice_hours": 222.0, "commonvoice_locale": "ja", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 - }, - { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9339719005290334, - "model": 1.0 - }, - { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.26835607046224613, - "model": 1.0 - }, - { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4411714629040183, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "jam", @@ -4549,52 +4073,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "jv", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "model": 1.0 - }, - { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9274301130074318, - "model": 1.0 - }, - { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.25461973194021953, - "model": 1.0 - }, - { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4296209828775689, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "ka", @@ -5109,52 +4591,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "kn", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "model": 1.0 - }, - { - "bcp_47": "kn", - "speakers": 49065330, - "language_name": "Kannada", - "flores_path": "kan_Knda", - "fleurs_tag": "kn_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "kn", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9422502494463877, - "model": 1.0 - }, - { - "bcp_47": "kn", - "speakers": 49065330, - "language_name": "Kannada", - "flores_path": "kan_Knda", - "fleurs_tag": "kn_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "kn", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.2550718176103704, - "model": 1.0 - }, - { - "bcp_47": "kn", - "speakers": 49065330, - "language_name": "Kannada", - "flores_path": "kan_Knda", - "fleurs_tag": "kn_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "kn", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.44555883949922764, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "knf", @@ -5179,52 +4619,10 @@ "commonvoice_hours": 1.7, "commonvoice_locale": "ko", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "model": 1.0 - }, - { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9384158016197498, - "model": 1.0 - }, - { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.21969579072372616, - "model": 1.0 - }, - { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4134343535369622, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "koi", @@ -6481,52 +5879,10 @@ "commonvoice_hours": 2.8, "commonvoice_locale": "ml", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 - }, - { - "bcp_47": "ml", - "speakers": 43257484, - "language_name": "Malayalam", - "flores_path": "mal_Mlym", - "fleurs_tag": "ml_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "ml", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9372865200487422, - "model": 1.0 - }, - { - "bcp_47": "ml", - "speakers": 43257484, - "language_name": "Malayalam", - "flores_path": "mal_Mlym", - "fleurs_tag": "ml_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "ml", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.20114751241051923, - "model": 1.0 - }, - { - "bcp_47": "ml", - "speakers": 43257484, - "language_name": "Malayalam", - "flores_path": "mal_Mlym", - "fleurs_tag": "ml_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "ml", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.39404171184956394, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "mls", @@ -6635,52 +5991,10 @@ "commonvoice_hours": 20.0, "commonvoice_locale": "mr", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "model": 1.0 - }, - { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9493846554320581, - "model": 1.0 - }, - { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.22832077978859452, - "model": 1.0 - }, - { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4245203296342906, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "mrd", @@ -6733,52 +6047,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "ms", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 - }, - { - "bcp_47": "ms", - "speakers": 38097307, - "language_name": "Malay", - "flores_path": "zsm_Latn", - "fleurs_tag": "ms_my", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ms", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9449541544914151, - "model": 1.0 - }, - { - "bcp_47": "ms", - "speakers": 38097307, - "language_name": "Malay", - "flores_path": "zsm_Latn", - "fleurs_tag": "ms_my", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ms", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.26403874316694886, - "model": 1.0 - }, - { - "bcp_47": "ms", - "speakers": 38097307, - "language_name": "Malay", - "flores_path": "zsm_Latn", - "fleurs_tag": "ms_my", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ms", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.45903065670305854, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "mt", @@ -6915,52 +6187,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "my", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "model": 1.0 - }, - { - "bcp_47": "my", - "speakers": 36559231, - "language_name": "Burmese", - "flores_path": "mya_Mymr", - "fleurs_tag": "my_mm", - "commonvoice_hours": 0.0, - "commonvoice_locale": "my", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.918432369873729, - "model": 1.0 - }, - { - "bcp_47": "my", - "speakers": 36559231, - "language_name": "Burmese", - "flores_path": "mya_Mymr", - "fleurs_tag": "my_mm", - "commonvoice_hours": 0.0, - "commonvoice_locale": "my", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.18617334539824332, - "model": 1.0 - }, - { - "bcp_47": "my", - "speakers": 36559231, - "language_name": "Burmese", - "flores_path": "mya_Mymr", - "fleurs_tag": "my_mm", - "commonvoice_hours": 0.0, - "commonvoice_locale": "my", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.3705060280208132, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "myv", @@ -7573,52 +6803,10 @@ "commonvoice_hours": 0.0, "commonvoice_locale": "om", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 - }, - { - "bcp_47": "om", - "speakers": 34897121, - "language_name": "Oromo", - "flores_path": "gaz_Latn", - "fleurs_tag": "om_et", - "commonvoice_hours": 0.0, - "commonvoice_locale": "om", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359301483225031, - "model": 1.0 - }, - { - "bcp_47": "om", - "speakers": 34897121, - "language_name": "Oromo", - "flores_path": "gaz_Latn", - "fleurs_tag": "om_et", - "commonvoice_hours": 0.0, - "commonvoice_locale": "om", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.06509147151730071, - "model": 1.0 - }, - { - "bcp_47": "om", - "speakers": 34897121, - "language_name": "Oromo", - "flores_path": "gaz_Latn", - "fleurs_tag": "om_et", - "commonvoice_hours": 0.0, - "commonvoice_locale": "om", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.22674886804446037, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "or", @@ -7629,52 +6817,10 @@ "commonvoice_hours": 2.8, "commonvoice_locale": "or", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 - }, - { - "bcp_47": "or", - "speakers": 42434880, - "language_name": "Odia", - "flores_path": "ory_Orya", - "fleurs_tag": "or_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "or", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9211968327986088, - "model": 1.0 - }, - { - "bcp_47": "or", - "speakers": 42434880, - "language_name": "Odia", - "flores_path": "ory_Orya", - "fleurs_tag": "or_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "or", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.2521285657827072, - "model": 1.0 - }, - { - "bcp_47": "or", - "speakers": 42434880, - "language_name": "Odia", - "flores_path": "ory_Orya", - "fleurs_tag": "or_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "or", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4422326291663304, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "os", @@ -7715,8 +6861,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.4666666666666667, - "model": 4.0 + "score": 0.4, + "model": 1.0 }, { "bcp_47": "pa", @@ -7729,8 +6875,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9013087100801337, - "model": 4.0 + "score": 0.8692673671947869, + "model": 1.0 }, { "bcp_47": "pa", @@ -7743,8 +6889,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.3099562778912564, - "model": 4.0 + "score": 0.3368333727390049, + "model": 1.0 }, { "bcp_47": "pa", @@ -7757,8 +6903,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.4604060151149361, - "model": 4.0 + "score": 0.4873541452250132, + "model": 1.0 }, { "bcp_47": "pag", @@ -7923,52 +7069,10 @@ "commonvoice_hours": 174.0, "commonvoice_locale": "pl", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 - }, - { - "bcp_47": "pl", - "speakers": 41077399, - "language_name": "Polish", - "flores_path": "pol_Latn", - "fleurs_tag": "pl_pl", - "commonvoice_hours": 174.0, - "commonvoice_locale": "pl", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9603182772393537, - "model": 1.0 - }, - { - "bcp_47": "pl", - "speakers": 41077399, - "language_name": "Polish", - "flores_path": "pol_Latn", - "fleurs_tag": "pl_pl", - "commonvoice_hours": 174.0, - "commonvoice_locale": "pl", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.26987204535648013, - "model": 1.0 - }, - { - "bcp_47": "pl", - "speakers": 41077399, - "language_name": "Polish", - "flores_path": "pol_Latn", - "fleurs_tag": "pl_pl", - "commonvoice_hours": 174.0, - "commonvoice_locale": "pl", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.45624928163848544, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "pms", @@ -8079,8 +7183,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.65, - "model": 4.0 + "score": 0.5, + "model": 1.0 }, { "bcp_47": "pt", @@ -8093,8 +7197,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9598364847166063, - "model": 4.0 + "score": 0.9723487188655754, + "model": 1.0 }, { "bcp_47": "pt", @@ -8107,8 +7211,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.3232823660730954, - "model": 4.0 + "score": 0.3110105331834714, + "model": 1.0 }, { "bcp_47": "pt", @@ -8121,8 +7225,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.4828937591427408, - "model": 4.0 + "score": 0.49172080600981716, + "model": 1.0 }, { "bcp_47": "puu", @@ -8469,52 +7573,10 @@ "commonvoice_hours": 242.0, "commonvoice_locale": "ru", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "model": 1.0 - }, - { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 242.0, - "commonvoice_locale": "ru", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9702818211253317, - "model": 1.0 - }, - { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 242.0, - "commonvoice_locale": "ru", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.21923866610511514, - "model": 1.0 - }, - { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 242.0, - "commonvoice_locale": "ru", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.41987611292148114, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "rue", @@ -8777,52 +7839,10 @@ "commonvoice_hours": 0.4, "commonvoice_locale": "sd", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.3333333333333333, - "model": 1.0 - }, - { - "bcp_47": "sd", - "speakers": 40329510, - "language_name": "Sindhi", - "flores_path": "snd_Arab", - "fleurs_tag": "sd_in", - "commonvoice_hours": 0.4, - "commonvoice_locale": "sd", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9075877951969867, - "model": 1.0 - }, - { - "bcp_47": "sd", - "speakers": 40329510, - "language_name": "Sindhi", - "flores_path": "snd_Arab", - "fleurs_tag": "sd_in", - "commonvoice_hours": 0.4, - "commonvoice_locale": "sd", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.21679684560539594, - "model": 1.0 - }, - { - "bcp_47": "sd", - "speakers": 40329510, - "language_name": "Sindhi", - "flores_path": "snd_Arab", - "fleurs_tag": "sd_in", - "commonvoice_hours": 0.4, - "commonvoice_locale": "sd", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4130326388570076, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "sdc", @@ -9407,52 +8427,10 @@ "commonvoice_hours": 411.0, "commonvoice_locale": "sw", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 - }, - { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363185104933659, - "model": 1.0 - }, - { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.2687578645492076, - "model": 1.0 - }, - { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.435973049682813, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "swb", @@ -9561,52 +8539,10 @@ "commonvoice_hours": 234.0, "commonvoice_locale": "ta", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 - }, - { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9604137478864351, - "model": 1.0 - }, - { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.22755903113370943, - "model": 1.0 - }, - { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.3968691612249629, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "taj", @@ -9701,52 +8637,10 @@ "commonvoice_hours": 0.3, "commonvoice_locale": "te", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.36666666666666664, - "model": 1.0 - }, - { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9017537718464212, - "model": 1.0 - }, - { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.2660789099709258, - "model": 1.0 - }, - { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4454927673606575, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "tem", @@ -9813,52 +8707,10 @@ "commonvoice_hours": 172.0, "commonvoice_locale": "th", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 - }, - { - "bcp_47": "th", - "speakers": 55181920, - "language_name": "Thai", - "flores_path": "tha_Thai", - "fleurs_tag": "th_th", - "commonvoice_hours": 172.0, - "commonvoice_locale": "th", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9492189440417714, - "model": 1.0 - }, - { - "bcp_47": "th", - "speakers": 55181920, - "language_name": "Thai", - "flores_path": "tha_Thai", - "fleurs_tag": "th_th", - "commonvoice_hours": 172.0, - "commonvoice_locale": "th", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.22673808962220887, - "model": 1.0 - }, - { - "bcp_47": "th", - "speakers": 55181920, - "language_name": "Thai", - "flores_path": "tha_Thai", - "fleurs_tag": "th_th", - "commonvoice_hours": 172.0, - "commonvoice_locale": "th", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.41961494547318173, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "thl", @@ -9922,7 +8774,7 @@ "language_name": "Tigre", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 5.2, + "commonvoice_hours": 7.3, "commonvoice_locale": "tig", "in_benchmark": false, "task": null, @@ -10107,52 +8959,10 @@ "commonvoice_hours": 128.0, "commonvoice_locale": "tr", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 - }, - { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9329665937492929, - "model": 1.0 - }, - { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.28856187360515456, - "model": 1.0 - }, - { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4483834465978942, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "tru", @@ -10174,7 +8984,7 @@ "language_name": "Taroko", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 8.9, + "commonvoice_hours": 10.0, "commonvoice_locale": "trv", "in_benchmark": false, "task": null, @@ -10501,8 +9311,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.6166666666666667, - "model": 4.0 + "score": 0.5, + "model": 1.0 }, { "bcp_47": "ur", @@ -10515,8 +9325,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9375650463557482, - "model": 4.0 + "score": 0.9345902038028003, + "model": 1.0 }, { "bcp_47": "ur", @@ -10529,8 +9339,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.24342394573959453, - "model": 4.0 + "score": 0.2433585957791825, + "model": 1.0 }, { "bcp_47": "ur", @@ -10543,49 +9353,7 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.4181802717137555, - "model": 4.0 - }, - { - "bcp_47": "uz", - "speakers": 32792780, - "language_name": "Uzbek", - "flores_path": "uzn_Latn", - "fleurs_tag": "uz_uz", - "commonvoice_hours": 100.0, - "commonvoice_locale": "uz", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "model": 1.0 - }, - { - "bcp_47": "uz", - "speakers": 32792780, - "language_name": "Uzbek", - "flores_path": "uzn_Latn", - "fleurs_tag": "uz_uz", - "commonvoice_hours": 100.0, - "commonvoice_locale": "uz", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9448611053734804, - "model": 1.0 - }, - { - "bcp_47": "uz", - "speakers": 32792780, - "language_name": "Uzbek", - "flores_path": "uzn_Latn", - "fleurs_tag": "uz_uz", - "commonvoice_hours": 100.0, - "commonvoice_locale": "uz", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.21766825893077738, + "score": 0.42196914378741973, "model": 1.0 }, { @@ -10597,10 +9365,10 @@ "commonvoice_hours": 100.0, "commonvoice_locale": "uz", "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4195087994775591, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "vai", @@ -10667,66 +9435,24 @@ "commonvoice_hours": 5.9, "commonvoice_locale": "vi", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 5.9, - "commonvoice_locale": "vi", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9478434905295018, - "model": 1.0 - }, - { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 5.9, - "commonvoice_locale": "vi", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.23107984716515415, - "model": 1.0 - }, - { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 5.9, - "commonvoice_locale": "vi", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4169594776564998, - "model": 1.0 - }, - { - "bcp_47": "vic", - "speakers": 3113, - "language_name": "Virgin Islands Creole English", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null + "bcp_47": "vic", + "speakers": 3113, + "language_name": "Virgin Islands Creole English", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false, + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "vls", @@ -11185,52 +9911,10 @@ "commonvoice_hours": 203.0, "commonvoice_locale": "yue", "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 - }, - { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.8955168718505389, - "model": 1.0 - }, - { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.24293503135220604, - "model": 1.0 - }, - { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.43687518387422897, - "model": 1.0 + "task": null, + "metric": null, + "score": null, + "model": null }, { "bcp_47": "za", @@ -11313,8 +9997,8 @@ "in_benchmark": true, "task": "classification", "metric": "accuracy", - "score": 0.6416666666666666, - "model": 4.0 + "score": 0.6466666666666667, + "model": 5.0 }, { "bcp_47": "zh", @@ -11327,8 +10011,8 @@ "in_benchmark": true, "task": "language_modeling", "metric": "chrf", - "score": 0.9270229139550292, - "model": 4.0 + "score": 0.93253470927813, + "model": 5.0 }, { "bcp_47": "zh", @@ -11341,8 +10025,8 @@ "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.3835031558957527, - "model": 4.0 + "score": 0.37546421356438325, + "model": 5.0 }, { "bcp_47": "zh", @@ -11355,8 +10039,8 @@ "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.557217638850705, - "model": 4.0 + "score": 0.553563744610482, + "model": 5.0 }, { "bcp_47": "zmi", @@ -11404,423 +10088,423 @@ "scores": [ { "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0.9, + "score": 0.8666666666666667, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", + "bcp_47": "en", "task": "language_modeling", "metric": "chrf", - "score": 0.9662394255427802, + "score": 0.9775327885540744, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.33684899445707545, + "score": 0.4939697152970565, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5132383488065385, + "score": 0.6050713247311065, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0.9, + "score": 0.8666666666666667, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", + "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", - "score": 0.9157239945539242, + "score": 0.9474992128891527, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2946937518705462, + "score": 0.41074275566059204, "sentence_nr": 14.5 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.47901362996280916, + "score": 0.5606266861920302, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0.8666666666666667, + "score": 0.7, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", - "score": 0.9800307455880838, + "score": 0.9904802851977837, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.5005636231739998, + "score": 0.4234349336981381, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.6083608181074219, + "score": 0.5544956505144546, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0.8666666666666667, + "score": 0.7333333333333333, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", - "score": 0.9398094019787611, + "score": 0.9231083500990617, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.341382728522014, + "score": 0.32628982579803617, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5025141961861047, + "score": 0.520771580386218, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0.8666666666666667, + "score": 0.43333333333333335, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", - "score": 0.9883846649306203, + "score": 0.9307208363594199, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.340773340614899, + "score": 0.2837250166554738, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.5123861936301051, + "score": 0.4684314458952127, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0.8666666666666667, + "score": 0.4, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", - "score": 0.9603938002806427, + "score": 0.8990036074617607, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.34270244014502865, + "score": 0.20360260890869705, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.5241272552859136, + "score": 0.4076175886917154, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0.8666666666666667, + "score": 0.6333333333333333, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "language_modeling", "metric": "chrf", - "score": 0.9170681035031951, + "score": 0.9708732257334283, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.37789354848061457, + "score": 0.4082092315494641, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5344045974435819, + "score": 0.5358317196308513, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0.8, + "score": 0.5333333333333333, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "language_modeling", "metric": "chrf", - "score": 0.9780371913896582, + "score": 0.9602736376353148, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.3794780275173, + "score": 0.283646935447629, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.5349002453164619, + "score": 0.46056393670415496, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0.9, + "score": 0.5333333333333333, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", - "score": 0.9555863990700229, + "score": 0.9756346055321089, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.31728190361318, + "score": 0.3193235920661593, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4881878357149611, + "score": 0.4875691290722964, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0.8666666666666667, + "score": 0.5333333333333333, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", - "score": 0.9539892816788119, + "score": 0.9489245079595486, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.41028031844473206, + "score": 0.31956422674397006, "sentence_nr": 14.5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5607632287491137, + "score": 0.4896277852320754, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0.36666666666666664, + "score": 0.4, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", + "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", - "score": 0.9031129205404201, + "score": 0.8692673671947869, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.15002522598066087, + "score": 0.3368333727390049, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.3344555209113584, + "score": 0.4873541452250132, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0.4666666666666667, + "score": 0.5, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", - "score": 0.9551956628433433, + "score": 0.9723487188655754, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.2811587891139184, + "score": 0.3110105331834714, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.46109033782928127, + "score": 0.49172080600981716, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0.5666666666666667, + "score": 0.5, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", - "score": 0.9319821809429131, + "score": 0.9345902038028003, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.20127691717583826, + "score": 0.2433585957791825, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.38780198426317497, + "score": 0.42196914378741973, "sentence_nr": 14.5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0.5333333333333333, @@ -11828,1530 +10512,58 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9439975775415789, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.18364881639775618, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.3791669348856303, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0.3333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9334228689163548, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.24092898437545654, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.41894143077328727, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0.36666666666666664, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8847379468653113, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.23230854865261913, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.42063939652271853, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9788487181545229, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.31823711841849855, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.49174647549269207, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9685562893433212, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4203985756610074, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5300571003447904, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.961354451644757, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.26818936209357325, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.4507327709042758, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9396649061437635, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2516753344674677, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4448545956789697, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9553160038255677, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.3248927726984041, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4689020729383555, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659586250799681, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.32618133837885355, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.49526445590376544, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421488818900206, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.24812610549809738, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4318359636701651, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945704338611724, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.14767816277169443, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.34353973347368816, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9521896068400092, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30694468553453214, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4717417673162039, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9441891794331471, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.28485630651027877, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4561447909933971, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96965822717479, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.27748101044407486, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.46396483435604213, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9339719005290334, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.26835607046224613, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.4411714629040183, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9274301130074318, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.25461973194021953, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4296209828775689, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9422502494463877, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.2550718176103704, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.44555883949922764, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9384158016197498, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.21969579072372616, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4134343535369622, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9372865200487422, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.20114751241051923, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.39404171184956394, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9493846554320581, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.22832077978859452, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4245203296342906, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9449541544914151, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.26403874316694886, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.45903065670305854, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "language_modeling", - "metric": "chrf", - "score": 0.918432369873729, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.18617334539824332, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.3705060280208132, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359301483225031, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.06509147151730071, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.22674886804446037, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9211968327986088, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.2521285657827072, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.4422326291663304, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8971009455771796, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.3260979917168239, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.48620285493698817, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9603182772393537, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.26987204535648013, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.45624928163848544, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9510604972420862, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2806919135940658, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.45314727718184383, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9702818211253317, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21923866610511514, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.41987611292148114, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0.3333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9075877951969867, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.21679684560539594, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.4130326388570076, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363185104933659, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.2687578645492076, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.435973049682813, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9604137478864351, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.22755903113370943, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3968691612249629, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0.36666666666666664, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9017537718464212, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2660789099709258, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4454927673606575, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9492189440417714, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.22673808962220887, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.41961494547318173, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9329665937492929, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.28856187360515456, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4483834465978942, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9128780231729426, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.21847309277555946, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.40360616182971726, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9448611053734804, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.21766825893077738, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.4195087994775591, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9478434905295018, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.23107984716515415, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4169594776564998, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8955168718505389, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.24293503135220604, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.43687518387422897, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9131870034415596, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39048110791786933, - "sentence_nr": 14.5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5665464429871689, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0.7, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9318859252084998, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2068354433755052, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.38571943918088597, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8721745881216986, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.17813156872400582, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.34058339171576163, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7598862193678689, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.39023213290403214, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5013108104875811, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466716556334129, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.27947534892496323, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.43006530569343165, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0.7666666666666667, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9558889194048766, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.2750376678830284, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.441500491331595, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.890720660152386, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.21739461408317115, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.359020024214477, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0.3, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8922615537670865, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.21871159296535658, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.35241753637848183, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pt", + "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", - "score": 0.9492522967960337, + "score": 0.9410655280186742, "sentence_nr": 14.5 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.30258414492830743, + "score": 0.407349841474714, "sentence_nr": 14.5 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.4573560946306326, + "score": 0.5862284100611604, "sentence_nr": 14.5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0.6666666666666666, + "score": 0.4666666666666667, "sentence_nr": 14.5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ur", + "bcp_47": "en", "task": "language_modeling", "metric": "chrf", - "score": 0.9289246379251568, + "score": 0.7898342718049297, "sentence_nr": 14.5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.18142367436048182, + "score": 0.39829687791881524, "sentence_nr": 14.5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.34598666510470966, + "score": 0.5018233196301349, "sentence_nr": 14.5 }, { @@ -13359,7 +10571,7 @@ "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0.6666666666666666, + "score": 0.6333333333333333, "sentence_nr": 14.5 }, { @@ -13367,7 +10579,7 @@ "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", - "score": 0.9188930778144581, + "score": 0.921617170849841, "sentence_nr": 14.5 }, { @@ -13375,7 +10587,7 @@ "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.3631466653543813, + "score": 0.3584454346550059, "sentence_nr": 14.5 }, { @@ -13383,71 +10595,7 @@ "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5520065979059031, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489111124535548, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.26593690487060095, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.46048583874582116, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0.3333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899742833244405, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.20957267610789626, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3814341573965288, + "score": 0.5407815810671934, "sentence_nr": 14.5 }, { @@ -13463,7 +10611,7 @@ "bcp_47": "en", "task": "language_modeling", "metric": "chrf", - "score": 0.9816685337276303, + "score": 0.9671581799852296, "sentence_nr": 14.5 }, { @@ -13471,7 +10619,7 @@ "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.40476391184425864, + "score": 0.41353083493324033, "sentence_nr": 14.5 }, { @@ -13479,199 +10627,7 @@ "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5350771248567329, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.976774826379902, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.30479108723403314, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.4664322514490113, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9771465942743917, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3330934985862475, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.48898780285384186, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9371251285777896, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3034809934459214, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4682857313818616, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0.3, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8988042374730737, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.3171219784022306, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.46859907170069254, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9609959534386474, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.33037537825270835, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.48617141944202497, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528711252548706, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.2565171122091569, - "sentence_nr": 14.5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.43494042420563395, + "score": 0.5310916105215437, "sentence_nr": 14.5 }, { @@ -13687,7 +10643,7 @@ "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", - "score": 0.9220222928852874, + "score": 0.92938328453392, "sentence_nr": 14.5 }, { @@ -13695,7 +10651,7 @@ "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.37010453186602793, + "score": 0.37449321023356824, "sentence_nr": 14.5 }, { @@ -13703,7 +10659,7 @@ "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5495542857606343, + "score": 0.559410465345808, "sentence_nr": 14.5 } ]